commit 6f6b95522a6e61a7af9ea6980b75fe8fd26e77cd
parent c7b47ff81cd070a3254159cdc5574528ee77a82f
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Thu, 8 Nov 2018 14:22:05 +0100
Merge branch 'feature_mpi'
Diffstat:
16 files changed, 1407 insertions(+), 195 deletions(-)
diff --git a/README.md b/README.md
@@ -1,7 +1,22 @@
# High-Tune: RenDeRer
-This programs is used to test the implementation of radiative transfer
-Monte-Carlo algorithms in cloudy atmopsheres.
+This program is a part of the [High-Tune](http://www.umr-cnrm.fr/high-tune/)
+project: it illustrates the implementation of efficient radiative transfer
+Monte-Carlo algorithms in cloudy atmospheres.
+
+This program implements a rendering algorithm that computes the radiance in the
+spectral interval [380, 780] nanometres that reaches an image through a pinhole
+camera. At a minimum, the rendered scene is composed of an infinite 1D
+atmosphere along the Z axis. Optionally, one can add 3D data describing the
+cloud properties and/or a geometry describing the ground with a Lambertian
+reflectivity. The clouds and the ground can both be infinitely repeated along
+the X and Y axes.
+
+In addition to shared-memory parallelism, htrdr supports the [*M*essage
+*P*assing *I*nterface](https://www.mpi-forum.org/) specification to
+parallelise its computations in a distributed memory environment; the HTRDR
+binary can be run either directly or through an MPI process launcher like
+`mpirun`.
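For instance, a run distributed over four processes could be launched with something like `mpirun -np 4 htrdr <options>`; the exact launcher name and its flags depend on the MPI distribution in use, while the htrdr options themselves are the same as for a direct invocation.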
## How to build
@@ -17,8 +32,9 @@ on the
[Star-3DAW](https://gitlab.com/meso-star/star-3daw/),
[Star-SF](https://gitlab.com/meso-star/star-sf/),
[Star-SP](https://gitlab.com/meso-star/stat-sp/) and
-[Star-VX](https://gitlab.com/meso-star/star-vx/) libraries and on the
-[OpenMP](http://www.openmp.org) 1.2 specification to parallelize its
+[Star-VX](https://gitlab.com/meso-star/star-vx/) libraries and on the
+[OpenMP](http://www.openmp.org) 1.2 and
+[MPI](https://www.mpi-forum.org/) 2.0 specifications to parallelize its
computations.
First ensure that CMake is installed on your system. Then install the RCMake
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -34,11 +34,14 @@ find_package(HTCP REQUIRED)
find_package(HTGOP REQUIRED)
find_package(HTMIE REQUIRED)
find_package(OpenMP 1.2 REQUIRED)
+find_package(MPI 2 REQUIRED)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${RCMAKE_SOURCE_DIR})
include(rcmake)
include(rcmake_runtime)
+set(CMAKE_C_COMPILER ${MPI_C_COMPILER})
+
include_directories(
${RSys_INCLUDE_DIR}
${Star3D_INCLUDE_DIR}
@@ -48,7 +51,8 @@ include_directories(
${StarVX_INCLUDE_DIR}
${HTCP_INCLUDE_DIR}
${HTGOP_INCLUDE_DIR}
- ${HTMIE_INCLUDE_DIR})
+ ${HTMIE_INCLUDE_DIR}
+ ${MPI_INCLUDE_PATH})
################################################################################
# Configure and define targets
diff --git a/src/htrdr.c b/src/htrdr.c
@@ -25,7 +25,7 @@
#include "htrdr_sun.h"
#include "htrdr_solve.h"
-#include <rsys/clock_time.h>
+#include <rsys/cstr.h>
#include <rsys/mem_allocator.h>
#include <rsys/str.h>
@@ -39,10 +39,12 @@
#include <stdarg.h>
#include <stdio.h>
#include <unistd.h>
+#include <time.h>
#include <sys/time.h> /* timespec */
#include <sys/stat.h> /* S_IRUSR & S_IWUSR */
#include <omp.h>
+#include <mpi.h>
/*******************************************************************************
* Helper functions
@@ -243,6 +245,185 @@ spherical_to_cartesian_dir
dir[2] = sin_elevation;
}
+static void
+release_mpi(struct htrdr* htrdr)
+{
+ ASSERT(htrdr);
+ if(htrdr->mpi_working_procs) {
+ MEM_RM(htrdr->allocator, htrdr->mpi_working_procs);
+ htrdr->mpi_working_procs = NULL;
+ }
+ if(htrdr->mpi_progress_octree) {
+ MEM_RM(htrdr->allocator, htrdr->mpi_progress_octree);
+ htrdr->mpi_progress_octree = NULL;
+ }
+ if(htrdr->mpi_progress_render) {
+ MEM_RM(htrdr->allocator, htrdr->mpi_progress_render);
+ htrdr->mpi_progress_render = NULL;
+ }
+ if(htrdr->mpi_err_str) {
+ MEM_RM(htrdr->allocator, htrdr->mpi_err_str);
+ htrdr->mpi_err_str = NULL;
+ }
+ if(htrdr->mpi_mutex) {
+ mutex_destroy(htrdr->mpi_mutex);
+ htrdr->mpi_mutex = NULL;
+ }
+}
+
+static res_T
+mpi_print_proc_info(struct htrdr* htrdr)
+{
+ char proc_name[MPI_MAX_PROCESSOR_NAME];
+ int proc_name_len;
+ char* proc_names = NULL;
+ uint32_t* proc_nthreads = NULL;
+ uint32_t nthreads = 0;
+ int iproc;
+ res_T res = RES_OK;
+ ASSERT(htrdr);
+
+ if(htrdr->mpi_rank == 0) {
+ proc_names = MEM_CALLOC(htrdr->allocator, (size_t)htrdr->mpi_nprocs,
+ MPI_MAX_PROCESSOR_NAME*sizeof(*proc_names));
+ if(!proc_names) {
+ res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "could not allocate the temporary memory for MPI process names -- "
+ "%s.\n", res_to_cstr(res));
+ goto error;
+ }
+
+ proc_nthreads = MEM_CALLOC(htrdr->allocator, (size_t)htrdr->mpi_nprocs,
+ sizeof(*proc_nthreads));
+ if(!proc_nthreads) {
+ res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "could not allocate the temporary memory for the #threads of the MPI "
+ "processes -- %s.\n", res_to_cstr(res));
+ goto error;
+ }
+ }
+
+ /* Gather process name */
+ MPI(Get_processor_name(proc_name, &proc_name_len));
+ MPI(Gather(proc_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, proc_names,
+ MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, MPI_COMM_WORLD));
+
+ /* Gather process #threads */
+ nthreads = (uint32_t)htrdr->nthreads;
+ MPI(Gather(&nthreads, 1, MPI_UINT32_T, proc_nthreads, 1, MPI_UINT32_T, 0,
+ MPI_COMM_WORLD));
+
+ if(htrdr->mpi_rank == 0) {
+ FOR_EACH(iproc, 0, htrdr->mpi_nprocs) {
+ htrdr_log(htrdr, "Process %d -- %s; #threads: %u\n",
+ iproc, proc_names + iproc*MPI_MAX_PROCESSOR_NAME, proc_nthreads[iproc]);
+ }
+ }
+
+exit:
+ if(proc_names) MEM_RM(htrdr->allocator, proc_names);
+ if(proc_nthreads) MEM_RM(htrdr->allocator, proc_nthreads);
+ return res;
+error:
+ goto exit;
+}
+
+static res_T
+init_mpi(struct htrdr* htrdr)
+{
+ size_t n;
+ int err;
+ res_T res = RES_OK;
+ ASSERT(htrdr);
+
+ htrdr->mpi_err_str = MEM_CALLOC
+ (htrdr->allocator, htrdr->nthreads, MPI_MAX_ERROR_STRING);
+ if(!htrdr->mpi_err_str) {
+ res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "could not allocate the MPI error strings -- %s.\n",
+ res_to_cstr(res));
+ goto error;
+ }
+
+ err = MPI_Comm_rank(MPI_COMM_WORLD, &htrdr->mpi_rank);
+ if(err != MPI_SUCCESS) {
+ htrdr_log_err(htrdr,
+ "could not determine the MPI rank of the calling process -- %s.\n",
+ htrdr_mpi_error_string(htrdr, err));
+ res = RES_UNKNOWN_ERR;
+ goto error;
+ }
+
+ err = MPI_Comm_size(MPI_COMM_WORLD, &htrdr->mpi_nprocs);
+ if(err != MPI_SUCCESS) {
+ htrdr_log_err(htrdr,
+      "could not retrieve the size of the MPI group -- %s.\n",
+ htrdr_mpi_error_string(htrdr, err));
+ res = RES_UNKNOWN_ERR;
+ goto error;
+ }
+
+ htrdr->mpi_working_procs = MEM_CALLOC(htrdr->allocator,
+ (size_t)htrdr->mpi_nprocs, sizeof(*htrdr->mpi_working_procs));
+ if(!htrdr->mpi_working_procs) {
+ htrdr_log_err(htrdr,
+ "could not allocate the list of working processes.\n");
+ res = RES_MEM_ERR;
+ goto error;
+ }
+
+  /* Initially, all the processes are working */
+ htrdr->mpi_nworking_procs = (size_t)htrdr->mpi_nprocs;
+ memset(htrdr->mpi_working_procs, 0xFF,
+ htrdr->mpi_nworking_procs*sizeof(*htrdr->mpi_working_procs));
+
+ /* Allocate #processes progress statuses on the master process and only 1
+ * progress status on the other ones: the master process will gather the
+ * status of the other processes to report their progression. */
+ n = (size_t)(htrdr->mpi_rank == 0 ? htrdr->mpi_nprocs : 1);
+
+ htrdr->mpi_progress_octree = MEM_CALLOC
+ (htrdr->allocator, n, sizeof(*htrdr->mpi_progress_octree));
+ if(!htrdr->mpi_progress_octree) {
+ res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "could not allocate the progress state of the octree building -- %s.\n",
+ res_to_cstr(res));
+ goto error;
+ }
+
+ htrdr->mpi_progress_render = MEM_CALLOC
+ (htrdr->allocator, n, sizeof(*htrdr->mpi_progress_render));
+ if(!htrdr->mpi_progress_render) {
+ res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "could not allocate the progress state of the scene rendering -- %s.\n",
+ res_to_cstr(res));
+ goto error;
+ }
+
+ htrdr->mpi_mutex = mutex_create();
+ if(!htrdr->mpi_mutex) {
+ res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "could not create the mutex to protect MPI calls from concurrent "
+ "threads -- %s.\n", res_to_cstr(res));
+ goto error;
+ }
+
+ if(htrdr->mpi_nprocs != 1)
+ mpi_print_proc_info(htrdr);
+
+exit:
+ return res;
+error:
+ release_mpi(htrdr);
+ goto exit;
+}
+
/*******************************************************************************
* Local functions
******************************************************************************/
@@ -271,9 +452,20 @@ htrdr_init
str_init(htrdr->allocator, &htrdr->output_name);
htrdr->dump_vtk = args->dump_vtk;
+ htrdr->cache_grids = args->cache_grids;
htrdr->verbose = args->verbose;
htrdr->nthreads = MMIN(args->nthreads, (unsigned)omp_get_num_procs());
htrdr->spp = args->image.spp;
+ htrdr->width = args->image.definition[0];
+ htrdr->height = args->image.definition[1];
+
+ res = init_mpi(htrdr);
+ if(res != RES_OK) goto error;
+
+ if(htrdr->cache_grids && htrdr->mpi_nprocs != 1) {
+    htrdr_log_warn(htrdr, "cached grids are not supported in an MPI execution.\n");
+ htrdr->cache_grids = 0;
+ }
if(!args->output) {
htrdr->output = stdout;
@@ -287,14 +479,17 @@ htrdr_init
res = str_set(&htrdr->output_name, output_name);
if(res != RES_OK) {
htrdr_log_err(htrdr,
- "could not store the name of the output stream `%s'.\n", output_name);
+ "%s: could not store the name of the output stream `%s' -- %s.\n",
+ FUNC_NAME, output_name, res_to_cstr(res));
goto error;
}
res = svx_device_create
(&htrdr->logger, htrdr->allocator, args->verbose, &htrdr->svx);
if(res != RES_OK) {
- htrdr_log_err(htrdr, "could not create the Star-VoXel device.\n");
+ htrdr_log_err(htrdr,
+ "%s: could not create the Star-VoXel device -- %s.\n",
+ FUNC_NAME, res_to_cstr(res));
goto error;
}
@@ -304,12 +499,14 @@ htrdr_init
res = s3d_device_create
(&htrdr->logger, htrdr->allocator, 0, &htrdr->s3d);
if(res != RES_OK) {
- htrdr_log_err(htrdr, "could not create the Star-3D device.\n");
+ htrdr_log_err(htrdr,
+ "%s: could not create the Star-3D device -- %s.\n",
+ FUNC_NAME, res_to_cstr(res));
goto error;
}
- res = htrdr_ground_create
- (htrdr, args->filename_obj, args->repeat_ground, &htrdr->ground);
+ res = htrdr_ground_create(htrdr, args->filename_obj,
+ args->ground_reflectivity, args->repeat_ground, &htrdr->ground);
if(res != RES_OK) goto error;
proj_ratio =
@@ -319,14 +516,18 @@ htrdr_init
args->camera.up, proj_ratio, MDEG2RAD(args->camera.fov_y), &htrdr->cam);
if(res != RES_OK) goto error;
- res = htrdr_buffer_create(htrdr,
- args->image.definition[0], /* Width */
- args->image.definition[1], /* Height */
- args->image.definition[0]*sizeof(struct htrdr_accum[3]), /* Pitch */
- sizeof(struct htrdr_accum[3]),
- ALIGNOF(struct htrdr_accum[3]), /* Alignment */
- &htrdr->buf);
- if(res != RES_OK) goto error;
+ /* Create the image buffer only on the master process; the image parts
+ * rendered by the processes are gathered onto the master process. */
+ if(!htrdr->dump_vtk && htrdr->mpi_rank == 0) {
+ res = htrdr_buffer_create(htrdr,
+ args->image.definition[0], /* Width */
+ args->image.definition[1], /* Height */
+ args->image.definition[0]*sizeof(struct htrdr_accum[3]), /* Pitch */
+ sizeof(struct htrdr_accum[3]),
+ ALIGNOF(struct htrdr_accum[3]), /* Alignment */
+ &htrdr->buf);
+ if(res != RES_OK) goto error;
+ }
res = htrdr_sun_create(htrdr, &htrdr->sun);
if(res != RES_OK) goto error;
@@ -342,24 +543,25 @@ htrdr_init
htrdr->lifo_allocators = MEM_CALLOC
(htrdr->allocator, htrdr->nthreads, sizeof(*htrdr->lifo_allocators));
if(!htrdr->lifo_allocators) {
- htrdr_log_err(htrdr,
- "could not allocate the list of per thread LIFO allocator.\n");
res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "%s: could not allocate the list of per thread LIFO allocator -- %s.\n",
+ FUNC_NAME, res_to_cstr(res));
goto error;
}
FOR_EACH(ithread, 0, htrdr->nthreads) {
res = mem_init_lifo_allocator
- (&htrdr->lifo_allocators[ithread], htrdr->allocator, 4096);
+ (&htrdr->lifo_allocators[ithread], htrdr->allocator, 16384);
if(res != RES_OK) {
htrdr_log_err(htrdr,
- "could not initialise the LIFO allocator of the thread %lu.\n",
- (unsigned long)ithread);
+ "%s: could not initialise the LIFO allocator of the thread %lu -- %s.\n",
+ FUNC_NAME, (unsigned long)ithread, res_to_cstr(res));
goto error;
}
}
- exit:
+exit:
return res;
error:
htrdr_release(htrdr);
@@ -370,6 +572,7 @@ void
htrdr_release(struct htrdr* htrdr)
{
ASSERT(htrdr);
+ release_mpi(htrdr);
if(htrdr->s3d) S3D(device_ref_put(htrdr->s3d));
if(htrdr->svx) SVX(device_ref_put(htrdr->svx));
if(htrdr->ground) htrdr_ground_ref_put(htrdr->ground);
@@ -395,6 +598,10 @@ htrdr_run(struct htrdr* htrdr)
if(htrdr->dump_vtk) {
const size_t nbands = htrdr_sky_get_sw_spectral_bands_count(htrdr->sky);
size_t i;
+
+ /* Nothing to do */
+ if(htrdr->mpi_rank != 0) goto exit;
+
FOR_EACH(i, 0, nbands) {
const size_t iband = htrdr_sky_get_sw_spectral_band_id(htrdr->sky, i);
const size_t nquads = htrdr_sky_get_sw_spectral_band_quadrature_length
@@ -407,19 +614,14 @@ htrdr_run(struct htrdr* htrdr)
}
}
} else {
- struct time t0, t1;
- char buf[128];
-
- time_current(&t0);
- res = htrdr_draw_radiance_sw(htrdr, htrdr->cam, htrdr->spp, htrdr->buf);
- if(res != RES_OK) goto error;
- time_sub(&t0, time_current(&t1), &t0);
- time_dump(&t0, TIME_ALL, NULL, buf, sizeof(buf));
- htrdr_log(htrdr, "Rendering time: %s\n", buf);
-
- res = dump_accum_buffer
- (htrdr, htrdr->buf, str_cget(&htrdr->output_name), htrdr->output);
+ res = htrdr_draw_radiance_sw(htrdr, htrdr->cam, htrdr->width,
+ htrdr->height, htrdr->spp, htrdr->buf);
if(res != RES_OK) goto error;
+ if(htrdr->mpi_rank == 0) {
+ res = dump_accum_buffer
+ (htrdr, htrdr->buf, str_cget(&htrdr->output_name), htrdr->output);
+ if(res != RES_OK) goto error;
+ }
}
exit:
return res;
@@ -430,11 +632,14 @@ error:
void
htrdr_log(struct htrdr* htrdr, const char* msg, ...)
{
- va_list vargs_list;
ASSERT(htrdr && msg);
- va_start(vargs_list, msg);
- log_msg(htrdr, LOG_OUTPUT, msg, vargs_list);
- va_end(vargs_list);
+ /* Log standard message only on master process */
+ if(htrdr->mpi_rank == 0) {
+ va_list vargs_list;
+ va_start(vargs_list, msg);
+ log_msg(htrdr, LOG_OUTPUT, msg, vargs_list);
+ va_end(vargs_list);
+ }
}
void
@@ -442,6 +647,7 @@ htrdr_log_err(struct htrdr* htrdr, const char* msg, ...)
{
va_list vargs_list;
ASSERT(htrdr && msg);
+ /* Log errors on all processes */
va_start(vargs_list, msg);
log_msg(htrdr, LOG_ERROR, msg, vargs_list);
va_end(vargs_list);
@@ -450,11 +656,48 @@ htrdr_log_err(struct htrdr* htrdr, const char* msg, ...)
void
htrdr_log_warn(struct htrdr* htrdr, const char* msg, ...)
{
- va_list vargs_list;
ASSERT(htrdr && msg);
- va_start(vargs_list, msg);
- log_msg(htrdr, LOG_WARNING, msg, vargs_list);
- va_end(vargs_list);
+ /* Log warnings only on master process */
+ if(htrdr->mpi_rank == 0) {
+ va_list vargs_list;
+ va_start(vargs_list, msg);
+ log_msg(htrdr, LOG_WARNING, msg, vargs_list);
+ va_end(vargs_list);
+ }
+}
+
+const char*
+htrdr_mpi_error_string(struct htrdr* htrdr, const int mpi_err)
+{
+ const int ithread = omp_get_thread_num();
+ char* str;
+ int strlen_err;
+ int err;
+ ASSERT(htrdr && (size_t)ithread < htrdr->nthreads);
+ str = htrdr->mpi_err_str + ithread*MPI_MAX_ERROR_STRING;
+ err = MPI_Error_string(mpi_err, str, &strlen_err);
+ return err == MPI_SUCCESS ? str : "Invalid MPI error";
+}
+
+void
+htrdr_fprintf(struct htrdr* htrdr, FILE* stream, const char* msg, ...)
+{
+ ASSERT(htrdr && msg);
+ if(htrdr->mpi_rank == 0) {
+ va_list vargs_list;
+ va_start(vargs_list, msg);
+ vfprintf(stream, msg, vargs_list);
+ va_end(vargs_list);
+ }
+}
+
+void
+htrdr_fflush(struct htrdr* htrdr, FILE* stream)
+{
+ ASSERT(htrdr);
+ if(htrdr->mpi_rank == 0) {
+ fflush(stream);
+ }
}
/*******************************************************************************
@@ -642,3 +885,140 @@ exit:
error:
goto exit;
}
+
+void
+send_mpi_progress
+ (struct htrdr* htrdr, const enum htrdr_mpi_message msg, const int32_t percent)
+{
+ ASSERT(htrdr);
+ ASSERT(msg == HTRDR_MPI_PROGRESS_RENDERING
+ || msg == HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+ (void)htrdr;
+ mutex_lock(htrdr->mpi_mutex);
+ MPI(Send(&percent, 1, MPI_INT32_T, 0, msg, MPI_COMM_WORLD));
+ mutex_unlock(htrdr->mpi_mutex);
+}
+
+void
+fetch_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message msg)
+{
+ struct timespec t;
+ int32_t* progress = NULL;
+ int iproc;
+ ASSERT(htrdr && htrdr->mpi_rank == 0);
+
+ t.tv_sec = 0;
+ t.tv_nsec = 10000000; /* 10ms */
+
+ switch(msg) {
+ case HTRDR_MPI_PROGRESS_BUILD_OCTREE:
+ progress = htrdr->mpi_progress_octree;
+ break;
+ case HTRDR_MPI_PROGRESS_RENDERING:
+ progress = htrdr->mpi_progress_render;
+ break;
+ default: FATAL("Unreachable code.\n"); break;
+ }
+
+ FOR_EACH(iproc, 1, htrdr->mpi_nprocs) {
+ /* Flush the last sent percentage of the process `iproc' */
+ for(;;) {
+ MPI_Request req;
+ int flag;
+ int complete;
+
+ mutex_lock(htrdr->mpi_mutex);
+ MPI(Iprobe(iproc, msg, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE));
+ mutex_unlock(htrdr->mpi_mutex);
+
+ if(flag == 0) break; /* No more message */
+
+ mutex_lock(htrdr->mpi_mutex);
+ MPI(Irecv(&progress[iproc], 1, MPI_INT32_T, iproc, msg, MPI_COMM_WORLD, &req));
+ mutex_unlock(htrdr->mpi_mutex);
+ for(;;) {
+ mutex_lock(htrdr->mpi_mutex);
+ MPI(Test(&req, &complete, MPI_STATUS_IGNORE));
+ mutex_unlock(htrdr->mpi_mutex);
+ if(complete) break;
+ nanosleep(&t, NULL);
+ }
+ }
+ }
+}
+
+void
+print_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message msg)
+{
+ ASSERT(htrdr && htrdr->mpi_rank == 0);
+
+ if(htrdr->mpi_nprocs == 1) {
+ switch(msg) {
+ case HTRDR_MPI_PROGRESS_BUILD_OCTREE:
+ htrdr_fprintf(htrdr, stderr, "\033[2K\rBuilding octree: %3d%%",
+ htrdr->mpi_progress_octree[0]);
+ break;
+ case HTRDR_MPI_PROGRESS_RENDERING:
+ htrdr_fprintf(htrdr, stderr, "\033[2K\rRendering: %3d%%",
+ htrdr->mpi_progress_render[0]);
+ break;
+ default: FATAL("Unreachable code.\n"); break;
+ }
+ htrdr_fflush(htrdr, stderr);
+ } else {
+ int iproc;
+ FOR_EACH(iproc, 0, htrdr->mpi_nprocs) {
+ switch(msg) {
+ case HTRDR_MPI_PROGRESS_BUILD_OCTREE:
+ htrdr_fprintf(htrdr, stderr,
+ "\033[2K\rProcess %d -- building octree: %3d%%%c",
+ iproc, htrdr->mpi_progress_octree[iproc],
+ iproc == htrdr->mpi_nprocs - 1 ? '\r' : '\n');
+ break;
+ case HTRDR_MPI_PROGRESS_RENDERING:
+ htrdr_fprintf(htrdr, stderr,
+ "\033[2K\rProcess %d -- rendering: %3d%%%c",
+ iproc, htrdr->mpi_progress_render[iproc],
+ iproc == htrdr->mpi_nprocs - 1 ? '\r' : '\n');
+ break;
+ default: FATAL("Unreachable code.\n"); break;
+ }
+ }
+ }
+}
+
+void
+clear_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message msg)
+{
+ ASSERT(htrdr);
+ (void)msg;
+ if(htrdr->mpi_nprocs > 1) {
+ htrdr_fprintf(htrdr, stderr, "\033[%dA", htrdr->mpi_nprocs-1);
+ }
+}
+
+int32_t
+total_mpi_progress(const struct htrdr* htrdr, const enum htrdr_mpi_message msg)
+{
+  const int32_t* progress = NULL;
+ int total = 0;
+ int iproc;
+ ASSERT(htrdr && htrdr->mpi_rank == 0);
+
+ switch(msg) {
+ case HTRDR_MPI_PROGRESS_BUILD_OCTREE:
+ progress = htrdr->mpi_progress_octree;
+ break;
+ case HTRDR_MPI_PROGRESS_RENDERING:
+ progress = htrdr->mpi_progress_render;
+ break;
+ default: FATAL("Unreachable code.\n"); break;
+ }
+
+ FOR_EACH(iproc, 0, htrdr->mpi_nprocs) {
+ total += progress[iproc];
+ }
+ total = total / htrdr->mpi_nprocs;
+ return total;
+}
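For example, if three processes last reported 100, 40 and 60 percent, the function returns (100 + 40 + 60) / 3 = 66, i.e. the integer mean of the per-process progress percentages.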
+
diff --git a/src/htrdr.h b/src/htrdr.h
@@ -35,6 +35,7 @@ struct htrdr_buffer;
struct htrdr_sky;
struct htrdr_rectangle;
struct mem_allocator;
+struct mutex;
struct s3d_device;
struct s3d_scene;
struct ssf_bsdf;
@@ -52,13 +53,28 @@ struct htrdr {
struct htrdr_camera* cam;
struct htrdr_buffer* buf;
size_t spp; /* #samples per pixel */
+ size_t width; /* Image width */
+ size_t height; /* Image height */
FILE* output;
struct str output_name;
- unsigned nthreads;
- int dump_vtk;
- int verbose;
+ unsigned nthreads; /* #threads of the process */
+ int dump_vtk; /* Dump octree VTK */
+ int cache_grids; /* Use/Precompute grid caches */
+ int verbose; /* Verbosity level */
+
+ int mpi_rank; /* Rank of the process in the MPI group */
+ int mpi_nprocs; /* Overall #processes in the MPI group */
+ char* mpi_err_str; /* Temp buffer used to store MPI error string */
+  int8_t* mpi_working_procs; /* Per-rank flags marking the active processes */
+ size_t mpi_nworking_procs;
+
+ /* Process progress percentage */
+ int32_t* mpi_progress_octree;
+ int32_t* mpi_progress_render;
+
+ struct mutex* mpi_mutex; /* Protect MPI calls from concurrent threads */
struct logger logger;
struct mem_allocator* allocator;
@@ -109,5 +125,26 @@ htrdr_log_warn
#endif
;
+extern LOCAL_SYM const char*
+htrdr_mpi_error_string
+ (struct htrdr* htrdr,
+ const int mpi_err);
+
+extern LOCAL_SYM void
+htrdr_fprintf
+ (struct htrdr* htrdr,
+ FILE* stream,
+ const char* msg,
+ ...)
+#ifdef COMPILER_GCC
+ __attribute((format(printf, 3, 4)))
+#endif
+ ;
+
+extern LOCAL_SYM void
+htrdr_fflush
+ (struct htrdr* htrdr,
+ FILE* stream);
+
#endif /* HTRDR_H */
diff --git a/src/htrdr_args.c b/src/htrdr_args.c
@@ -46,10 +46,19 @@ print_help(const char* cmd)
printf(
" -d dump octree data to OUTPUT wrt the VTK ASCII file format.\n");
printf(
+" -e ground reflectivity in [0, 1]. By default its value is `%g'.\n",
+ HTRDR_ARGS_DEFAULT.ground_reflectivity);
+ printf(
" -f overwrite the OUTPUT file if it already exists.\n");
printf(
" -g FILENAME path of an OBJ file representing the ground geometry.\n");
printf(
+" -G precompute/use grids of raw cloud optical properties built\n"
+" from the HTCP file, the atmospheric profile and the Mie's\n"
+" data. If the corresponding grids were generated in a\n"
+" previous run, reuse them as far as it is possible, i.e. if\n"
+" the HTCP, Mie and atmospheric files were not updated.\n");
+ printf(
" -h display this help and exit.\n");
printf(
" -i <image> define the image to compute.\n");
@@ -320,7 +329,7 @@ htrdr_args_init(struct htrdr_args* args, int argc, char** argv)
*args = HTRDR_ARGS_DEFAULT;
- while((opt = getopt(argc, argv, "a:C:c:D:dfg:hi:m:o:RrT:t:v")) != -1) {
+ while((opt = getopt(argc, argv, "a:C:c:D:de:fGg:hi:m:o:RrT:t:v")) != -1) {
switch(opt) {
case 'a': args->filename_gas = optarg; break;
case 'C':
@@ -330,7 +339,14 @@ htrdr_args_init(struct htrdr_args* args, int argc, char** argv)
case 'c': args->filename_les = optarg; break;
case 'D': res = parse_sun_dir(args, optarg); break;
case 'd': args->dump_vtk = 1; break;
+      case 'e':
+        res = cstr_to_double(optarg, &args->ground_reflectivity);
+        if(res == RES_OK
+        && (args->ground_reflectivity < 0 || args->ground_reflectivity > 1)) {
+          res = RES_BAD_ARG;
+        }
+        break;
case 'f': args->force_overwriting = 1; break;
+ case 'G': args->cache_grids = 1; break;
case 'g': args->filename_obj = optarg; break;
case 'h':
print_help(argv[0]);
diff --git a/src/htrdr_args.h b/src/htrdr_args.h
@@ -47,10 +47,12 @@ struct htrdr_args {
double sun_azimuth; /* In degrees */
double sun_elevation; /* In degrees */
double optical_thickness; /* Threshold used during octree building */
+ double ground_reflectivity; /* Reflectivity of the ground */
unsigned nthreads; /* Hint on the number of threads to use */
int force_overwriting;
int dump_vtk; /* Dump the loaded cloud properties in a VTK file */
+ int cache_grids; /* Use grid caching mechanism */
int verbose; /* Verbosity level */
int repeat_clouds; /* Make the clouds infinite in X and Y */
int repeat_ground; /* Make the ground infinite in X and Y */
@@ -80,9 +82,11 @@ struct htrdr_args {
0, /* Sun azimuth */ \
90, /* Sun elevation */ \
1.0, /* Optical thickness */ \
+ 0.5, /* Ground reflectivity */ \
(unsigned)~0, /* #threads */ \
0, /* Force overwriting */ \
0, /* dump VTK */ \
+ 0, /* Grid cache */ \
0, /* Verbose flag */ \
0, /* Repeat clouds */ \
0, /* Repeat ground */ \
diff --git a/src/htrdr_buffer.c b/src/htrdr_buffer.c
@@ -163,3 +163,4 @@ htrdr_buffer_at(struct htrdr_buffer* buf, const size_t x, const size_t y)
ASSERT(buf && x < buf->width && y < buf->height);
return buf->mem + y*buf->pitch + x*buf->elmtsz;
}
+
diff --git a/src/htrdr_c.h b/src/htrdr_c.h
@@ -18,6 +18,20 @@
#include <rsys/rsys.h>
+#ifndef NDEBUG
+ #define MPI(Func) ASSERT(MPI_##Func == MPI_SUCCESS)
+#else
+ #define MPI(Func) MPI_##Func
+#endif
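In a debug build (NDEBUG undefined) a call such as `MPI(Test(&req, &complete, MPI_STATUS_IGNORE))` therefore expands to `ASSERT(MPI_Test(&req, &complete, MPI_STATUS_IGNORE) == MPI_SUCCESS)`, while in a release build it expands to the bare `MPI_Test(...)` call and the return code is simply discarded.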
+
+enum htrdr_mpi_message {
+ HTRDR_MPI_PROGRESS_BUILD_OCTREE,
+ HTRDR_MPI_PROGRESS_RENDERING,
+ HTRDR_MPI_STEAL_REQUEST,
+ HTRDR_MPI_WORK_STEALING,
+ HTRDR_MPI_TILE_DATA
+};
+
struct htrdr;
#define SW_WAVELENGTH_MIN 380 /* In nanometer */
@@ -96,7 +110,7 @@ is_file_updated
extern LOCAL_SYM res_T
update_file_stamp
- (struct htrdr* htrdr,
+ (struct htrdr* htrdr,
const char* filename);
extern LOCAL_SYM res_T
@@ -104,5 +118,40 @@ create_directory
(struct htrdr* htrdt,
const char* path);
+extern LOCAL_SYM void
+send_mpi_progress
+ (struct htrdr* htrdr,
+ const enum htrdr_mpi_message progress,
+ const int32_t percent);
+
+extern LOCAL_SYM void
+fetch_mpi_progress
+ (struct htrdr* htrdr,
+ const enum htrdr_mpi_message progress);
+
+extern LOCAL_SYM void
+print_mpi_progress
+ (struct htrdr* htrdr,
+ const enum htrdr_mpi_message progress);
+
+extern LOCAL_SYM void
+clear_mpi_progress
+ (struct htrdr* htrdr,
+ const enum htrdr_mpi_message progress);
+
+extern LOCAL_SYM int32_t
+total_mpi_progress
+ (const struct htrdr* htrdr,
+ const enum htrdr_mpi_message progress);
+
+static INLINE void
+update_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message progress)
+{
+ ASSERT(htrdr);
+ fetch_mpi_progress(htrdr, progress);
+ clear_mpi_progress(htrdr, progress);
+ print_mpi_progress(htrdr, progress);
+}
+
#endif /* HTRDR_C_H */
diff --git a/src/htrdr_compute_radiance_sw.c b/src/htrdr_compute_radiance_sw.c
@@ -285,7 +285,8 @@ htrdr_compute_radiance_sw
CHK(RES_OK == ssf_phase_create
(&htrdr->lifo_allocators[ithread], &ssf_phase_rayleigh, &phase_rayleigh));
- SSF(lambertian_reflection_setup(bsdf, 0.5));
+ SSF(lambertian_reflection_setup
+ (bsdf, htrdr_ground_get_reflectivity(htrdr->ground)));
/* Setup the phase function for this spectral band & quadrature point */
g = htrdr_sky_fetch_particle_phase_function_asymmetry_parameter
diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c
@@ -20,14 +20,44 @@
#include "htrdr_sky.h"
#include "htrdr_solve.h"
+#include <rsys/clock_time.h>
+#include <rsys/cstr.h>
+#include <rsys/dynamic_array_u32.h>
+#include <rsys/math.h>
+#include <rsys/mutex.h>
#include <star/ssp.h>
#include <omp.h>
-#include <rsys/math.h>
+#include <mpi.h>
+#include <time.h>
+#include <unistd.h>
+
+#define RNG_SEQUENCE_SIZE 10000
-#define TILE_SIZE 32 /* definition in X & Y of a tile */
+#define TILE_MCODE_NULL UINT32_MAX
+#define TILE_SIZE 32 /* Definition in X & Y of a tile */
STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2);
+/* Tile of row ordered image pixels */
+struct tile {
+ struct list_node node;
+ struct mem_allocator* allocator;
+ ref_T ref;
+
+ struct tile_data {
+ uint16_t x, y; /* 2D coordinates of the tile in tile space */
+ /* Simulate the flexible array member of the C99 standard. */
+ struct htrdr_accum accums[1/*dummy element*/];
+ } data;
+};
+
+/* List of tiles to compute on this MPI process. */
+struct proc_work {
+  struct mutex* mutex;
+  struct darray_u32 tiles; /* Morton codes of the tiles to render */
+ size_t itile; /* Next tile to render in the above list of tiles */
+};
+
/*******************************************************************************
* Helper functions
******************************************************************************/
@@ -42,6 +72,378 @@ morton2D_decode(const uint32_t u32)
return (uint16_t)x;
}
+static FINLINE uint32_t
+morton2D_encode(const uint16_t u16)
+{
+ uint32_t u32 = u16;
+ u32 = (u32 | (u32 << 8)) & 0x00FF00FF;
+  u32 = (u32 | (u32 << 4)) & 0x0F0F0F0F;
+ u32 = (u32 | (u32 << 2)) & 0x33333333;
+ u32 = (u32 | (u32 << 1)) & 0x55555555;
+ return u32;
+}
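As a side note, here is a minimal, self-contained sketch of how these helpers fit together: `morton2D_encode` spreads the bits of a 16-bit coordinate over the even bits of a 32-bit word, the tile Morton code interleaves X on the even bits and Y on the odd bits, and decoding shifts by 0 or 1 before compacting the bits back. The `compact_bits` routine below is a standard bit-compaction written for this sketch only; the full body of `morton2D_decode` is not quoted in this diff, so equivalence is assumed.

```c
#include <assert.h>
#include <stdint.h>

/* Spread the 16 bits of `u16' over the even bits of a 32-bit word,
 * mirroring morton2D_encode above */
static uint32_t spread_bits(uint16_t u16)
{
  uint32_t u32 = u16;
  u32 = (u32 | (u32 << 8)) & 0x00FF00FF;
  u32 = (u32 | (u32 << 4)) & 0x0F0F0F0F;
  u32 = (u32 | (u32 << 2)) & 0x33333333;
  u32 = (u32 | (u32 << 1)) & 0x55555555;
  return u32;
}

/* Inverse operation: keep the even bits of `u32' and pack them back into a
 * 16-bit value (standard compaction, assumed equivalent to morton2D_decode) */
static uint16_t compact_bits(uint32_t u32)
{
  uint32_t x = u32 & 0x55555555;
  x = (x ^ (x >> 1)) & 0x33333333;
  x = (x ^ (x >> 2)) & 0x0F0F0F0F;
  x = (x ^ (x >> 4)) & 0x00FF00FF;
  x = (x ^ (x >> 8)) & 0x0000FFFF;
  return (uint16_t)x;
}

int main(void)
{
  const uint16_t x = 3, y = 5; /* Tile coordinates in tile space */
  /* Interleave: X on the even bits, Y on the odd bits */
  const uint32_t mcode = spread_bits(x) | (spread_bits(y) << 1);
  /* Round trip, as done at the morton2D_decode(mcode>>0/1) call sites */
  assert(compact_bits(mcode >> 0) == x);
  assert(compact_bits(mcode >> 1) == y);
  return 0;
}
```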
+
+static FINLINE struct tile*
+tile_create(struct mem_allocator* allocator)
+{
+ struct tile* tile;
+ const size_t tile_sz =
+ sizeof(struct tile) - sizeof(struct htrdr_accum)/*rm dummy accum*/;
+  const size_t buf_sz = /* Flexible array element */
+ TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum)*3/*#channels*/;
+ ASSERT(allocator);
+
+ tile = MEM_ALLOC(allocator, tile_sz+buf_sz);
+ if(!tile) return NULL;
+
+ ref_init(&tile->ref);
+ list_init(&tile->node);
+ tile->allocator = allocator;
+ ASSERT(IS_ALIGNED(&tile->data.accums, ALIGNOF(struct htrdr_accum)));
+
+ return tile;
+}
+
+static INLINE void
+tile_ref_get(struct tile* tile)
+{
+ ASSERT(tile);
+  ref_get(&tile->ref);
+}
+
+static INLINE void
+release_tile(ref_T* ref)
+{
+ struct tile* tile = CONTAINER_OF(ref, struct tile, ref);
+ ASSERT(ref);
+ MEM_RM(tile->allocator, tile);
+}
+
+static INLINE void
+tile_ref_put(struct tile* tile)
+{
+ ASSERT(tile);
+ ref_put(&tile->ref, release_tile);
+}
+
+static FINLINE struct htrdr_accum*
+tile_at
+ (struct tile* tile,
+ const size_t x, /* In tile space */
+ const size_t y) /* In tile space */
+{
+ ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE);
+ return tile->data.accums + (y*TILE_SIZE + x)*3/*#channels*/;
+}
+
+static void
+write_tile_data(struct htrdr_buffer* buf, const struct tile_data* tile_data)
+{
+ struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
+ size_t icol, irow;
+ size_t irow_tile;
+ size_t ncols_tile, nrows_tile;
+ char* buf_mem;
+ ASSERT(buf && tile_data);
+
+ htrdr_buffer_get_layout(buf, &layout);
+ buf_mem = htrdr_buffer_get_data(buf);
+
+ /* Compute the row/column of the tile origin into the buffer */
+ icol = tile_data->x * (size_t)TILE_SIZE;
+ irow = tile_data->y * (size_t)TILE_SIZE;
+
+ /* Define the number of tile row/columns to write into the buffer */
+ ncols_tile = MMIN(icol + TILE_SIZE, layout.width) - icol;
+ nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow;
+
+ /* Copy the tile data, row by row */
+ FOR_EACH(irow_tile, 0, nrows_tile) {
+ char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch;
+ const struct htrdr_accum* tile_row =
+ tile_data->accums + irow_tile*TILE_SIZE*3/*#channels*/;
+ memcpy(buf_row + icol*sizeof(struct htrdr_accum)*3, tile_row,
+ ncols_tile*sizeof(struct htrdr_accum)*3/*#channels*/);
+ }
+}
+
+static INLINE void
+proc_work_init(struct mem_allocator* allocator, struct proc_work* work)
+{
+ ASSERT(work);
+ darray_u32_init(allocator, &work->tiles);
+ work->itile = 0;
+ CHK(work->mutex = mutex_create());
+}
+
+static INLINE void
+proc_work_release(struct proc_work* work)
+{
+ darray_u32_release(&work->tiles);
+ mutex_destroy(work->mutex);
+}
+
+static INLINE void
+proc_work_reset(struct proc_work* work)
+{
+ ASSERT(work);
+ mutex_lock(work->mutex);
+ darray_u32_clear(&work->tiles);
+ work->itile = 0;
+ mutex_unlock(work->mutex);
+}
+
+static INLINE void
+proc_work_add_tile(struct proc_work* work, const uint32_t mcode)
+{
+ mutex_lock(work->mutex);
+ CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK);
+ mutex_unlock(work->mutex);
+}
+
+static INLINE uint32_t
+proc_work_get_tile(struct proc_work* work)
+{
+ uint32_t mcode;
+ ASSERT(work);
+ mutex_lock(work->mutex);
+ if(work->itile >= darray_u32_size_get(&work->tiles)) {
+ mcode = TILE_MCODE_NULL;
+ } else {
+ mcode = darray_u32_cdata_get(&work->tiles)[work->itile];
+ ++work->itile;
+ }
+ mutex_unlock(work->mutex);
+ return mcode;
+}
+
+static INLINE size_t
+proc_work_get_ntiles(struct proc_work* work)
+{
+ size_t sz = 0;
+ ASSERT(work);
+ mutex_lock(work->mutex);
+ sz = darray_u32_size_get(&work->tiles);
+ mutex_unlock(work->mutex);
+ return sz;
+}
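As a usage illustration only (a sketch that assumes the `proc_work` helpers above and the rsys allocator/mutex facilities are in scope; it is not part of the patch), a worker loop simply pops Morton codes until the `TILE_MCODE_NULL` sentinel comes back:

```c
/* Illustrative sketch: drain a per-process work queue. Assumes the proc_work
 * helpers defined above and an rsys memory allocator. */
static void drain_work_sketch(struct mem_allocator* allocator)
{
  struct proc_work work;
  uint32_t mcode;

  proc_work_init(allocator, &work);
  proc_work_add_tile(&work, 0); /* Morton code of tile (0, 0) */
  proc_work_add_tile(&work, 3); /* Morton code of tile (1, 1) */

  /* Each call returns the next Morton code; TILE_MCODE_NULL means empty */
  while((mcode = proc_work_get_tile(&work)) != TILE_MCODE_NULL) {
    /* ... render the tile identified by `mcode' ... */
  }
  proc_work_release(&work);
}
```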
+
+static void
+mpi_wait_for_request(struct htrdr* htrdr, MPI_Request* req)
+{
+ ASSERT(htrdr && req);
+
+ /* Wait for process synchronisation */
+ for(;;) {
+ struct timespec t;
+ int complete;
+ t.tv_sec = 0;
+ t.tv_nsec = 10000000; /* 10ms */
+
+ mutex_lock(htrdr->mpi_mutex);
+ MPI(Test(req, &complete, MPI_STATUS_IGNORE));
+ mutex_unlock(htrdr->mpi_mutex);
+ if(complete) break;
+
+ nanosleep(&t, NULL);
+ }
+}
+
+static void
+mpi_probe_thieves
+ (struct htrdr* htrdr,
+ struct proc_work* work,
+ ATOMIC* probe_thieves)
+{
+ uint32_t tiles[UINT8_MAX];
+ struct timespec t;
+ ASSERT(htrdr && work && probe_thieves);
+
+  if(htrdr->mpi_nprocs == 1) /* The process is alone. No stealing is possible */
+ return;
+
+ t.tv_sec = 0;
+
+ /* Protect MPI calls of multiple invocations from concurrent threads */
+ #define P_MPI(Func) { \
+ mutex_lock(htrdr->mpi_mutex); \
+ MPI(Func); \
+ mutex_unlock(htrdr->mpi_mutex); \
+ } (void)0
+
+ while(ATOMIC_GET(probe_thieves)) {
+ MPI_Status status;
+ size_t itile;
+ int msg;
+
+ /* Probe if a steal request was submitted by any processes */
+ P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg,
+ &status));
+
+ if(msg) { /* A steal request was posted */
+ MPI_Request req;
+ uint8_t ntiles_to_steal;
+
+ /* Asynchronously receive the steal request */
+ P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE,
+ HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req));
+
+ /* Wait for the completion of the steal request */
+ mpi_wait_for_request(htrdr, &req);
+
+      /* Steal some tiles */
+ FOR_EACH(itile, 0, ntiles_to_steal) {
+ tiles[itile] = proc_work_get_tile(work);
+ }
+ P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE,
+ HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD));
+ }
+ t.tv_nsec = 500000000; /* 500ms */
+ nanosleep(&t, NULL);
+ }
+ #undef P_MPI
+}
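Summed up, the stealing handshake implemented above and in `mpi_steal_work` below works as follows: the thief sends the number of tiles it wants as a single `uint8_t` tagged `HTRDR_MPI_STEAL_REQUEST`; the victim pops up to that many Morton codes from its own queue and replies with them under the `HTRDR_MPI_WORK_STEALING` tag, the reply being padded with `TILE_MCODE_NULL` once the queue runs dry; the thief treats that sentinel as "this process has no more work" and removes the victim from its list of working processes.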
+
+static int
+mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng)
+{
+ int iproc, i;
+ int dst_rank;
+ ASSERT(htrdr && rng && htrdr->mpi_nworking_procs);
+
+ /* Sample the index of the 1st active process */
+ iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs);
+
+ /* Find the rank of the sampled active process. Use a simple linear search
+ * since the overall number of processes should be quite low; at most few
+ * dozens. */
+ i = 0;
+ FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) {
+ if(htrdr->mpi_working_procs[dst_rank] == 0) continue; /* Inactive process */
+ if(i == iproc) break; /* The rank of the sampled process is found */
+ ++i;
+ }
+ ASSERT(dst_rank < htrdr->mpi_nprocs);
+ return dst_rank;
+}
+
+/* Return the number of stolen tiles */
+static size_t
+mpi_steal_work
+ (struct htrdr* htrdr,
+ struct ssp_rng* rng,
+ struct proc_work* work)
+{
+ MPI_Request req;
+ size_t itile;
+ size_t nthieves = 0;
+  uint32_t tiles[UINT8_MAX]; /* Morton codes of the stolen tiles */
+  int proc_to_steal; /* Process to steal from */
+ uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16);
+ ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX);
+
+ /* Protect MPI calls of multiple invocations from concurrent threads */
+ #define P_MPI(Func) { \
+ mutex_lock(htrdr->mpi_mutex); \
+ MPI(Func); \
+ mutex_unlock(htrdr->mpi_mutex); \
+ } (void)0
+
+  /* No more working processes => nothing to steal */
+ if(!htrdr->mpi_nworking_procs) return 0;
+
+ /* Sample a process to steal */
+ proc_to_steal = mpi_sample_working_process(htrdr, rng);
+
+ /* Send a steal request to the sampled process and wait for a response */
+ P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal,
+ HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD));
+
+  /* Receive the stolen tiles from the sampled process */
+ P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal,
+ HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req));
+
+ mpi_wait_for_request(htrdr, &req);
+
+ FOR_EACH(itile, 0, ntiles_to_steal) {
+ if(tiles[itile] == TILE_MCODE_NULL) {
+ ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0);
+ htrdr->mpi_working_procs[proc_to_steal] = 0;
+ htrdr->mpi_nworking_procs--;
+ break;
+ }
+ proc_work_add_tile(work, tiles[itile]);
+ ++nthieves;
+ }
+ #undef P_MPI
+ return nthieves;
+}
+
+static res_T
+mpi_gather_tiles
+ (struct htrdr* htrdr,
+ struct htrdr_buffer* buf,
+ const size_t ntiles,
+ struct list_node* tiles)
+{
+ /* Compute the size of the tile_data */
+ const size_t msg_sz =
+ sizeof(struct tile_data) - sizeof(struct htrdr_accum)/*dummy*/
+ + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum)*3/*#channels*/;
+
+ struct list_node* node = NULL;
+ struct tile* tile = NULL;
+ res_T res = RES_OK;
+ ASSERT(htrdr && tiles);
+ ASSERT(htrdr->mpi_rank != 0 || buf);
+ (void)ntiles;
+
+ if(htrdr->mpi_rank != 0) { /* Non master process */
+ /* Send the computed tile to the master process */
+ LIST_FOR_EACH(node, tiles) {
+ struct tile* t = CONTAINER_OF(node, struct tile, node);
+ MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0,
+ HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD));
+ }
+ } else { /* Master process */
+ size_t itile = 0;
+
+ LIST_FOR_EACH(node, tiles) {
+ struct tile* t = CONTAINER_OF(node, struct tile, node);
+ write_tile_data(buf, &t->data);
+ ++itile;
+ }
+
+ if(itile != ntiles) {
+ ASSERT(htrdr->mpi_nprocs > 1);
+
+ /* Create a temporary tile to receive the tile data computed by the
+ * concurrent MPI processes */
+ tile = tile_create(htrdr->allocator);
+ if(!tile) {
+ res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "could not allocate the temporary tile used to gather the process "
+ "output data -- %s.\n", res_to_cstr(res));
+ goto error;
+ }
+
+      /* Receive the tile data of the concurrent MPI processes */
+ FOR_EACH(itile, itile, ntiles) {
+ MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE,
+ HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE));
+ write_tile_data(buf, &tile->data);
+ }
+ }
+ }
+
+exit:
+ if(tile) tile_ref_put(tile);
+ return res;
+error:
+ goto exit;
+}
+
static res_T
draw_tile
(struct htrdr* htrdr,
@@ -53,11 +455,11 @@ draw_tile
const struct htrdr_camera* cam,
const size_t spp, /* #samples per pixel */
struct ssp_rng* rng,
- struct htrdr_buffer* buf)
+ struct tile* tile)
{
size_t npixels;
size_t mcode; /* Morton code of tile pixel */
- ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && buf);
+ ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && tile);
(void)tile_mcode;
/* Adjust the #pixels to process them wrt a morton order */
npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1]));
@@ -74,13 +476,13 @@ draw_tile
ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1));
if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */
+ /* Fetch and reset the pixel accumulator */
+ pix_accums = tile_at(tile, ipix_tile[0], ipix_tile[1]);
+
/* Compute the pixel coordinate */
ipix[0] = tile_org[0] + ipix_tile[0];
ipix[1] = tile_org[1] + ipix_tile[1];
- /* Fetch and reset the pixel accumulator */
- pix_accums = htrdr_buffer_at(buf, ipix[0], ipix[1]);
-
FOR_EACH(ichannel, 0, 3) {
size_t isamp;
pix_accums[ichannel] = HTRDR_ACCUM_NULL;
@@ -133,131 +535,293 @@ draw_tile
return RES_OK;
}
-/*******************************************************************************
- * Local functions
- ******************************************************************************/
-res_T
-htrdr_draw_radiance_sw
+static res_T
+draw_image
(struct htrdr* htrdr,
const struct htrdr_camera* cam,
+ const size_t width, /* Image width */
+ const size_t height, /* Image height */
const size_t spp,
- struct htrdr_buffer* buf)
+ const size_t ntiles_x,
+ const size_t ntiles_y,
+ const size_t ntiles_adjusted,
+ const double pix_sz[2], /* Pixel size in the normalized image plane */
+ struct proc_work* work,
+ struct list_node* tiles)
{
- struct ssp_rng_proxy* rng_proxy = NULL;
- struct ssp_rng** rngs = NULL;
- size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
- size_t progress = 0;
- size_t i;
- int32_t mcode; /* Morton code of the tile */
- struct htrdr_buffer_layout layout;
- double pix_sz[2]; /* Pixel size in the normalized image plane */
+ struct ssp_rng* rng_proc = NULL;
+ MPI_Request req;
+ size_t nthreads = 0;
+ size_t nthieves = 0;
+ size_t proc_ntiles = 0;
ATOMIC nsolved_tiles = 0;
ATOMIC res = RES_OK;
- ASSERT(htrdr && cam && buf);
+ ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles);
+ ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0);
+ ASSERT(width && height);
+ (void)ntiles_x, (void)ntiles_y;
- htrdr_buffer_get_layout(buf, &layout);
- ASSERT(layout.width || layout.height || layout.elmt_size);
-
- if(layout.elmt_size != sizeof(struct htrdr_accum[3])/*#channels*/
- || layout.alignment < ALIGNOF(struct htrdr_accum[3])) {
- htrdr_log_err(htrdr,
- "%s: invalid buffer layout. "
- "The pixel size must be the size of 3 * accumulators.\n",
- FUNC_NAME);
- res = RES_BAD_ARG;
- goto error;
- }
-
- res = ssp_rng_proxy_create
- (htrdr->allocator, &ssp_rng_mt19937_64, htrdr->nthreads, &rng_proxy);
+ res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc);
if(res != RES_OK) {
- htrdr_log_err(htrdr, "%s: could not create the RNG proxy.\n", FUNC_NAME);
+ htrdr_log_err(htrdr, "could not create the RNG used to sample a process "
+ "to steal -- %s.\n", res_to_cstr((res_T)res));
goto error;
}
- rngs = MEM_CALLOC(htrdr->allocator, htrdr->nthreads, sizeof(*rngs));
- if(!rngs) {
- htrdr_log_err(htrdr, "%s: could not allocate the RNGs list.\n", FUNC_NAME);
- res = RES_MEM_ERR;
- goto error;
- }
+ proc_ntiles = proc_work_get_ntiles(work);
+ nthreads = MMIN(htrdr->nthreads, proc_ntiles);
- FOR_EACH(i, 0, htrdr->nthreads) {
- res = ssp_rng_proxy_create_rng(rng_proxy, i, &rngs[i]);
- if(res != RES_OK) {
- htrdr_log_err(htrdr,"%s: could not create the per thread RNG.\n", FUNC_NAME);
- goto error;
- }
- }
-
- ntiles_x = (layout.width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
- ntiles_y = (layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
- ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
- ntiles_adjusted *= ntiles_adjusted;
- ntiles = ntiles_x * ntiles_y;
-
- pix_sz[0] = 1.0 / (double)layout.width;
- pix_sz[1] = 1.0 / (double)layout.height;
-
- fprintf(stderr, "Rendering: %3i%%", 0);
- fflush(stderr);
+  /* A process does not count itself among the working processes */
+ htrdr->mpi_working_procs[htrdr->mpi_rank] = 0;
+ --htrdr->mpi_nworking_procs;
- omp_set_num_threads((int)htrdr->nthreads);
-
- #pragma omp parallel for schedule(static, 1/*chunck size*/)
- for(mcode=0; mcode<(int64_t)ntiles_adjusted; ++mcode) {
+ omp_set_num_threads((int)nthreads);
+ #pragma omp parallel
+ for(;;) {
const int ithread = omp_get_thread_num();
- struct ssp_rng* rng = rngs[ithread];
+ struct ssp_rng_proxy* rng_proxy = NULL;
+ struct ssp_rng* rng;
+ struct tile* tile;
+ uint32_t mcode = TILE_MCODE_NULL;
size_t tile_org[2];
size_t tile_sz[2];
- size_t pcent;
size_t n;
res_T res_local = RES_OK;
+ int32_t pcent;
+
+ /* Get a tile to draw */
+ #pragma omp critical
+ {
+ mcode = proc_work_get_tile(work);
+ if(mcode == TILE_MCODE_NULL) { /* No more work on this process */
+        /* Try to steal work from concurrent processes */
+ proc_work_reset(work);
+ nthieves = mpi_steal_work(htrdr, rng_proc, work);
+ if(nthieves != 0) {
+ mcode = proc_work_get_tile(work);
+ }
+ }
+ }
+ if(mcode == TILE_MCODE_NULL) break; /* No more work */
/* Decode the morton code to retrieve the tile index */
tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
- if(tile_org[0] >= ntiles_x) continue; /* Skip border tile */
tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
- if(tile_org[1] >= ntiles_y) continue; /* Skip border tile */
+ ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
+
+ /* Create the tile */
+ tile = tile_create(htrdr->allocator);
+ if(!tile) {
+ ATOMIC_SET(&res, RES_MEM_ERR);
+ htrdr_log_err(htrdr,
+ "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n",
+ (unsigned long)tile_org[0], (unsigned long)tile_org[1],
+ res_to_cstr((res_T)ATOMIC_GET(&res)));
+ break;
+ }
+
+ /* Register the tile */
+ #pragma omp critical
+ list_add_tail(tiles, &tile->node);
+
+ tile->data.x = (uint16_t)tile_org[0];
+ tile->data.y = (uint16_t)tile_org[1];
/* Define the tile origin in pixel space */
tile_org[0] *= TILE_SIZE;
tile_org[1] *= TILE_SIZE;
/* Compute the size of the tile clamped by the borders of the buffer */
- tile_sz[0] = MMIN(TILE_SIZE, layout.width - tile_org[0]);
- tile_sz[1] = MMIN(TILE_SIZE, layout.height - tile_org[1]);
-
+ tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]);
+ tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]);
+
+ /* Create a proxy RNG for the current tile. This proxy is used for the
+ * current thread only and thus it has to manage only one RNG. This proxy
+ * is initialised in order to ensure that a unique and predictable set of
+ * random numbers is used for the current tile. */
+ SSP(rng_proxy_create2
+ (&htrdr->lifo_allocators[ithread],
+ &ssp_rng_threefry,
+ RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */
+ RNG_SEQUENCE_SIZE, /* Size */
+ RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */
+ 1, &rng_proxy));
+ SSP(rng_proxy_create_rng(rng_proxy, 0, &rng));
+
+ /* Launch the tile rendering */
res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz,
- pix_sz, cam, spp, rng, buf);
+ pix_sz, cam, spp, rng, tile);
+
+ SSP(rng_proxy_ref_put(rng_proxy));
+ SSP(rng_ref_put(rng));
+
if(res_local != RES_OK) {
ATOMIC_SET(&res, res_local);
- continue;
+ break;
}
+ /* Update the progress status */
n = (size_t)ATOMIC_INCR(&nsolved_tiles);
- pcent = n * 100 / ntiles;
+ pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/);
#pragma omp critical
- if(pcent > progress) {
- progress = pcent;
- fprintf(stderr, "%c[2K\rRendering: %3lu%%",
- 27, (unsigned long)pcent);
- fflush(stderr);
+ if(pcent > htrdr->mpi_progress_render[0]) {
+ htrdr->mpi_progress_render[0] = pcent;
+ if(htrdr->mpi_rank == 0) {
+ update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ } else { /* Send the progress percentage to the master process */
+ send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent);
+ }
}
+ }
+
+ if(ATOMIC_GET(&res) != RES_OK) goto error;
+
+ /* Synchronize the process */
+ mutex_lock(htrdr->mpi_mutex);
+ MPI(Ibarrier(MPI_COMM_WORLD, &req));
+ mutex_unlock(htrdr->mpi_mutex);
+
+ /* Wait for processes synchronization */
+ mpi_wait_for_request(htrdr, &req);
+
+exit:
+ if(rng_proc) SSP(rng_ref_put(rng_proc));
+ return (res_T)res;
+error:
+ goto exit;
+}
- if(ATOMIC_GET(&res) != RES_OK) continue;
+/*******************************************************************************
+ * Local functions
+ ******************************************************************************/
+res_T
+htrdr_draw_radiance_sw
+ (struct htrdr* htrdr,
+ const struct htrdr_camera* cam,
+ const size_t width,
+ const size_t height,
+ const size_t spp,
+ struct htrdr_buffer* buf)
+{
+ char strbuf[128];
+ struct time t0, t1;
+ struct list_node tiles;
+ size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
+ size_t itile;
+ struct proc_work work;
+ struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
+ size_t proc_ntiles_adjusted;
+ double pix_sz[2];
+ ATOMIC probe_thieves = 1;
+ ATOMIC res = RES_OK;
+ ASSERT(htrdr && cam && width && height);
+ ASSERT(htrdr->mpi_rank != 0 || buf);
+
+ list_init(&tiles);
+ proc_work_init(htrdr->allocator, &work);
+
+ if(htrdr->mpi_rank == 0) {
+ htrdr_buffer_get_layout(buf, &layout);
+ ASSERT(layout.width || layout.height || layout.elmt_size);
+ ASSERT(layout.width == width && layout.height == height);
+
+ if(layout.elmt_size != sizeof(struct htrdr_accum[3])/*#channels*/
+ || layout.alignment < ALIGNOF(struct htrdr_accum[3])) {
+ htrdr_log_err(htrdr,
+ "%s: invalid buffer layout. "
+ "The pixel size must be the size of 3 * accumulators.\n",
+ FUNC_NAME);
+ res = RES_BAD_ARG;
+ goto error;
+ }
}
- fprintf(stderr, "\n");
+
+ /* Compute the overall number of tiles */
+ ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
+ ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
+ ntiles = ntiles_x * ntiles_y;
+
+ /* Compute the pixel size in the normalized image plane */
+ pix_sz[0] = 1.0 / (double)width;
+ pix_sz[1] = 1.0 / (double)height;
+
+ /* Adjust the #tiles for the morton-encoding procedure */
+ ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
+ ntiles_adjusted *= ntiles_adjusted;
+
+ /* Define the initial number of tiles of the current process */
+ proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs;
+  if(htrdr->mpi_rank == 0) { /* Assign the remaining tiles to the master proc */
+ proc_ntiles_adjusted +=
+ ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs;
+ }
+
+ /* Define the initial list of tiles of the process */
+ FOR_EACH(itile, 0, proc_ntiles_adjusted) {
+ uint32_t mcode;
+ uint16_t tile_org[2];
+
+ mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs
+ + (uint32_t)htrdr->mpi_rank;
+
+ tile_org[0] = morton2D_decode(mcode>>0);
+ if(tile_org[0] >= ntiles_x) continue;
+ tile_org[1] = morton2D_decode(mcode>>1);
+ if(tile_org[1] >= ntiles_y) continue;
+ proc_work_add_tile(&work, mcode);
+ }
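As a concrete illustration with hypothetical numbers: a 160x224 image with TILE_SIZE 32 gives ntiles_x = 5 and ntiles_y = 7, hence ntiles_adjusted = 8*8 = 64. With 3 processes, each rank starts from 64/3 = 21 candidate codes and the master additionally takes the 64 - 3*21 = 1 code left over; rank 0 scans codes 0, 3, 6, ..., rank 1 codes 1, 4, 7, ... and rank 2 codes 2, 5, 8, ..., and any code whose decoded tile coordinates fall outside the 5x7 tile grid is simply skipped.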
+
+ if(htrdr->mpi_rank == 0) {
+ fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ }
+
+ time_current(&t0);
+
+ omp_set_nested(1); /* Enable nested threads for draw_image */
+ #pragma omp parallel sections num_threads(2)
+ {
+ #pragma omp section
+ mpi_probe_thieves(htrdr, &work, &probe_thieves);
+
+ #pragma omp section
+ {
+ draw_image(htrdr, cam, width, height, spp, ntiles_x, ntiles_y,
+ ntiles_adjusted, pix_sz, &work, &tiles);
+ /* The processes have no more work to do. Stop probing for thieves */
+ ATOMIC_SET(&probe_thieves, 0);
+ }
+ }
+
+ if(htrdr->mpi_rank == 0) {
+ update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ fprintf(stderr, "\n"); /* Add a new line after the progress statuses */
+ }
+
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
+ htrdr_log(htrdr, "Rendering time: %s\n", strbuf);
+
+ /* Gather accum buffers from the group of processes */
+ time_current(&t0);
+ res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles);
+ if(res != RES_OK) goto error;
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
+ htrdr_log(htrdr, "Image gathering time: %s\n", strbuf);
exit:
- if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
- if(rngs) {
- FOR_EACH(i, 0, htrdr->nthreads) {
- if(rngs[i]) SSP(rng_ref_put(rngs[i]));
+ { /* Free allocated tiles */
+ struct list_node* node;
+ struct list_node* tmp;
+ LIST_FOR_EACH_SAFE(node, tmp, &tiles) {
+ struct tile* tile = CONTAINER_OF(node, struct tile, node);
+ list_del(node);
+ tile_ref_put(tile);
}
- MEM_RM(htrdr->allocator, rngs);
}
+ proc_work_release(&work);
return (res_T)res;
error:
goto exit;
diff --git a/src/htrdr_ground.c b/src/htrdr_ground.c
@@ -48,6 +48,7 @@ struct htrdr_ground {
struct s3d_scene_view* view;
float lower[3]; /* Ground lower bound */
float upper[3]; /* Ground upper bound */
+ double reflectivity;
int repeat; /* Make the ground infinite in X and Y */
struct htrdr* htrdr;
@@ -223,6 +224,7 @@ res_T
htrdr_ground_create
(struct htrdr* htrdr,
const char* obj_filename,
+ const double reflectivity,
const int repeat_ground, /* Infinitely repeat the ground in X and Y */
struct htrdr_ground** out_ground)
{
@@ -231,6 +233,7 @@ htrdr_ground_create
struct time t0, t1;
res_T res = RES_OK;
ASSERT(htrdr && obj_filename && out_ground);
+ ASSERT(reflectivity >= 0 || reflectivity <= 1);
ground = MEM_CALLOC(htrdr->allocator, 1, sizeof(*ground));
if(!ground) {
@@ -243,6 +246,7 @@ htrdr_ground_create
ref_init(&ground->ref);
ground->htrdr = htrdr;
ground->repeat = repeat_ground;
+ ground->reflectivity = reflectivity;
time_current(&t0);
res = setup_ground(ground, obj_filename);
@@ -276,6 +280,13 @@ htrdr_ground_ref_put(struct htrdr_ground* ground)
ref_put(&ground->ref, release_ground);
}
+double
+htrdr_ground_get_reflectivity(const struct htrdr_ground* ground)
+{
+ ASSERT(ground);
+ return ground->reflectivity;
+}
+
res_T
htrdr_ground_trace_ray
(struct htrdr_ground* ground,
diff --git a/src/htrdr_ground.h b/src/htrdr_ground.h
@@ -27,6 +27,7 @@ extern LOCAL_SYM res_T
htrdr_ground_create
(struct htrdr* htrdr,
const char* obj_filename,
+ const double reflectivity, /* In [0, 1] */
const int repeat_ground, /* Infinitely repeat the ground in X and Y */
struct htrdr_ground** ground);
@@ -38,6 +39,10 @@ extern LOCAL_SYM void
htrdr_ground_ref_put
(struct htrdr_ground* ground);
+extern LOCAL_SYM double
+htrdr_ground_get_reflectivity
+ (const struct htrdr_ground* ground);
+
extern LOCAL_SYM res_T
htrdr_ground_trace_ray
(struct htrdr_ground* ground,
diff --git a/src/htrdr_main.c b/src/htrdr_main.c
@@ -16,8 +16,21 @@
#include "htrdr.h"
#include "htrdr_args.h"
+#include <mpi.h>
#include <rsys/mem_allocator.h>
+static const char*
+thread_support_string(const int val)
+{
+ switch(val) {
+ case MPI_THREAD_SINGLE: return "MPI_THREAD_SINGLE";
+ case MPI_THREAD_FUNNELED: return "MPI_THREAD_FUNNELED";
+ case MPI_THREAD_SERIALIZED: return "MPI_THREAD_SERIALIZED";
+ case MPI_THREAD_MULTIPLE: return "MPI_THREAD_MULTIPLE";
+ default: FATAL("Unreachable code.\n"); break;
+ }
+}
+
/*******************************************************************************
* Program
******************************************************************************/
@@ -29,12 +42,32 @@ main(int argc, char** argv)
size_t memsz = 0;
int err = 0;
int is_htrdr_init = 0;
+ int thread_support = 0;
res_T res = RES_OK;
+ err = MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support);
+ if(err != MPI_SUCCESS) {
+ fprintf(stderr, "Error initializing MPI.\n");
+ goto error;
+ }
+
+  if(thread_support < MPI_THREAD_SERIALIZED) {
+ fprintf(stderr, "The provided MPI implementation does not support "
+ "serialized API calls from multiple threads. Provided thread support: "
+ "%s.\n", thread_support_string(thread_support));
+ goto error;
+ }
+
res = htrdr_args_init(&args, argc, argv);
if(res != RES_OK) goto error;
if(args.quit) goto exit;
+ if(args.dump_vtk) {
+ int rank;
+ CHK(MPI_Comm_rank(MPI_COMM_WORLD, &rank) == MPI_SUCCESS);
+ if(rank != 0) goto exit; /* Nothing to do except for the master process */
+ }
+
res = htrdr_init(NULL, &args, &htrdr);
if(res != RES_OK) goto error;
is_htrdr_init = 1;
@@ -43,6 +76,7 @@ main(int argc, char** argv)
if(res != RES_OK) goto error;
exit:
+ MPI_Finalize();
if(is_htrdr_init) htrdr_release(&htrdr);
htrdr_args_release(&args);
if((memsz = mem_allocated_size()) != 0) {
diff --git a/src/htrdr_sky.c b/src/htrdr_sky.c
@@ -38,6 +38,7 @@
#include <fcntl.h> /* open */
#include <libgen.h>
#include <math.h>
+#include <mpi.h>
#include <omp.h>
#include <string.h>
#include <sys/stat.h> /* mkdir */
@@ -65,6 +66,15 @@ struct split {
#define DARRAY_DATA struct split
#include <rsys/dynamic_array.h>
+struct spectral_data {
+ size_t iband; /* Index of the spectral band */
+ size_t iquad; /* Quadrature point into the band */
+};
+
+#define DARRAY_NAME specdata
+#define DARRAY_DATA struct spectral_data
+#include <rsys/dynamic_array.h>
+
struct build_tree_context {
const struct htrdr_sky* sky;
double vxsz[3];
@@ -1009,8 +1019,9 @@ setup_cloud_grid
/* Compute the overall number of voxels in the htrdr_grid */
ncells = definition[0] * definition[1] * definition[2];
- fprintf(stderr, "Generating cloud grid %lu.%lu: %3u%%", iband, iquad, 0);
- fflush(stderr);
+ htrdr_fprintf(sky->htrdr, stderr,
+ "Generating cloud grid %lu.%lu: %3u%%", iband, iquad, 0);
+ htrdr_fflush(sky->htrdr, stderr);
omp_set_num_threads((int)sky->htrdr->nthreads);
#pragma omp parallel for
@@ -1033,12 +1044,13 @@ setup_cloud_grid
#pragma omp critical
if(pcent > progress) {
progress = pcent;
- fprintf(stderr, "%c[2K\rGenerating cloud grid %lu.%lu: %3u%%",
+ htrdr_fprintf(sky->htrdr, stderr,
+ "%c[2K\rGenerating cloud grid %lu.%lu: %3u%%",
27, iband, iquad, (unsigned)pcent);
- fflush(stderr);
+ htrdr_fflush(sky->htrdr, stderr);
}
}
- fprintf(stderr, "\n");
+ htrdr_fprintf(sky->htrdr, stderr, "\n");
exit:
*out_grid = grid;
@@ -1063,16 +1075,20 @@ setup_clouds
const double optical_thickness_threshold,
const int force_cache_update)
{
- struct svx_voxel_desc vox_desc = SVX_VOXEL_DESC_NULL;
- struct build_tree_context ctx = BUILD_TREE_CONTEXT_NULL;
+ struct darray_specdata specdata;
size_t nvoxs[3];
+ double vxsz[3];
double low[3];
double upp[3];
+ int64_t ispecdata;
size_t nbands;
size_t i;
- res_T res = RES_OK;
+ ATOMIC nbuilt_octrees = 0;
+ ATOMIC res = RES_OK;
ASSERT(sky && sky->sw_bands && optical_thickness_threshold >= 0);
+ darray_specdata_init(sky->htrdr->allocator, &specdata);
+
res = htcp_get_desc(sky->htcp, &sky->htcp_desc);
if(res != RES_OK) {
htrdr_log_err(sky->htrdr, "could not retrieve the HTCP descriptor.\n");
@@ -1126,27 +1142,21 @@ setup_clouds
const double upp_z = (double)(iz + 1) * sky->lut_cell_sz + low[2];
darray_split_data_get(&sky->svx2htcp_z)[iz].index = iz2;
darray_split_data_get(&sky->svx2htcp_z)[iz].height = upp[2];
- if(upp_z >= upp[2]) upp[2] += sky->htcp_desc.vxsz_z[++iz2];
+ if(upp_z >= upp[2] && iz + 1 < nsplits) {
+ ASSERT(iz2 + 1 < sky->htcp_desc.spatial_definition[2]);
+ upp[2] += sky->htcp_desc.vxsz_z[++iz2];
+ }
}
ASSERT(eq_eps(upp[2] - low[2], len_z, 1.e-6));
}
/* Setup the build context */
- ctx.sky = sky;
- ctx.tau_threshold = optical_thickness_threshold;
- ctx.vxsz[0] = sky->htcp_desc.upper[0] - sky->htcp_desc.lower[0];
- ctx.vxsz[1] = sky->htcp_desc.upper[1] - sky->htcp_desc.lower[1];
- ctx.vxsz[2] = sky->htcp_desc.upper[2] - sky->htcp_desc.lower[2];
- ctx.vxsz[0] = ctx.vxsz[0] / (double)sky->htcp_desc.spatial_definition[0];
- ctx.vxsz[1] = ctx.vxsz[1] / (double)sky->htcp_desc.spatial_definition[1];
- ctx.vxsz[2] = ctx.vxsz[2] / (double)sky->htcp_desc.spatial_definition[2];
-
- /* Setup the voxel descriptor */
- vox_desc.get = cloud_vox_get;
- vox_desc.merge = cloud_vox_merge;
- vox_desc.challenge_merge = cloud_vox_challenge_merge;
- vox_desc.context = &ctx;
- vox_desc.size = sizeof(float) * NFLOATS_PER_VOXEL;
+ vxsz[0] = sky->htcp_desc.upper[0] - sky->htcp_desc.lower[0];
+ vxsz[1] = sky->htcp_desc.upper[1] - sky->htcp_desc.lower[1];
+ vxsz[2] = sky->htcp_desc.upper[2] - sky->htcp_desc.lower[2];
+ vxsz[0] = vxsz[0] / (double)sky->htcp_desc.spatial_definition[0];
+ vxsz[1] = vxsz[1] / (double)sky->htcp_desc.spatial_definition[1];
+ vxsz[2] = vxsz[2] / (double)sky->htcp_desc.spatial_definition[2];
/* Create as many cloud data structures as considered SW spectral bands */
nbands = htrdr_sky_get_sw_spectral_bands_count(sky);
@@ -1158,58 +1168,135 @@ setup_clouds
goto error;
}
+ /* Compute how many octrees are going to be built */
FOR_EACH(i, 0, nbands) {
- size_t iquad;
struct htgop_spectral_interval band;
- ctx.iband = i + sky->sw_bands_range[0];
+ const size_t iband = i + sky->sw_bands_range[0];
+ size_t iquad;
- HTGOP(get_sw_spectral_interval(sky->htgop, ctx.iband, &band));
+ HTGOP(get_sw_spectral_interval(sky->htgop, iband, &band));
sky->clouds[i] = MEM_CALLOC(sky->htrdr->allocator,
band.quadrature_length, sizeof(*sky->clouds[i]));
if(!sky->clouds[i]) {
htrdr_log_err(sky->htrdr,
"could not create the list of per quadrature point cloud data "
- "for the band %lu.\n", (unsigned long)ctx.iband);
+ "for the band %lu.\n", (unsigned long)iband);
res = RES_MEM_ERR;
goto error;
}
- /* Build a cloud octree for each quadrature point of the considered
- * spectral band */
FOR_EACH(iquad, 0, band.quadrature_length) {
- ctx.quadrature_range[0] = iquad;
- ctx.quadrature_range[1] = iquad;
-
- /* Compute grid of voxels data */
- res = setup_cloud_grid(sky, nvoxs, ctx.iband, iquad, htcp_filename,
- htgop_filename, htmie_filename, force_cache_update, &ctx.grid);
- if(res != RES_OK) goto error;
-
- /* Create the octree */
- res = svx_octree_create(sky->htrdr->svx, low, upp, nvoxs, &vox_desc,
- &sky->clouds[i][iquad].octree);
+ struct spectral_data spectral_data;
+ spectral_data.iband = iband;
+ spectral_data.iquad = iquad;
+ res = darray_specdata_push_back(&specdata, &spectral_data);
if(res != RES_OK) {
- htrdr_log_err(sky->htrdr, "could not create the octree of the cloud "
- "properties for the band %lu.\n", (unsigned long)ctx.iband);
+ htrdr_log_err(sky->htrdr,
+ "could not register the quadrature point %lu of the spectral band "
+ "%lu .\n", (unsigned long)iband, (unsigned long)iquad);
goto error;
}
+ }
+ }
- /* Fetch the octree descriptor for future use */
- SVX(tree_get_desc
- (sky->clouds[i][iquad].octree, &sky->clouds[i][iquad].octree_desc));
+ /* Build the octrees in parallel only when the grid cache is disabled: when
+ * the cache is enabled, it is the generation of the cached grids that is
+ * parallelized, not the building of the octrees. */
+ if(sky->htrdr->cache_grids) {
+ omp_set_num_threads(1);
+ } else {
+ omp_set_num_threads((int)sky->htrdr->nthreads);
+ if(sky->htrdr->mpi_rank == 0) {
+ fetch_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+ print_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+ }
+ }
+ #pragma omp parallel for schedule(dynamic, 1/*chunksize*/)
+ for(ispecdata=0;
+ (size_t)ispecdata<darray_specdata_size_get(&specdata);
+ ++ispecdata) {
+ struct svx_voxel_desc vox_desc = SVX_VOXEL_DESC_NULL;
+ struct build_tree_context ctx = BUILD_TREE_CONTEXT_NULL;
+ const size_t iband = darray_specdata_data_get(&specdata)[ispecdata].iband;
+ const size_t iquad = darray_specdata_data_get(&specdata)[ispecdata].iquad;
+ const size_t id = iband - sky->sw_bands_range[0];
+ int32_t pcent;
+ size_t n;
+ res_T res_local = RES_OK;
+
+ if(ATOMIC_GET(&res) != RES_OK) continue;
+
+ /* Setup the build context */
+ ctx.sky = sky;
+ ctx.vxsz[0] = vxsz[0];
+ ctx.vxsz[1] = vxsz[1];
+ ctx.vxsz[2] = vxsz[2];
+ ctx.tau_threshold = optical_thickness_threshold;
+ ctx.iband = iband;
+ ctx.quadrature_range[0] = iquad;
+ ctx.quadrature_range[1] = iquad;
+
+ /* Setup the voxel descriptor */
+ vox_desc.get = cloud_vox_get;
+ vox_desc.merge = cloud_vox_merge;
+ vox_desc.challenge_merge = cloud_vox_challenge_merge;
+ vox_desc.context = &ctx;
+ vox_desc.size = sizeof(float) * NFLOATS_PER_VOXEL;
+
+ /* Compute grid of voxels data */
+ if(sky->htrdr->cache_grids) {
+ res_local = setup_cloud_grid(sky, nvoxs, iband, iquad, htcp_filename,
+ htgop_filename, htmie_filename, force_cache_update, &ctx.grid);
+ if(res_local != RES_OK) { ATOMIC_SET(&res, res_local); continue; }
+ }
- if(ctx.grid) {
- htrdr_grid_ref_put(ctx.grid);
- ctx.grid = NULL;
+ /* Create the octree */
+ res_local = svx_octree_create(sky->htrdr->svx, low, upp, nvoxs, &vox_desc,
+ &sky->clouds[id][iquad].octree);
+ if(ctx.grid) htrdr_grid_ref_put(ctx.grid);
+ if(res_local != RES_OK) {
+ htrdr_log_err(sky->htrdr, "could not create the octree of the cloud "
+ "properties for the band %lu.\n", (unsigned long)ctx.iband);
+ ATOMIC_SET(&res, res_local);
+ continue;
+ }
+
+ /* Fetch the octree descriptor for future use */
+ SVX(tree_get_desc
+ (sky->clouds[id][iquad].octree, &sky->clouds[id][iquad].octree_desc));
+
+ if(!sky->htrdr->cache_grids) {
+ /* Update the progress message */
+ n = (size_t)ATOMIC_INCR(&nbuilt_octrees);
+ pcent = (int32_t)(n * 100 / darray_specdata_size_get(&specdata));
+
+ #pragma omp critical
+ if(pcent > sky->htrdr->mpi_progress_octree[0]) {
+ sky->htrdr->mpi_progress_octree[0] = pcent;
+ if(sky->htrdr->mpi_rank == 0) {
+ update_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+ } else { /* Send the progress percentage to the master process */
+ send_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE, pcent);
+ }
}
}
}
+ if(!sky->htrdr->cache_grids && sky->htrdr->mpi_rank == 0) {
+ while(total_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE) != 100) {
+ update_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+ sleep(1);
+ }
+ fprintf(stderr, "\n"); /* Add a new line after the progress statuses */
+ }
+
exit:
- return res;
+ darray_specdata_release(&specdata);
+ return (res_T)res;
error:
- if(ctx.grid) htrdr_grid_ref_put(ctx.grid);
clean_clouds(sky);
darray_split_clear(&sky->svx2htcp_z);
goto exit;
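
The rewritten setup_clouds flattens the per-band and per-quadrature-point loop nest into a single specdata array so that one dynamically scheduled OpenMP loop can balance the octree builds and stop early once an error is recorded. The self-contained sketch below shows only that scheduling pattern, with hypothetical work items and a hypothetical build function, and uses omp critical where htrdr relies on its ATOMIC macros:

#include <omp.h>
#include <stddef.h>
#include <stdint.h>

struct work_item { size_t iband, iquad; }; /* hypothetical (band, quadrature) pair */

static int build_octree(const struct work_item* item) { (void)item; return 0; }

static int
build_all(const struct work_item* items, const size_t count, const int nthreads)
{
  int err = 0; /* First error reported by any thread */
  int64_t i;

  omp_set_num_threads(nthreads);
  #pragma omp parallel for schedule(dynamic, 1/*chunksize*/)
  for(i = 0; (size_t)i < count; ++i) {
    int err_snapshot, err_local;

    /* Skip the remaining items as soon as one build has failed */
    #pragma omp critical
    err_snapshot = err;
    if(err_snapshot) continue;

    err_local = build_octree(&items[i]);
    if(err_local) {
      #pragma omp critical
      err = err_local;
    }
  }
  return err;
}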
diff --git a/src/htrdr_slab.c b/src/htrdr_slab.c
@@ -41,7 +41,7 @@ htrdr_slab_trace_ray
double cell_sz[3]; /* Size of a cell */
double t_max[3], t_delta[2], t_min_z;
size_t istep;
- int xy[2]; /* 2D index of the repeated cell */
+ int64_t xy[2]; /* 2D index of the repeated cell */
int incr[2]; /* Index increment */
res_T res = RES_OK;
ASSERT(htrdr && org && dir && range && cell_low && cell_upp && trace_cell);
@@ -64,8 +64,8 @@ htrdr_slab_trace_ray
* non duplicated cell */
pos[0] = org[0] + t_min_z*dir[0];
pos[1] = org[1] + t_min_z*dir[1];
- xy[0] = (int)floor((pos[0] - cell_low[0]) / cell_sz[0]);
- xy[1] = (int)floor((pos[1] - cell_low[1]) / cell_sz[1]);
+ xy[0] = (int64_t)floor((pos[0] - cell_low[0]) / cell_sz[0]);
+ xy[1] = (int64_t)floor((pos[1] - cell_low[1]) / cell_sz[1]);
/* Define the 2D index increment wrt dir sign */
incr[0] = dir[0] < 0 ? -1 : 1;
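
Widening the repeated-cell index xy from int to int64_t matters because the clouds and the ground can be repeated without bound along X and Y: with a small cell size and a ray position far from the origin, the quotient computed above no longer fits in 32 bits. A tiny illustration with made-up numbers:

#include <inttypes.h>
#include <math.h>
#include <stdio.h>

int
main(void)
{
  const double cell_low = 0.0;
  const double cell_sz = 0.5;  /* made-up cell size */
  const double pos = 3.0e9;    /* position far along the repeated direction */

  /* 3e9 / 0.5 = 6e9, which overflows a 32-bit int but fits in an int64_t */
  const int64_t ix = (int64_t)floor((pos - cell_low) / cell_sz);
  printf("repeated cell index: %" PRId64 "\n", ix);
  return 0;
}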
diff --git a/src/htrdr_solve.h b/src/htrdr_solve.h
@@ -48,8 +48,11 @@ extern LOCAL_SYM res_T
htrdr_draw_radiance_sw
(struct htrdr* htrdr,
const struct htrdr_camera* cam,
+ const size_t width, /* Image width */
+ const size_t height, /* Image height */
const size_t spp, /* #samples per pixel, i.e. #realisations */
- struct htrdr_buffer* buf); /* Buffer of struct htrdr_accum[3] */
+ /* Buffer of struct htrdr_accum[3]. May be NULL on non-master processes */
+ struct htrdr_buffer* buf);
extern LOCAL_SYM int
htrdr_ground_filter
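
With the updated prototype, only the master process is expected to own the accumulation buffer; worker processes can pass NULL and let the master gather the results. The call-site sketch below is hypothetical: create_accum_buffer does not exist in htrdr and only stands for whatever allocates the width x height buffer of struct htrdr_accum[3]:

/* Hypothetical call site; create_accum_buffer is a made-up helper */
static res_T
render_image
  (struct htrdr* htrdr,
   const struct htrdr_camera* cam,
   const size_t width,
   const size_t height,
   const size_t spp)
{
  struct htrdr_buffer* buf = NULL;
  res_T res = RES_OK;

  if(htrdr->mpi_rank == 0) {
    /* Allocate the width x height buffer of struct htrdr_accum[3] on the
     * master process only */
    res = create_accum_buffer(htrdr, width, height, &buf);
    if(res != RES_OK) return res;
  }

  /* Worker processes pass buf == NULL; the master gathers their results */
  return htrdr_draw_radiance_sw(htrdr, cam, width, height, spp, buf);
}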