htrdr

Solving radiative transfer in heterogeneous media
git clone git://git.meso-star.fr/htrdr.git

commit 6f6b95522a6e61a7af9ea6980b75fe8fd26e77cd
parent c7b47ff81cd070a3254159cdc5574528ee77a82f
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Thu,  8 Nov 2018 14:22:05 +0100

Merge branch 'feature_mpi'

Diffstat:
M README.md | 24 ++++++++++++++++++++----
M cmake/CMakeLists.txt | 6 +++++-
M src/htrdr.c | 460 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M src/htrdr.h | 43 ++++++++++++++++++++++++++++++++++++++++---
M src/htrdr_args.c | 18 +++++++++++++++++-
M src/htrdr_args.h | 4 ++++
M src/htrdr_buffer.c | 1 +
M src/htrdr_c.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/htrdr_compute_radiance_sw.c | 3 ++-
M src/htrdr_draw_radiance_sw.c | 742 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M src/htrdr_ground.c | 11 +++++++++++
M src/htrdr_ground.h | 5 +++++
M src/htrdr_main.c | 34 ++++++++++++++++++++++++++++++++++
M src/htrdr_sky.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
M src/htrdr_slab.c | 6 +++---
M src/htrdr_solve.h | 5 ++++-
16 files changed, 1407 insertions(+), 195 deletions(-)
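
Note on usage: the README hunk below states that the htrdr binary can be run either directly or through an MPI process launcher such as `mpirun`. A minimal launch sketch follows; the file names are placeholders, only the -a, -c, -g and -e options are taken from the src/htrdr_args.c changes in this merge, and the exact command line is an assumption rather than documented usage.

    # Sketch: 4 MPI ranks, each rank still using its own OpenMP threads.
    #   -a  atmospheric gas properties file
    #   -c  cloud (LES) properties file
    #   -g  OBJ file describing the ground geometry
    #   -e  ground reflectivity in [0, 1] (option added by this merge)
    # File names below are placeholders.
    mpirun -np 4 htrdr -a gas.dat -c clouds.dat -g ground.obj -e 0.3
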

diff --git a/README.md b/README.md @@ -1,7 +1,22 @@ # High-Tune: RenDeRer -This programs is used to test the implementation of radiative transfer -Monte-Carlo algorithms in cloudy atmopsheres. +This program is a part of the [High-Tune](http://www.umr-cnrm.fr/high-tune/) +project: it illustrates the implementation of efficient radiative transfer +Monte-Carlo algorithms in cloudy atmospheres. + +This program implements a rendering algorithm that computes the radiance in the +spectral interval [380, 780] nanometres that reaches an image through a pinhole +camera. The rendered scene is at least composed of an infinite 1D atmosphere +along the Z axis. Optionally, one can add 3D data describing the cloud +properties and/or a geometry describing the ground with a lambertian +reflectivity. The clouds and the ground, can be both infinitely repeated along +the X and Y axis. + +In addition of shared memory parallelism, htrdr supports the [*M*essage +*P*assing *I*nterface](https://www.mpi-forum.org/) specification to +parallelise its computations in a distribute memory environment; the HTRDR +binary can be run either directly or through a MPI process launcher like +`mpirun`. ## How to build @@ -17,8 +32,9 @@ on the [Star-3DAW](https://gitlab.com/meso-star/star-3daw/), [Star-SF](https://gitlab.com/meso-star/star-sf/), [Star-SP](https://gitlab.com/meso-star/stat-sp/) and -[Star-VX](https://gitlab.com/meso-star/star-vx/) libraries and on the -[OpenMP](http://www.openmp.org) 1.2 specification to parallelize its +[Star-VX](https://gitlab.com/meso-star/star-vx/) libraries and on +[OpenMP](http://www.openmp.org) 1.2 and the +[MPI](https://www.mpi-forum.org/) 2.0 specification to parallelize its computations. First ensure that CMake is installed on your system. Then install the RCMake diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -34,11 +34,14 @@ find_package(HTCP REQUIRED) find_package(HTGOP REQUIRED) find_package(HTMIE REQUIRED) find_package(OpenMP 1.2 REQUIRED) +find_package(MPI 2 REQUIRED) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${RCMAKE_SOURCE_DIR}) include(rcmake) include(rcmake_runtime) +set(CMAKE_C_COMPILER ${MPI_C_COMPILER}) + include_directories( ${RSys_INCLUDE_DIR} ${Star3D_INCLUDE_DIR} @@ -48,7 +51,8 @@ include_directories( ${StarVX_INCLUDE_DIR} ${HTCP_INCLUDE_DIR} ${HTGOP_INCLUDE_DIR} - ${HTMIE_INCLUDE_DIR}) + ${HTMIE_INCLUDE_DIR} + ${MPI_INCLUDE_PATH}) ################################################################################ # Configure and define targets diff --git a/src/htrdr.c b/src/htrdr.c @@ -25,7 +25,7 @@ #include "htrdr_sun.h" #include "htrdr_solve.h" -#include <rsys/clock_time.h> +#include <rsys/cstr.h> #include <rsys/mem_allocator.h> #include <rsys/str.h> @@ -39,10 +39,12 @@ #include <stdarg.h> #include <stdio.h> #include <unistd.h> +#include <time.h> #include <sys/time.h> /* timespec */ #include <sys/stat.h> /* S_IRUSR & S_IWUSR */ #include <omp.h> +#include <mpi.h> /******************************************************************************* * Helper functions @@ -243,6 +245,185 @@ spherical_to_cartesian_dir dir[2] = sin_elevation; } +static void +release_mpi(struct htrdr* htrdr) +{ + ASSERT(htrdr); + if(htrdr->mpi_working_procs) { + MEM_RM(htrdr->allocator, htrdr->mpi_working_procs); + htrdr->mpi_working_procs = NULL; + } + if(htrdr->mpi_progress_octree) { + MEM_RM(htrdr->allocator, htrdr->mpi_progress_octree); + htrdr->mpi_progress_octree = NULL; + } + if(htrdr->mpi_progress_render) { + MEM_RM(htrdr->allocator, htrdr->mpi_progress_render); + 
htrdr->mpi_progress_render = NULL; + } + if(htrdr->mpi_err_str) { + MEM_RM(htrdr->allocator, htrdr->mpi_err_str); + htrdr->mpi_err_str = NULL; + } + if(htrdr->mpi_mutex) { + mutex_destroy(htrdr->mpi_mutex); + htrdr->mpi_mutex = NULL; + } +} + +static res_T +mpi_print_proc_info(struct htrdr* htrdr) +{ + char proc_name[MPI_MAX_PROCESSOR_NAME]; + int proc_name_len; + char* proc_names = NULL; + uint32_t* proc_nthreads = NULL; + uint32_t nthreads = 0; + int iproc; + res_T res = RES_OK; + ASSERT(htrdr); + + if(htrdr->mpi_rank == 0) { + proc_names = MEM_CALLOC(htrdr->allocator, (size_t)htrdr->mpi_nprocs, + MPI_MAX_PROCESSOR_NAME*sizeof(*proc_names)); + if(!proc_names) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not allocate the temporary memory for MPI process names -- " + "%s.\n", res_to_cstr(res)); + goto error; + } + + proc_nthreads = MEM_CALLOC(htrdr->allocator, (size_t)htrdr->mpi_nprocs, + sizeof(*proc_nthreads)); + if(!proc_nthreads) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not allocate the temporary memory for the #threads of the MPI " + "processes -- %s.\n", res_to_cstr(res)); + goto error; + } + } + + /* Gather process name */ + MPI(Get_processor_name(proc_name, &proc_name_len)); + MPI(Gather(proc_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, proc_names, + MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, MPI_COMM_WORLD)); + + /* Gather process #threads */ + nthreads = (uint32_t)htrdr->nthreads; + MPI(Gather(&nthreads, 1, MPI_UINT32_T, proc_nthreads, 1, MPI_UINT32_T, 0, + MPI_COMM_WORLD)); + + if(htrdr->mpi_rank == 0) { + FOR_EACH(iproc, 0, htrdr->mpi_nprocs) { + htrdr_log(htrdr, "Process %d -- %s; #threads: %u\n", + iproc, proc_names + iproc*MPI_MAX_PROCESSOR_NAME, proc_nthreads[iproc]); + } + } + +exit: + if(proc_names) MEM_RM(htrdr->allocator, proc_names); + if(proc_nthreads) MEM_RM(htrdr->allocator, proc_nthreads); + return res; +error: + goto exit; +} + +static res_T +init_mpi(struct htrdr* htrdr) +{ + size_t n; + int err; + res_T res = RES_OK; + ASSERT(htrdr); + + htrdr->mpi_err_str = MEM_CALLOC + (htrdr->allocator, htrdr->nthreads, MPI_MAX_ERROR_STRING); + if(!htrdr->mpi_err_str) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not allocate the MPI error strings -- %s.\n", + res_to_cstr(res)); + goto error; + } + + err = MPI_Comm_rank(MPI_COMM_WORLD, &htrdr->mpi_rank); + if(err != MPI_SUCCESS) { + htrdr_log_err(htrdr, + "could not determine the MPI rank of the calling process -- %s.\n", + htrdr_mpi_error_string(htrdr, err)); + res = RES_UNKNOWN_ERR; + goto error; + } + + err = MPI_Comm_size(MPI_COMM_WORLD, &htrdr->mpi_nprocs); + if(err != MPI_SUCCESS) { + htrdr_log_err(htrdr, + "could retrieve the size of the MPI group -- %s.\n", + htrdr_mpi_error_string(htrdr, err)); + res = RES_UNKNOWN_ERR; + goto error; + } + + htrdr->mpi_working_procs = MEM_CALLOC(htrdr->allocator, + (size_t)htrdr->mpi_nprocs, sizeof(*htrdr->mpi_working_procs)); + if(!htrdr->mpi_working_procs) { + htrdr_log_err(htrdr, + "could not allocate the list of working processes.\n"); + res = RES_MEM_ERR; + goto error; + } + + /* Initialy, all the processes are working */ + htrdr->mpi_nworking_procs = (size_t)htrdr->mpi_nprocs; + memset(htrdr->mpi_working_procs, 0xFF, + htrdr->mpi_nworking_procs*sizeof(*htrdr->mpi_working_procs)); + + /* Allocate #processes progress statuses on the master process and only 1 + * progress status on the other ones: the master process will gather the + * status of the other processes to report their progression. */ + n = (size_t)(htrdr->mpi_rank == 0 ? 
htrdr->mpi_nprocs : 1); + + htrdr->mpi_progress_octree = MEM_CALLOC + (htrdr->allocator, n, sizeof(*htrdr->mpi_progress_octree)); + if(!htrdr->mpi_progress_octree) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not allocate the progress state of the octree building -- %s.\n", + res_to_cstr(res)); + goto error; + } + + htrdr->mpi_progress_render = MEM_CALLOC + (htrdr->allocator, n, sizeof(*htrdr->mpi_progress_render)); + if(!htrdr->mpi_progress_render) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not allocate the progress state of the scene rendering -- %s.\n", + res_to_cstr(res)); + goto error; + } + + htrdr->mpi_mutex = mutex_create(); + if(!htrdr->mpi_mutex) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not create the mutex to protect MPI calls from concurrent " + "threads -- %s.\n", res_to_cstr(res)); + goto error; + } + + if(htrdr->mpi_nprocs != 1) + mpi_print_proc_info(htrdr); + +exit: + return res; +error: + release_mpi(htrdr); + goto exit; +} + /******************************************************************************* * Local functions ******************************************************************************/ @@ -271,9 +452,20 @@ htrdr_init str_init(htrdr->allocator, &htrdr->output_name); htrdr->dump_vtk = args->dump_vtk; + htrdr->cache_grids = args->cache_grids; htrdr->verbose = args->verbose; htrdr->nthreads = MMIN(args->nthreads, (unsigned)omp_get_num_procs()); htrdr->spp = args->image.spp; + htrdr->width = args->image.definition[0]; + htrdr->height = args->image.definition[1]; + + res = init_mpi(htrdr); + if(res != RES_OK) goto error; + + if(htrdr->cache_grids && htrdr->mpi_nprocs != 1) { + htrdr_log_warn(htrdr, "cached grids are not supported in a MPI execution.\n"); + htrdr->cache_grids = 0; + } if(!args->output) { htrdr->output = stdout; @@ -287,14 +479,17 @@ htrdr_init res = str_set(&htrdr->output_name, output_name); if(res != RES_OK) { htrdr_log_err(htrdr, - "could not store the name of the output stream `%s'.\n", output_name); + "%s: could not store the name of the output stream `%s' -- %s.\n", + FUNC_NAME, output_name, res_to_cstr(res)); goto error; } res = svx_device_create (&htrdr->logger, htrdr->allocator, args->verbose, &htrdr->svx); if(res != RES_OK) { - htrdr_log_err(htrdr, "could not create the Star-VoXel device.\n"); + htrdr_log_err(htrdr, + "%s: could not create the Star-VoXel device -- %s.\n", + FUNC_NAME, res_to_cstr(res)); goto error; } @@ -304,12 +499,14 @@ htrdr_init res = s3d_device_create (&htrdr->logger, htrdr->allocator, 0, &htrdr->s3d); if(res != RES_OK) { - htrdr_log_err(htrdr, "could not create the Star-3D device.\n"); + htrdr_log_err(htrdr, + "%s: could not create the Star-3D device -- %s.\n", + FUNC_NAME, res_to_cstr(res)); goto error; } - res = htrdr_ground_create - (htrdr, args->filename_obj, args->repeat_ground, &htrdr->ground); + res = htrdr_ground_create(htrdr, args->filename_obj, + args->ground_reflectivity, args->repeat_ground, &htrdr->ground); if(res != RES_OK) goto error; proj_ratio = @@ -319,14 +516,18 @@ htrdr_init args->camera.up, proj_ratio, MDEG2RAD(args->camera.fov_y), &htrdr->cam); if(res != RES_OK) goto error; - res = htrdr_buffer_create(htrdr, - args->image.definition[0], /* Width */ - args->image.definition[1], /* Height */ - args->image.definition[0]*sizeof(struct htrdr_accum[3]), /* Pitch */ - sizeof(struct htrdr_accum[3]), - ALIGNOF(struct htrdr_accum[3]), /* Alignment */ - &htrdr->buf); - if(res != RES_OK) goto error; + /* Create the image buffer only on the master process; the image parts 
+ * rendered by the processes are gathered onto the master process. */ + if(!htrdr->dump_vtk && htrdr->mpi_rank == 0) { + res = htrdr_buffer_create(htrdr, + args->image.definition[0], /* Width */ + args->image.definition[1], /* Height */ + args->image.definition[0]*sizeof(struct htrdr_accum[3]), /* Pitch */ + sizeof(struct htrdr_accum[3]), + ALIGNOF(struct htrdr_accum[3]), /* Alignment */ + &htrdr->buf); + if(res != RES_OK) goto error; + } res = htrdr_sun_create(htrdr, &htrdr->sun); if(res != RES_OK) goto error; @@ -342,24 +543,25 @@ htrdr_init htrdr->lifo_allocators = MEM_CALLOC (htrdr->allocator, htrdr->nthreads, sizeof(*htrdr->lifo_allocators)); if(!htrdr->lifo_allocators) { - htrdr_log_err(htrdr, - "could not allocate the list of per thread LIFO allocator.\n"); res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "%s: could not allocate the list of per thread LIFO allocator -- %s.\n", + FUNC_NAME, res_to_cstr(res)); goto error; } FOR_EACH(ithread, 0, htrdr->nthreads) { res = mem_init_lifo_allocator - (&htrdr->lifo_allocators[ithread], htrdr->allocator, 4096); + (&htrdr->lifo_allocators[ithread], htrdr->allocator, 16384); if(res != RES_OK) { htrdr_log_err(htrdr, - "could not initialise the LIFO allocator of the thread %lu.\n", - (unsigned long)ithread); + "%s: could not initialise the LIFO allocator of the thread %lu -- %s.\n", + FUNC_NAME, (unsigned long)ithread, res_to_cstr(res)); goto error; } } - exit: +exit: return res; error: htrdr_release(htrdr); @@ -370,6 +572,7 @@ void htrdr_release(struct htrdr* htrdr) { ASSERT(htrdr); + release_mpi(htrdr); if(htrdr->s3d) S3D(device_ref_put(htrdr->s3d)); if(htrdr->svx) SVX(device_ref_put(htrdr->svx)); if(htrdr->ground) htrdr_ground_ref_put(htrdr->ground); @@ -395,6 +598,10 @@ htrdr_run(struct htrdr* htrdr) if(htrdr->dump_vtk) { const size_t nbands = htrdr_sky_get_sw_spectral_bands_count(htrdr->sky); size_t i; + + /* Nothing to do */ + if(htrdr->mpi_rank != 0) goto exit; + FOR_EACH(i, 0, nbands) { const size_t iband = htrdr_sky_get_sw_spectral_band_id(htrdr->sky, i); const size_t nquads = htrdr_sky_get_sw_spectral_band_quadrature_length @@ -407,19 +614,14 @@ htrdr_run(struct htrdr* htrdr) } } } else { - struct time t0, t1; - char buf[128]; - - time_current(&t0); - res = htrdr_draw_radiance_sw(htrdr, htrdr->cam, htrdr->spp, htrdr->buf); - if(res != RES_OK) goto error; - time_sub(&t0, time_current(&t1), &t0); - time_dump(&t0, TIME_ALL, NULL, buf, sizeof(buf)); - htrdr_log(htrdr, "Rendering time: %s\n", buf); - - res = dump_accum_buffer - (htrdr, htrdr->buf, str_cget(&htrdr->output_name), htrdr->output); + res = htrdr_draw_radiance_sw(htrdr, htrdr->cam, htrdr->width, + htrdr->height, htrdr->spp, htrdr->buf); if(res != RES_OK) goto error; + if(htrdr->mpi_rank == 0) { + res = dump_accum_buffer + (htrdr, htrdr->buf, str_cget(&htrdr->output_name), htrdr->output); + if(res != RES_OK) goto error; + } } exit: return res; @@ -430,11 +632,14 @@ error: void htrdr_log(struct htrdr* htrdr, const char* msg, ...) { - va_list vargs_list; ASSERT(htrdr && msg); - va_start(vargs_list, msg); - log_msg(htrdr, LOG_OUTPUT, msg, vargs_list); - va_end(vargs_list); + /* Log standard message only on master process */ + if(htrdr->mpi_rank == 0) { + va_list vargs_list; + va_start(vargs_list, msg); + log_msg(htrdr, LOG_OUTPUT, msg, vargs_list); + va_end(vargs_list); + } } void @@ -442,6 +647,7 @@ htrdr_log_err(struct htrdr* htrdr, const char* msg, ...) 
{ va_list vargs_list; ASSERT(htrdr && msg); + /* Log errors on all processes */ va_start(vargs_list, msg); log_msg(htrdr, LOG_ERROR, msg, vargs_list); va_end(vargs_list); @@ -450,11 +656,48 @@ htrdr_log_err(struct htrdr* htrdr, const char* msg, ...) void htrdr_log_warn(struct htrdr* htrdr, const char* msg, ...) { - va_list vargs_list; ASSERT(htrdr && msg); - va_start(vargs_list, msg); - log_msg(htrdr, LOG_WARNING, msg, vargs_list); - va_end(vargs_list); + /* Log warnings only on master process */ + if(htrdr->mpi_rank == 0) { + va_list vargs_list; + va_start(vargs_list, msg); + log_msg(htrdr, LOG_WARNING, msg, vargs_list); + va_end(vargs_list); + } +} + +const char* +htrdr_mpi_error_string(struct htrdr* htrdr, const int mpi_err) +{ + const int ithread = omp_get_thread_num(); + char* str; + int strlen_err; + int err; + ASSERT(htrdr && (size_t)ithread < htrdr->nthreads); + str = htrdr->mpi_err_str + ithread*MPI_MAX_ERROR_STRING; + err = MPI_Error_string(mpi_err, str, &strlen_err); + return err == MPI_SUCCESS ? str : "Invalid MPI error"; +} + +void +htrdr_fprintf(struct htrdr* htrdr, FILE* stream, const char* msg, ...) +{ + ASSERT(htrdr && msg); + if(htrdr->mpi_rank == 0) { + va_list vargs_list; + va_start(vargs_list, msg); + vfprintf(stream, msg, vargs_list); + va_end(vargs_list); + } +} + +void +htrdr_fflush(struct htrdr* htrdr, FILE* stream) +{ + ASSERT(htrdr); + if(htrdr->mpi_rank == 0) { + fflush(stream); + } } /******************************************************************************* @@ -642,3 +885,140 @@ exit: error: goto exit; } + +void +send_mpi_progress + (struct htrdr* htrdr, const enum htrdr_mpi_message msg, const int32_t percent) +{ + ASSERT(htrdr); + ASSERT(msg == HTRDR_MPI_PROGRESS_RENDERING + || msg == HTRDR_MPI_PROGRESS_BUILD_OCTREE); + (void)htrdr; + mutex_lock(htrdr->mpi_mutex); + MPI(Send(&percent, 1, MPI_INT32_T, 0, msg, MPI_COMM_WORLD)); + mutex_unlock(htrdr->mpi_mutex); +} + +void +fetch_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message msg) +{ + struct timespec t; + int32_t* progress = NULL; + int iproc; + ASSERT(htrdr && htrdr->mpi_rank == 0); + + t.tv_sec = 0; + t.tv_nsec = 10000000; /* 10ms */ + + switch(msg) { + case HTRDR_MPI_PROGRESS_BUILD_OCTREE: + progress = htrdr->mpi_progress_octree; + break; + case HTRDR_MPI_PROGRESS_RENDERING: + progress = htrdr->mpi_progress_render; + break; + default: FATAL("Unreachable code.\n"); break; + } + + FOR_EACH(iproc, 1, htrdr->mpi_nprocs) { + /* Flush the last sent percentage of the process `iproc' */ + for(;;) { + MPI_Request req; + int flag; + int complete; + + mutex_lock(htrdr->mpi_mutex); + MPI(Iprobe(iproc, msg, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE)); + mutex_unlock(htrdr->mpi_mutex); + + if(flag == 0) break; /* No more message */ + + mutex_lock(htrdr->mpi_mutex); + MPI(Irecv(&progress[iproc], 1, MPI_INT32_T, iproc, msg, MPI_COMM_WORLD, &req)); + mutex_unlock(htrdr->mpi_mutex); + for(;;) { + mutex_lock(htrdr->mpi_mutex); + MPI(Test(&req, &complete, MPI_STATUS_IGNORE)); + mutex_unlock(htrdr->mpi_mutex); + if(complete) break; + nanosleep(&t, NULL); + } + } + } +} + +void +print_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message msg) +{ + ASSERT(htrdr && htrdr->mpi_rank == 0); + + if(htrdr->mpi_nprocs == 1) { + switch(msg) { + case HTRDR_MPI_PROGRESS_BUILD_OCTREE: + htrdr_fprintf(htrdr, stderr, "\033[2K\rBuilding octree: %3d%%", + htrdr->mpi_progress_octree[0]); + break; + case HTRDR_MPI_PROGRESS_RENDERING: + htrdr_fprintf(htrdr, stderr, "\033[2K\rRendering: %3d%%", + 
htrdr->mpi_progress_render[0]); + break; + default: FATAL("Unreachable code.\n"); break; + } + htrdr_fflush(htrdr, stderr); + } else { + int iproc; + FOR_EACH(iproc, 0, htrdr->mpi_nprocs) { + switch(msg) { + case HTRDR_MPI_PROGRESS_BUILD_OCTREE: + htrdr_fprintf(htrdr, stderr, + "\033[2K\rProcess %d -- building octree: %3d%%%c", + iproc, htrdr->mpi_progress_octree[iproc], + iproc == htrdr->mpi_nprocs - 1 ? '\r' : '\n'); + break; + case HTRDR_MPI_PROGRESS_RENDERING: + htrdr_fprintf(htrdr, stderr, + "\033[2K\rProcess %d -- rendering: %3d%%%c", + iproc, htrdr->mpi_progress_render[iproc], + iproc == htrdr->mpi_nprocs - 1 ? '\r' : '\n'); + break; + default: FATAL("Unreachable code.\n"); break; + } + } + } +} + +void +clear_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message msg) +{ + ASSERT(htrdr); + (void)msg; + if(htrdr->mpi_nprocs > 1) { + htrdr_fprintf(htrdr, stderr, "\033[%dA", htrdr->mpi_nprocs-1); + } +} + +int +total_mpi_progress(const struct htrdr* htrdr, const enum htrdr_mpi_message msg) +{ + const int* progress = NULL; + int total = 0; + int iproc; + ASSERT(htrdr && htrdr->mpi_rank == 0); + + switch(msg) { + case HTRDR_MPI_PROGRESS_BUILD_OCTREE: + progress = htrdr->mpi_progress_octree; + break; + case HTRDR_MPI_PROGRESS_RENDERING: + progress = htrdr->mpi_progress_render; + break; + default: FATAL("Unreachable code.\n"); break; + } + + FOR_EACH(iproc, 0, htrdr->mpi_nprocs) { + total += progress[iproc]; + } + total = total / htrdr->mpi_nprocs; + return total; +} + diff --git a/src/htrdr.h b/src/htrdr.h @@ -35,6 +35,7 @@ struct htrdr_buffer; struct htrdr_sky; struct htrdr_rectangle; struct mem_allocator; +struct mutext; struct s3d_device; struct s3d_scene; struct ssf_bsdf; @@ -52,13 +53,28 @@ struct htrdr { struct htrdr_camera* cam; struct htrdr_buffer* buf; size_t spp; /* #samples per pixel */ + size_t width; /* Image width */ + size_t height; /* Image height */ FILE* output; struct str output_name; - unsigned nthreads; - int dump_vtk; - int verbose; + unsigned nthreads; /* #threads of the process */ + int dump_vtk; /* Dump octree VTK */ + int cache_grids; /* Use/Precompute grid caches */ + int verbose; /* Verbosity level */ + + int mpi_rank; /* Rank of the process in the MPI group */ + int mpi_nprocs; /* Overall #processes in the MPI group */ + char* mpi_err_str; /* Temp buffer used to store MPI error string */ + int8_t* mpi_working_procs; /* Define the rank of active processes */ + size_t mpi_nworking_procs; + + /* Process progress percentage */ + int32_t* mpi_progress_octree; + int32_t* mpi_progress_render; + + struct mutex* mpi_mutex; /* Protect MPI calls from concurrent threads */ struct logger logger; struct mem_allocator* allocator; @@ -109,5 +125,26 @@ htrdr_log_warn #endif ; +extern LOCAL_SYM const char* +htrdr_mpi_error_string + (struct htrdr* htrdr, + const int mpi_err); + +extern LOCAL_SYM void +htrdr_fprintf + (struct htrdr* htrdr, + FILE* stream, + const char* msg, + ...) +#ifdef COMPILER_GCC + __attribute((format(printf, 3, 4))) +#endif + ; + +extern LOCAL_SYM void +htrdr_fflush + (struct htrdr* htrdr, + FILE* stream); + #endif /* HTRDR_H */ diff --git a/src/htrdr_args.c b/src/htrdr_args.c @@ -46,10 +46,19 @@ print_help(const char* cmd) printf( " -d dump octree data to OUTPUT wrt the VTK ASCII file format.\n"); printf( +" -e ground reflectivity in [0, 1]. 
By default its value is `%g'.\n", + HTRDR_ARGS_DEFAULT.ground_reflectivity); + printf( " -f overwrite the OUTPUT file if it already exists.\n"); printf( " -g FILENAME path of an OBJ file representing the ground geometry.\n"); printf( +" -G precompute/use grids of raw cloud opitical properties built\n" +" from the HTCP file, the atmospheric profile and the Mie's\n" +" data. If the corresponding grids were generated in a\n" +" previous run, reuse them as far as it is possible, i.e. if\n" +" the HTCP, Mie and atmospheric files were not updated.\n"); + printf( " -h display this help and exit.\n"); printf( " -i <image> define the image to compute.\n"); @@ -320,7 +329,7 @@ htrdr_args_init(struct htrdr_args* args, int argc, char** argv) *args = HTRDR_ARGS_DEFAULT; - while((opt = getopt(argc, argv, "a:C:c:D:dfg:hi:m:o:RrT:t:v")) != -1) { + while((opt = getopt(argc, argv, "a:C:c:D:de:fGg:hi:m:o:RrT:t:v")) != -1) { switch(opt) { case 'a': args->filename_gas = optarg; break; case 'C': @@ -330,7 +339,14 @@ htrdr_args_init(struct htrdr_args* args, int argc, char** argv) case 'c': args->filename_les = optarg; break; case 'D': res = parse_sun_dir(args, optarg); break; case 'd': args->dump_vtk = 1; break; + case 'e': + res = cstr_to_double(optarg, &args->ground_reflectivity); + if(args->ground_reflectivity < 0 || args->ground_reflectivity > 1) { + res = RES_BAD_ARG; + } + break; case 'f': args->force_overwriting = 1; break; + case 'G': args->cache_grids = 1; break; case 'g': args->filename_obj = optarg; break; case 'h': print_help(argv[0]); diff --git a/src/htrdr_args.h b/src/htrdr_args.h @@ -47,10 +47,12 @@ struct htrdr_args { double sun_azimuth; /* In degrees */ double sun_elevation; /* In degrees */ double optical_thickness; /* Threshold used during octree building */ + double ground_reflectivity; /* Reflectivity of the ground */ unsigned nthreads; /* Hint on the number of threads to use */ int force_overwriting; int dump_vtk; /* Dump the loaded cloud properties in a VTK file */ + int cache_grids; /* Use grid caching mechanism */ int verbose; /* Verbosity level */ int repeat_clouds; /* Make the clouds infinite in X and Y */ int repeat_ground; /* Make the ground infinite in X and Y */ @@ -80,9 +82,11 @@ struct htrdr_args { 0, /* Sun azimuth */ \ 90, /* Sun elevation */ \ 1.0, /* Optical thickness */ \ + 0.5, /* Ground reflectivity */ \ (unsigned)~0, /* #threads */ \ 0, /* Force overwriting */ \ 0, /* dump VTK */ \ + 0, /* Grid cache */ \ 0, /* Verbose flag */ \ 0, /* Repeat clouds */ \ 0, /* Repeat ground */ \ diff --git a/src/htrdr_buffer.c b/src/htrdr_buffer.c @@ -163,3 +163,4 @@ htrdr_buffer_at(struct htrdr_buffer* buf, const size_t x, const size_t y) ASSERT(buf && x < buf->width && y < buf->height); return buf->mem + y*buf->pitch + x*buf->elmtsz; } + diff --git a/src/htrdr_c.h b/src/htrdr_c.h @@ -18,6 +18,20 @@ #include <rsys/rsys.h> +#ifndef NDEBUG + #define MPI(Func) ASSERT(MPI_##Func == MPI_SUCCESS) +#else + #define MPI(Func) MPI_##Func +#endif + +enum htrdr_mpi_message { + HTRDR_MPI_PROGRESS_BUILD_OCTREE, + HTRDR_MPI_PROGRESS_RENDERING, + HTRDR_MPI_STEAL_REQUEST, + HTRDR_MPI_WORK_STEALING, + HTRDR_MPI_TILE_DATA +}; + struct htrdr; #define SW_WAVELENGTH_MIN 380 /* In nanometer */ @@ -96,7 +110,7 @@ is_file_updated extern LOCAL_SYM res_T update_file_stamp - (struct htrdr* htrdr, + (struct htrdr* htrdr, const char* filename); extern LOCAL_SYM res_T @@ -104,5 +118,40 @@ create_directory (struct htrdr* htrdt, const char* path); +extern LOCAL_SYM void +send_mpi_progress + (struct htrdr* htrdr, + const 
enum htrdr_mpi_message progress, + const int32_t percent); + +extern LOCAL_SYM void +fetch_mpi_progress + (struct htrdr* htrdr, + const enum htrdr_mpi_message progress); + +extern LOCAL_SYM void +print_mpi_progress + (struct htrdr* htrdr, + const enum htrdr_mpi_message progress); + +extern LOCAL_SYM void +clear_mpi_progress + (struct htrdr* htrdr, + const enum htrdr_mpi_message progress); + +extern int32_t +total_mpi_progress + (const struct htrdr* htrdr, + const enum htrdr_mpi_message progress); + +static INLINE void +update_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message progress) +{ + ASSERT(htrdr); + fetch_mpi_progress(htrdr, progress); + clear_mpi_progress(htrdr, progress); + print_mpi_progress(htrdr, progress); +} + #endif /* HTRDR_C_H */ diff --git a/src/htrdr_compute_radiance_sw.c b/src/htrdr_compute_radiance_sw.c @@ -285,7 +285,8 @@ htrdr_compute_radiance_sw CHK(RES_OK == ssf_phase_create (&htrdr->lifo_allocators[ithread], &ssf_phase_rayleigh, &phase_rayleigh)); - SSF(lambertian_reflection_setup(bsdf, 0.5)); + SSF(lambertian_reflection_setup + (bsdf, htrdr_ground_get_reflectivity(htrdr->ground))); /* Setup the phase function for this spectral band & quadrature point */ g = htrdr_sky_fetch_particle_phase_function_asymmetry_parameter diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c @@ -20,14 +20,44 @@ #include "htrdr_sky.h" #include "htrdr_solve.h" +#include <rsys/clock_time.h> +#include <rsys/cstr.h> +#include <rsys/dynamic_array_u32.h> +#include <rsys/math.h> +#include <rsys/mutex.h> #include <star/ssp.h> #include <omp.h> -#include <rsys/math.h> +#include <mpi.h> +#include <time.h> +#include <unistd.h> + +#define RNG_SEQUENCE_SIZE 10000 -#define TILE_SIZE 32 /* definition in X & Y of a tile */ +#define TILE_MCODE_NULL UINT32_MAX +#define TILE_SIZE 32 /* Definition in X & Y of a tile */ STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2); +/* Tile of row ordered image pixels */ +struct tile { + struct list_node node; + struct mem_allocator* allocator; + ref_T ref; + + struct tile_data { + uint16_t x, y; /* 2D coordinates of the tile in tile space */ + /* Simulate the flexible array member of the C99 standard. */ + struct htrdr_accum accums[1/*dummy element*/]; + } data; +}; + +/* List of tile to compute onto the MPI process. 
*/ +struct proc_work { + struct mutex* mutex; + struct darray_u32 tiles; /* #tiles to render */ + size_t itile; /* Next tile to render in the above list of tiles */ +}; + /******************************************************************************* * Helper functions ******************************************************************************/ @@ -42,6 +72,378 @@ morton2D_decode(const uint32_t u32) return (uint16_t)x; } +static FINLINE uint32_t +morton2D_encode(const uint16_t u16) +{ + uint32_t u32 = u16; + u32 = (u32 | (u32 << 8)) & 0x00FF00FF; + u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F; + u32 = (u32 | (u32 << 2)) & 0x33333333; + u32 = (u32 | (u32 << 1)) & 0x55555555; + return u32; +} + +static FINLINE struct tile* +tile_create(struct mem_allocator* allocator) +{ + struct tile* tile; + const size_t tile_sz = + sizeof(struct tile) - sizeof(struct htrdr_accum)/*rm dummy accum*/; + const size_t buf_sz = /* Flexiblbe array element */ + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum)*3/*#channels*/; + ASSERT(allocator); + + tile = MEM_ALLOC(allocator, tile_sz+buf_sz); + if(!tile) return NULL; + + ref_init(&tile->ref); + list_init(&tile->node); + tile->allocator = allocator; + ASSERT(IS_ALIGNED(&tile->data.accums, ALIGNOF(struct htrdr_accum))); + + return tile; +} + +static INLINE void +tile_ref_get(struct tile* tile) +{ + ASSERT(tile); + tile_ref_get(tile); +} + +static INLINE void +release_tile(ref_T* ref) +{ + struct tile* tile = CONTAINER_OF(ref, struct tile, ref); + ASSERT(ref); + MEM_RM(tile->allocator, tile); +} + +static INLINE void +tile_ref_put(struct tile* tile) +{ + ASSERT(tile); + ref_put(&tile->ref, release_tile); +} + +static FINLINE struct htrdr_accum* +tile_at + (struct tile* tile, + const size_t x, /* In tile space */ + const size_t y) /* In tile space */ +{ + ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE); + return tile->data.accums + (y*TILE_SIZE + x)*3/*#channels*/; +} + +static void +write_tile_data(struct htrdr_buffer* buf, const struct tile_data* tile_data) +{ + struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL; + size_t icol, irow; + size_t irow_tile; + size_t ncols_tile, nrows_tile; + char* buf_mem; + ASSERT(buf && tile_data); + + htrdr_buffer_get_layout(buf, &layout); + buf_mem = htrdr_buffer_get_data(buf); + + /* Compute the row/column of the tile origin into the buffer */ + icol = tile_data->x * (size_t)TILE_SIZE; + irow = tile_data->y * (size_t)TILE_SIZE; + + /* Define the number of tile row/columns to write into the buffer */ + ncols_tile = MMIN(icol + TILE_SIZE, layout.width) - icol; + nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow; + + /* Copy the tile data, row by row */ + FOR_EACH(irow_tile, 0, nrows_tile) { + char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch; + const struct htrdr_accum* tile_row = + tile_data->accums + irow_tile*TILE_SIZE*3/*#channels*/; + memcpy(buf_row + icol*sizeof(struct htrdr_accum)*3, tile_row, + ncols_tile*sizeof(struct htrdr_accum)*3/*#channels*/); + } +} + +static INLINE void +proc_work_init(struct mem_allocator* allocator, struct proc_work* work) +{ + ASSERT(work); + darray_u32_init(allocator, &work->tiles); + work->itile = 0; + CHK(work->mutex = mutex_create()); +} + +static INLINE void +proc_work_release(struct proc_work* work) +{ + darray_u32_release(&work->tiles); + mutex_destroy(work->mutex); +} + +static INLINE void +proc_work_reset(struct proc_work* work) +{ + ASSERT(work); + mutex_lock(work->mutex); + darray_u32_clear(&work->tiles); + work->itile = 0; + mutex_unlock(work->mutex); +} + +static 
INLINE void +proc_work_add_tile(struct proc_work* work, const uint32_t mcode) +{ + mutex_lock(work->mutex); + CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK); + mutex_unlock(work->mutex); +} + +static INLINE uint32_t +proc_work_get_tile(struct proc_work* work) +{ + uint32_t mcode; + ASSERT(work); + mutex_lock(work->mutex); + if(work->itile >= darray_u32_size_get(&work->tiles)) { + mcode = TILE_MCODE_NULL; + } else { + mcode = darray_u32_cdata_get(&work->tiles)[work->itile]; + ++work->itile; + } + mutex_unlock(work->mutex); + return mcode; +} + +static INLINE size_t +proc_work_get_ntiles(struct proc_work* work) +{ + size_t sz = 0; + ASSERT(work); + mutex_lock(work->mutex); + sz = darray_u32_size_get(&work->tiles); + mutex_unlock(work->mutex); + return sz; +} + +static void +mpi_wait_for_request(struct htrdr* htrdr, MPI_Request* req) +{ + ASSERT(htrdr && req); + + /* Wait for process synchronisation */ + for(;;) { + struct timespec t; + int complete; + t.tv_sec = 0; + t.tv_nsec = 10000000; /* 10ms */ + + mutex_lock(htrdr->mpi_mutex); + MPI(Test(req, &complete, MPI_STATUS_IGNORE)); + mutex_unlock(htrdr->mpi_mutex); + if(complete) break; + + nanosleep(&t, NULL); + } +} + +static void +mpi_probe_thieves + (struct htrdr* htrdr, + struct proc_work* work, + ATOMIC* probe_thieves) +{ + uint32_t tiles[UINT8_MAX]; + struct timespec t; + ASSERT(htrdr && work && probe_thieves); + + if(htrdr->mpi_nprocs == 1) /* The process is alone. No thief is possible */ + return; + + t.tv_sec = 0; + + /* Protect MPI calls of multiple invocations from concurrent threads */ + #define P_MPI(Func) { \ + mutex_lock(htrdr->mpi_mutex); \ + MPI(Func); \ + mutex_unlock(htrdr->mpi_mutex); \ + } (void)0 + + while(ATOMIC_GET(probe_thieves)) { + MPI_Status status; + size_t itile; + int msg; + + /* Probe if a steal request was submitted by any processes */ + P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg, + &status)); + + if(msg) { /* A steal request was posted */ + MPI_Request req; + uint8_t ntiles_to_steal; + + /* Asynchronously receive the steal request */ + P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE, + HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req)); + + /* Wait for the completion of the steal request */ + mpi_wait_for_request(htrdr, &req); + + /* Thief some tiles */ + FOR_EACH(itile, 0, ntiles_to_steal) { + tiles[itile] = proc_work_get_tile(work); + } + P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE, + HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD)); + } + t.tv_nsec = 500000000; /* 500ms */ + nanosleep(&t, NULL); + } + #undef P_MPI +} + +static int +mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng) +{ + int iproc, i; + int dst_rank; + ASSERT(htrdr && rng && htrdr->mpi_nworking_procs); + + /* Sample the index of the 1st active process */ + iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs); + + /* Find the rank of the sampled active process. Use a simple linear search + * since the overall number of processes should be quite low; at most few + * dozens. 
*/ + i = 0; + FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) { + if(htrdr->mpi_working_procs[dst_rank] == 0) continue; /* Inactive process */ + if(i == iproc) break; /* The rank of the sampled process is found */ + ++i; + } + ASSERT(dst_rank < htrdr->mpi_nprocs); + return dst_rank; +} + +/* Return the number of stolen tiles */ +static size_t +mpi_steal_work + (struct htrdr* htrdr, + struct ssp_rng* rng, + struct proc_work* work) +{ + MPI_Request req; + size_t itile; + size_t nthieves = 0; + uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */ + int proc_to_steal; /* Process to steal */ + uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16); + ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX); + + /* Protect MPI calls of multiple invocations from concurrent threads */ + #define P_MPI(Func) { \ + mutex_lock(htrdr->mpi_mutex); \ + MPI(Func); \ + mutex_unlock(htrdr->mpi_mutex); \ + } (void)0 + + /* No more working process => nohting to steal */ + if(!htrdr->mpi_nworking_procs) return 0; + + /* Sample a process to steal */ + proc_to_steal = mpi_sample_working_process(htrdr, rng); + + /* Send a steal request to the sampled process and wait for a response */ + P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal, + HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD)); + + /* Receive the stolen tile from the sampled process */ + P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal, + HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req)); + + mpi_wait_for_request(htrdr, &req); + + FOR_EACH(itile, 0, ntiles_to_steal) { + if(tiles[itile] == TILE_MCODE_NULL) { + ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0); + htrdr->mpi_working_procs[proc_to_steal] = 0; + htrdr->mpi_nworking_procs--; + break; + } + proc_work_add_tile(work, tiles[itile]); + ++nthieves; + } + #undef P_MPI + return nthieves; +} + +static res_T +mpi_gather_tiles + (struct htrdr* htrdr, + struct htrdr_buffer* buf, + const size_t ntiles, + struct list_node* tiles) +{ + /* Compute the size of the tile_data */ + const size_t msg_sz = + sizeof(struct tile_data) - sizeof(struct htrdr_accum)/*dummy*/ + + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum)*3/*#channels*/; + + struct list_node* node = NULL; + struct tile* tile = NULL; + res_T res = RES_OK; + ASSERT(htrdr && tiles); + ASSERT(htrdr->mpi_rank != 0 || buf); + (void)ntiles; + + if(htrdr->mpi_rank != 0) { /* Non master process */ + /* Send the computed tile to the master process */ + LIST_FOR_EACH(node, tiles) { + struct tile* t = CONTAINER_OF(node, struct tile, node); + MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0, + HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD)); + } + } else { /* Master process */ + size_t itile = 0; + + LIST_FOR_EACH(node, tiles) { + struct tile* t = CONTAINER_OF(node, struct tile, node); + write_tile_data(buf, &t->data); + ++itile; + } + + if(itile != ntiles) { + ASSERT(htrdr->mpi_nprocs > 1); + + /* Create a temporary tile to receive the tile data computed by the + * concurrent MPI processes */ + tile = tile_create(htrdr->allocator); + if(!tile) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not allocate the temporary tile used to gather the process " + "output data -- %s.\n", res_to_cstr(res)); + goto error; + } + + /* Receive the tile data of the concurret MPI processes */ + FOR_EACH(itile, itile, ntiles) { + MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE, + HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); + write_tile_data(buf, &tile->data); + } + } + } + +exit: + if(tile) tile_ref_put(tile); + return res; 
+error: + goto exit; +} + static res_T draw_tile (struct htrdr* htrdr, @@ -53,11 +455,11 @@ draw_tile const struct htrdr_camera* cam, const size_t spp, /* #samples per pixel */ struct ssp_rng* rng, - struct htrdr_buffer* buf) + struct tile* tile) { size_t npixels; size_t mcode; /* Morton code of tile pixel */ - ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && buf); + ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && tile); (void)tile_mcode; /* Adjust the #pixels to process them wrt a morton order */ npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1])); @@ -74,13 +476,13 @@ draw_tile ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1)); if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */ + /* Fetch and reset the pixel accumulator */ + pix_accums = tile_at(tile, ipix_tile[0], ipix_tile[1]); + /* Compute the pixel coordinate */ ipix[0] = tile_org[0] + ipix_tile[0]; ipix[1] = tile_org[1] + ipix_tile[1]; - /* Fetch and reset the pixel accumulator */ - pix_accums = htrdr_buffer_at(buf, ipix[0], ipix[1]); - FOR_EACH(ichannel, 0, 3) { size_t isamp; pix_accums[ichannel] = HTRDR_ACCUM_NULL; @@ -133,131 +535,293 @@ draw_tile return RES_OK; } -/******************************************************************************* - * Local functions - ******************************************************************************/ -res_T -htrdr_draw_radiance_sw +static res_T +draw_image (struct htrdr* htrdr, const struct htrdr_camera* cam, + const size_t width, /* Image width */ + const size_t height, /* Image height */ const size_t spp, - struct htrdr_buffer* buf) + const size_t ntiles_x, + const size_t ntiles_y, + const size_t ntiles_adjusted, + const double pix_sz[2], /* Pixel size in the normalized image plane */ + struct proc_work* work, + struct list_node* tiles) { - struct ssp_rng_proxy* rng_proxy = NULL; - struct ssp_rng** rngs = NULL; - size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted; - size_t progress = 0; - size_t i; - int32_t mcode; /* Morton code of the tile */ - struct htrdr_buffer_layout layout; - double pix_sz[2]; /* Pixel size in the normalized image plane */ + struct ssp_rng* rng_proc = NULL; + MPI_Request req; + size_t nthreads = 0; + size_t nthieves = 0; + size_t proc_ntiles = 0; ATOMIC nsolved_tiles = 0; ATOMIC res = RES_OK; - ASSERT(htrdr && cam && buf); + ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles); + ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0); + ASSERT(width && height); + (void)ntiles_x, (void)ntiles_y; - htrdr_buffer_get_layout(buf, &layout); - ASSERT(layout.width || layout.height || layout.elmt_size); - - if(layout.elmt_size != sizeof(struct htrdr_accum[3])/*#channels*/ - || layout.alignment < ALIGNOF(struct htrdr_accum[3])) { - htrdr_log_err(htrdr, - "%s: invalid buffer layout. 
" - "The pixel size must be the size of 3 * accumulators.\n", - FUNC_NAME); - res = RES_BAD_ARG; - goto error; - } - - res = ssp_rng_proxy_create - (htrdr->allocator, &ssp_rng_mt19937_64, htrdr->nthreads, &rng_proxy); + res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc); if(res != RES_OK) { - htrdr_log_err(htrdr, "%s: could not create the RNG proxy.\n", FUNC_NAME); + htrdr_log_err(htrdr, "could not create the RNG used to sample a process " + "to steal -- %s.\n", res_to_cstr((res_T)res)); goto error; } - rngs = MEM_CALLOC(htrdr->allocator, htrdr->nthreads, sizeof(*rngs)); - if(!rngs) { - htrdr_log_err(htrdr, "%s: could not allocate the RNGs list.\n", FUNC_NAME); - res = RES_MEM_ERR; - goto error; - } + proc_ntiles = proc_work_get_ntiles(work); + nthreads = MMIN(htrdr->nthreads, proc_ntiles); - FOR_EACH(i, 0, htrdr->nthreads) { - res = ssp_rng_proxy_create_rng(rng_proxy, i, &rngs[i]); - if(res != RES_OK) { - htrdr_log_err(htrdr,"%s: could not create the per thread RNG.\n", FUNC_NAME); - goto error; - } - } - - ntiles_x = (layout.width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; - ntiles_y = (layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; - ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y)); - ntiles_adjusted *= ntiles_adjusted; - ntiles = ntiles_x * ntiles_y; - - pix_sz[0] = 1.0 / (double)layout.width; - pix_sz[1] = 1.0 / (double)layout.height; - - fprintf(stderr, "Rendering: %3i%%", 0); - fflush(stderr); + /* The process is not considered as a working process for himself */ + htrdr->mpi_working_procs[htrdr->mpi_rank] = 0; + --htrdr->mpi_nworking_procs; - omp_set_num_threads((int)htrdr->nthreads); - - #pragma omp parallel for schedule(static, 1/*chunck size*/) - for(mcode=0; mcode<(int64_t)ntiles_adjusted; ++mcode) { + omp_set_num_threads((int)nthreads); + #pragma omp parallel + for(;;) { const int ithread = omp_get_thread_num(); - struct ssp_rng* rng = rngs[ithread]; + struct ssp_rng_proxy* rng_proxy = NULL; + struct ssp_rng* rng; + struct tile* tile; + uint32_t mcode = TILE_MCODE_NULL; size_t tile_org[2]; size_t tile_sz[2]; - size_t pcent; size_t n; res_T res_local = RES_OK; + int32_t pcent; + + /* Get a tile to draw */ + #pragma omp critical + { + mcode = proc_work_get_tile(work); + if(mcode == TILE_MCODE_NULL) { /* No more work on this process */ + /* Try to steal works to concurrent processes */ + proc_work_reset(work); + nthieves = mpi_steal_work(htrdr, rng_proc, work); + if(nthieves != 0) { + mcode = proc_work_get_tile(work); + } + } + } + if(mcode == TILE_MCODE_NULL) break; /* No more work */ /* Decode the morton code to retrieve the tile index */ tile_org[0] = morton2D_decode((uint32_t)(mcode>>0)); - if(tile_org[0] >= ntiles_x) continue; /* Skip border tile */ tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); - if(tile_org[1] >= ntiles_y) continue; /* Skip border tile */ + ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y); + + /* Create the tile */ + tile = tile_create(htrdr->allocator); + if(!tile) { + ATOMIC_SET(&res, RES_MEM_ERR); + htrdr_log_err(htrdr, + "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n", + (unsigned long)tile_org[0], (unsigned long)tile_org[1], + res_to_cstr((res_T)ATOMIC_GET(&res))); + break; + } + + /* Register the tile */ + #pragma omp critical + list_add_tail(tiles, &tile->node); + + tile->data.x = (uint16_t)tile_org[0]; + tile->data.y = (uint16_t)tile_org[1]; /* Define the tile origin in pixel space */ tile_org[0] *= TILE_SIZE; tile_org[1] *= TILE_SIZE; /* Compute the size of the tile clamped by the borders 
of the buffer */ - tile_sz[0] = MMIN(TILE_SIZE, layout.width - tile_org[0]); - tile_sz[1] = MMIN(TILE_SIZE, layout.height - tile_org[1]); - + tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]); + tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]); + + /* Create a proxy RNG for the current tile. This proxy is used for the + * current thread only and thus it has to manage only one RNG. This proxy + * is initialised in order to ensure that a unique and predictable set of + * random numbers is used for the current tile. */ + SSP(rng_proxy_create2 + (&htrdr->lifo_allocators[ithread], + &ssp_rng_threefry, + RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */ + RNG_SEQUENCE_SIZE, /* Size */ + RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */ + 1, &rng_proxy)); + SSP(rng_proxy_create_rng(rng_proxy, 0, &rng)); + + /* Launch the tile rendering */ res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz, - pix_sz, cam, spp, rng, buf); + pix_sz, cam, spp, rng, tile); + + SSP(rng_proxy_ref_put(rng_proxy)); + SSP(rng_ref_put(rng)); + if(res_local != RES_OK) { ATOMIC_SET(&res, res_local); - continue; + break; } + /* Update the progress status */ n = (size_t)ATOMIC_INCR(&nsolved_tiles); - pcent = n * 100 / ntiles; + pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/); #pragma omp critical - if(pcent > progress) { - progress = pcent; - fprintf(stderr, "%c[2K\rRendering: %3lu%%", - 27, (unsigned long)pcent); - fflush(stderr); + if(pcent > htrdr->mpi_progress_render[0]) { + htrdr->mpi_progress_render[0] = pcent; + if(htrdr->mpi_rank == 0) { + update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + } else { /* Send the progress percentage to the master process */ + send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent); + } } + } + + if(ATOMIC_GET(&res) != RES_OK) goto error; + + /* Synchronize the process */ + mutex_lock(htrdr->mpi_mutex); + MPI(Ibarrier(MPI_COMM_WORLD, &req)); + mutex_unlock(htrdr->mpi_mutex); + + /* Wait for processes synchronization */ + mpi_wait_for_request(htrdr, &req); + +exit: + if(rng_proc) SSP(rng_ref_put(rng_proc)); + return (res_T)res; +error: + goto exit; +} - if(ATOMIC_GET(&res) != RES_OK) continue; +/******************************************************************************* + * Local functions + ******************************************************************************/ +res_T +htrdr_draw_radiance_sw + (struct htrdr* htrdr, + const struct htrdr_camera* cam, + const size_t width, + const size_t height, + const size_t spp, + struct htrdr_buffer* buf) +{ + char strbuf[128]; + struct time t0, t1; + struct list_node tiles; + size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted; + size_t itile; + struct proc_work work; + struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL; + size_t proc_ntiles_adjusted; + double pix_sz[2]; + ATOMIC probe_thieves = 1; + ATOMIC res = RES_OK; + ASSERT(htrdr && cam && width && height); + ASSERT(htrdr->mpi_rank != 0 || buf); + + list_init(&tiles); + proc_work_init(htrdr->allocator, &work); + + if(htrdr->mpi_rank == 0) { + htrdr_buffer_get_layout(buf, &layout); + ASSERT(layout.width || layout.height || layout.elmt_size); + ASSERT(layout.width == width && layout.height == height); + + if(layout.elmt_size != sizeof(struct htrdr_accum[3])/*#channels*/ + || layout.alignment < ALIGNOF(struct htrdr_accum[3])) { + htrdr_log_err(htrdr, + "%s: invalid buffer layout. 
" + "The pixel size must be the size of 3 * accumulators.\n", + FUNC_NAME); + res = RES_BAD_ARG; + goto error; + } } - fprintf(stderr, "\n"); + + /* Compute the overall number of tiles */ + ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; + ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; + ntiles = ntiles_x * ntiles_y; + + /* Compute the pixel size in the normalized image plane */ + pix_sz[0] = 1.0 / (double)width; + pix_sz[1] = 1.0 / (double)height; + + /* Adjust the #tiles for the morton-encoding procedure */ + ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y)); + ntiles_adjusted *= ntiles_adjusted; + + /* Define the initial number of tiles of the current process */ + proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs; + if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */ + proc_ntiles_adjusted += + ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs; + } + + /* Define the initial list of tiles of the process */ + FOR_EACH(itile, 0, proc_ntiles_adjusted) { + uint32_t mcode; + uint16_t tile_org[2]; + + mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs + + (uint32_t)htrdr->mpi_rank; + + tile_org[0] = morton2D_decode(mcode>>0); + if(tile_org[0] >= ntiles_x) continue; + tile_org[1] = morton2D_decode(mcode>>1); + if(tile_org[1] >= ntiles_y) continue; + proc_work_add_tile(&work, mcode); + } + + if(htrdr->mpi_rank == 0) { + fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + } + + time_current(&t0); + + omp_set_nested(1); /* Enable nested threads for draw_image */ + #pragma omp parallel sections num_threads(2) + { + #pragma omp section + mpi_probe_thieves(htrdr, &work, &probe_thieves); + + #pragma omp section + { + draw_image(htrdr, cam, width, height, spp, ntiles_x, ntiles_y, + ntiles_adjusted, pix_sz, &work, &tiles); + /* The processes have no more work to do. 
Stop probing for thieves */ + ATOMIC_SET(&probe_thieves, 0); + } + } + + if(htrdr->mpi_rank == 0) { + update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + fprintf(stderr, "\n"); /* Add a new line after the progress statuses */ + } + + time_sub(&t0, time_current(&t1), &t0); + time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf)); + htrdr_log(htrdr, "Rendering time: %s\n", strbuf); + + /* Gather accum buffers from the group of processes */ + time_current(&t0); + res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles); + if(res != RES_OK) goto error; + time_sub(&t0, time_current(&t1), &t0); + time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf)); + htrdr_log(htrdr, "Image gathering time: %s\n", strbuf); exit: - if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); - if(rngs) { - FOR_EACH(i, 0, htrdr->nthreads) { - if(rngs[i]) SSP(rng_ref_put(rngs[i])); + { /* Free allocated tiles */ + struct list_node* node; + struct list_node* tmp; + LIST_FOR_EACH_SAFE(node, tmp, &tiles) { + struct tile* tile = CONTAINER_OF(node, struct tile, node); + list_del(node); + tile_ref_put(tile); } - MEM_RM(htrdr->allocator, rngs); } + proc_work_release(&work); return (res_T)res; error: goto exit; diff --git a/src/htrdr_ground.c b/src/htrdr_ground.c @@ -48,6 +48,7 @@ struct htrdr_ground { struct s3d_scene_view* view; float lower[3]; /* Ground lower bound */ float upper[3]; /* Ground upper bound */ + double reflectivity; int repeat; /* Make the ground infinite in X and Y */ struct htrdr* htrdr; @@ -223,6 +224,7 @@ res_T htrdr_ground_create (struct htrdr* htrdr, const char* obj_filename, + const double reflectivity, const int repeat_ground, /* Infinitely repeat the ground in X and Y */ struct htrdr_ground** out_ground) { @@ -231,6 +233,7 @@ htrdr_ground_create struct time t0, t1; res_T res = RES_OK; ASSERT(htrdr && obj_filename && out_ground); + ASSERT(reflectivity >= 0 || reflectivity <= 1); ground = MEM_CALLOC(htrdr->allocator, 1, sizeof(*ground)); if(!ground) { @@ -243,6 +246,7 @@ htrdr_ground_create ref_init(&ground->ref); ground->htrdr = htrdr; ground->repeat = repeat_ground; + ground->reflectivity = reflectivity; time_current(&t0); res = setup_ground(ground, obj_filename); @@ -276,6 +280,13 @@ htrdr_ground_ref_put(struct htrdr_ground* ground) ref_put(&ground->ref, release_ground); } +double +htrdr_ground_get_reflectivity(const struct htrdr_ground* ground) +{ + ASSERT(ground); + return ground->reflectivity; +} + res_T htrdr_ground_trace_ray (struct htrdr_ground* ground, diff --git a/src/htrdr_ground.h b/src/htrdr_ground.h @@ -27,6 +27,7 @@ extern LOCAL_SYM res_T htrdr_ground_create (struct htrdr* htrdr, const char* obj_filename, + const double reflectivity, /* In [0, 1] */ const int repeat_ground, /* Infinitely repeat the ground in X and Y */ struct htrdr_ground** ground); @@ -38,6 +39,10 @@ extern LOCAL_SYM void htrdr_ground_ref_put (struct htrdr_ground* ground); +extern LOCAL_SYM double +htrdr_ground_get_reflectivity + (const struct htrdr_ground* ground); + extern LOCAL_SYM res_T htrdr_ground_trace_ray (struct htrdr_ground* ground, diff --git a/src/htrdr_main.c b/src/htrdr_main.c @@ -16,8 +16,21 @@ #include "htrdr.h" #include "htrdr_args.h" +#include <mpi.h> #include <rsys/mem_allocator.h> +static const char* +thread_support_string(const int val) +{ + switch(val) { + case MPI_THREAD_SINGLE: return "MPI_THREAD_SINGLE"; + case MPI_THREAD_FUNNELED: return "MPI_THREAD_FUNNELED"; + case MPI_THREAD_SERIALIZED: return "MPI_THREAD_SERIALIZED"; + case MPI_THREAD_MULTIPLE: return "MPI_THREAD_MULTIPLE"; + default: 
FATAL("Unreachable code.\n"); break; + } +} + /******************************************************************************* * Program ******************************************************************************/ @@ -29,12 +42,32 @@ main(int argc, char** argv) size_t memsz = 0; int err = 0; int is_htrdr_init = 0; + int thread_support = 0; res_T res = RES_OK; + err = MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support); + if(err != MPI_SUCCESS) { + fprintf(stderr, "Error initializing MPI.\n"); + goto error; + } + + if(thread_support != MPI_THREAD_SERIALIZED) { + fprintf(stderr, "The provided MPI implementation does not support " + "serialized API calls from multiple threads. Provided thread support: " + "%s.\n", thread_support_string(thread_support)); + goto error; + } + res = htrdr_args_init(&args, argc, argv); if(res != RES_OK) goto error; if(args.quit) goto exit; + if(args.dump_vtk) { + int rank; + CHK(MPI_Comm_rank(MPI_COMM_WORLD, &rank) == MPI_SUCCESS); + if(rank != 0) goto exit; /* Nothing to do except for the master process */ + } + res = htrdr_init(NULL, &args, &htrdr); if(res != RES_OK) goto error; is_htrdr_init = 1; @@ -43,6 +76,7 @@ main(int argc, char** argv) if(res != RES_OK) goto error; exit: + MPI_Finalize(); if(is_htrdr_init) htrdr_release(&htrdr); htrdr_args_release(&args); if((memsz = mem_allocated_size()) != 0) { diff --git a/src/htrdr_sky.c b/src/htrdr_sky.c @@ -38,6 +38,7 @@ #include <fcntl.h> /* open */ #include <libgen.h> #include <math.h> +#include <mpi.h> #include <omp.h> #include <string.h> #include <sys/stat.h> /* mkdir */ @@ -65,6 +66,15 @@ struct split { #define DARRAY_DATA struct split #include <rsys/dynamic_array.h> +struct spectral_data { + size_t iband; /* Index of the spectral band */ + size_t iquad; /* Quadrature point into the band */ +}; + +#define DARRAY_NAME specdata +#define DARRAY_DATA struct spectral_data +#include <rsys/dynamic_array.h> + struct build_tree_context { const struct htrdr_sky* sky; double vxsz[3]; @@ -1009,8 +1019,9 @@ setup_cloud_grid /* Compute the overall number of voxels in the htrdr_grid */ ncells = definition[0] * definition[1] * definition[2]; - fprintf(stderr, "Generating cloud grid %lu.%lu: %3u%%", iband, iquad, 0); - fflush(stderr); + htrdr_fprintf(sky->htrdr, stderr, + "Generating cloud grid %lu.%lu: %3u%%", iband, iquad, 0); + htrdr_fflush(sky->htrdr, stderr); omp_set_num_threads((int)sky->htrdr->nthreads); #pragma omp parallel for @@ -1033,12 +1044,13 @@ setup_cloud_grid #pragma omp critical if(pcent > progress) { progress = pcent; - fprintf(stderr, "%c[2K\rGenerating cloud grid %lu.%lu: %3u%%", + htrdr_fprintf(sky->htrdr, stderr, + "%c[2K\rGenerating cloud grid %lu.%lu: %3u%%", 27, iband, iquad, (unsigned)pcent); - fflush(stderr); + htrdr_fflush(sky->htrdr, stderr); } } - fprintf(stderr, "\n"); + htrdr_fprintf(sky->htrdr, stderr, "\n"); exit: *out_grid = grid; @@ -1063,16 +1075,20 @@ setup_clouds const double optical_thickness_threshold, const int force_cache_update) { - struct svx_voxel_desc vox_desc = SVX_VOXEL_DESC_NULL; - struct build_tree_context ctx = BUILD_TREE_CONTEXT_NULL; + struct darray_specdata specdata; size_t nvoxs[3]; + double vxsz[3]; double low[3]; double upp[3]; + int64_t ispecdata; size_t nbands; size_t i; - res_T res = RES_OK; + ATOMIC nbuilt_octrees = 0; + ATOMIC res = RES_OK; ASSERT(sky && sky->sw_bands && optical_thickness_threshold >= 0); + darray_specdata_init(sky->htrdr->allocator, &specdata); + res = htcp_get_desc(sky->htcp, &sky->htcp_desc); if(res != RES_OK) { 
diff --git a/src/htrdr_sky.c b/src/htrdr_sky.c
@@ -38,6 +38,7 @@
 #include <fcntl.h> /* open */
 #include <libgen.h>
 #include <math.h>
+#include <mpi.h>
 #include <omp.h>
 #include <string.h>
 #include <sys/stat.h> /* mkdir */
@@ -65,6 +66,15 @@ struct split {
 #define DARRAY_DATA struct split
 #include <rsys/dynamic_array.h>
 
+struct spectral_data {
+  size_t iband; /* Index of the spectral band */
+  size_t iquad; /* Quadrature point into the band */
+};
+
+#define DARRAY_NAME specdata
+#define DARRAY_DATA struct spectral_data
+#include <rsys/dynamic_array.h>
+
 struct build_tree_context {
   const struct htrdr_sky* sky;
   double vxsz[3];
@@ -1009,8 +1019,9 @@ setup_cloud_grid
   /* Compute the overall number of voxels in the htrdr_grid */
   ncells = definition[0] * definition[1] * definition[2];
 
-  fprintf(stderr, "Generating cloud grid %lu.%lu: %3u%%", iband, iquad, 0);
-  fflush(stderr);
+  htrdr_fprintf(sky->htrdr, stderr,
+    "Generating cloud grid %lu.%lu: %3u%%", iband, iquad, 0);
+  htrdr_fflush(sky->htrdr, stderr);
 
   omp_set_num_threads((int)sky->htrdr->nthreads);
   #pragma omp parallel for
@@ -1033,12 +1044,13 @@ setup_cloud_grid
       #pragma omp critical
       if(pcent > progress) {
         progress = pcent;
-        fprintf(stderr, "%c[2K\rGenerating cloud grid %lu.%lu: %3u%%",
+        htrdr_fprintf(sky->htrdr, stderr,
+          "%c[2K\rGenerating cloud grid %lu.%lu: %3u%%",
           27, iband, iquad, (unsigned)pcent);
-        fflush(stderr);
+        htrdr_fflush(sky->htrdr, stderr);
       }
     }
-  fprintf(stderr, "\n");
+  htrdr_fprintf(sky->htrdr, stderr, "\n");
 
 exit:
   *out_grid = grid;
@@ -1063,16 +1075,20 @@ setup_clouds
   const double optical_thickness_threshold,
   const int force_cache_update)
 {
-  struct svx_voxel_desc vox_desc = SVX_VOXEL_DESC_NULL;
-  struct build_tree_context ctx = BUILD_TREE_CONTEXT_NULL;
+  struct darray_specdata specdata;
   size_t nvoxs[3];
+  double vxsz[3];
   double low[3];
   double upp[3];
+  int64_t ispecdata;
   size_t nbands;
   size_t i;
-  res_T res = RES_OK;
+  ATOMIC nbuilt_octrees = 0;
+  ATOMIC res = RES_OK;
   ASSERT(sky && sky->sw_bands && optical_thickness_threshold >= 0);
 
+  darray_specdata_init(sky->htrdr->allocator, &specdata);
+
   res = htcp_get_desc(sky->htcp, &sky->htcp_desc);
   if(res != RES_OK) {
     htrdr_log_err(sky->htrdr, "could not retrieve the HTCP descriptor.\n");
@@ -1126,27 +1142,21 @@ setup_clouds
       const double upp_z = (double)(iz + 1) * sky->lut_cell_sz + low[2];
       darray_split_data_get(&sky->svx2htcp_z)[iz].index = iz2;
       darray_split_data_get(&sky->svx2htcp_z)[iz].height = upp[2];
-      if(upp_z >= upp[2]) upp[2] += sky->htcp_desc.vxsz_z[++iz2];
+      if(upp_z >= upp[2] && iz + 1 < nsplits) {
+        ASSERT(iz2 + 1 < sky->htcp_desc.spatial_definition[2]);
+        upp[2] += sky->htcp_desc.vxsz_z[++iz2];
+      }
     }
     ASSERT(eq_eps(upp[2] - low[2], len_z, 1.e-6));
   }
 
   /* Setup the build context */
-  ctx.sky = sky;
-  ctx.tau_threshold = optical_thickness_threshold;
-  ctx.vxsz[0] = sky->htcp_desc.upper[0] - sky->htcp_desc.lower[0];
-  ctx.vxsz[1] = sky->htcp_desc.upper[1] - sky->htcp_desc.lower[1];
-  ctx.vxsz[2] = sky->htcp_desc.upper[2] - sky->htcp_desc.lower[2];
-  ctx.vxsz[0] = ctx.vxsz[0] / (double)sky->htcp_desc.spatial_definition[0];
-  ctx.vxsz[1] = ctx.vxsz[1] / (double)sky->htcp_desc.spatial_definition[1];
-  ctx.vxsz[2] = ctx.vxsz[2] / (double)sky->htcp_desc.spatial_definition[2];
-
-  /* Setup the voxel descriptor */
-  vox_desc.get = cloud_vox_get;
-  vox_desc.merge = cloud_vox_merge;
-  vox_desc.challenge_merge = cloud_vox_challenge_merge;
-  vox_desc.context = &ctx;
-  vox_desc.size = sizeof(float) * NFLOATS_PER_VOXEL;
+  vxsz[0] = sky->htcp_desc.upper[0] - sky->htcp_desc.lower[0];
+  vxsz[1] = sky->htcp_desc.upper[1] - sky->htcp_desc.lower[1];
+  vxsz[2] = sky->htcp_desc.upper[2] - sky->htcp_desc.lower[2];
+  vxsz[0] = vxsz[0] / (double)sky->htcp_desc.spatial_definition[0];
+  vxsz[1] = vxsz[1] / (double)sky->htcp_desc.spatial_definition[1];
+  vxsz[2] = vxsz[2] / (double)sky->htcp_desc.spatial_definition[2];
 
   /* Create as many cloud data structure than considered SW spectral bands */
   nbands = htrdr_sky_get_sw_spectral_bands_count(sky);
@@ -1158,58 +1168,135 @@ setup_clouds
     goto error;
   }
 
+  /* Compute how many octrees are going to be built */
   FOR_EACH(i, 0, nbands) {
-    size_t iquad;
     struct htgop_spectral_interval band;
-    ctx.iband = i + sky->sw_bands_range[0];
+    const size_t iband = i + sky->sw_bands_range[0];
+    size_t iquad;
 
-    HTGOP(get_sw_spectral_interval(sky->htgop, ctx.iband, &band));
+    HTGOP(get_sw_spectral_interval(sky->htgop, iband, &band));
 
     sky->clouds[i] = MEM_CALLOC(sky->htrdr->allocator, band.quadrature_length,
       sizeof(*sky->clouds[i]));
     if(!sky->clouds[i]) {
       htrdr_log_err(sky->htrdr,
        "could not create the list of per quadrature point cloud data "
-       "for the band %lu.\n", (unsigned long)ctx.iband);
+       "for the band %lu.\n", (unsigned long)iband);
      res = RES_MEM_ERR;
      goto error;
    }

-    /* Build a cloud octree for each quadrature point of the considered
-     * spectral band */
     FOR_EACH(iquad, 0, band.quadrature_length) {
-      ctx.quadrature_range[0] = iquad;
-      ctx.quadrature_range[1] = iquad;
-
-      /* Compute grid of voxels data */
-      res = setup_cloud_grid(sky, nvoxs, ctx.iband, iquad, htcp_filename,
-        htgop_filename, htmie_filename, force_cache_update, &ctx.grid);
-      if(res != RES_OK) goto error;
-
-      /* Create the octree */
-      res = svx_octree_create(sky->htrdr->svx, low, upp, nvoxs, &vox_desc,
-        &sky->clouds[i][iquad].octree);
+      struct spectral_data spectral_data;
+      spectral_data.iband = iband;
+      spectral_data.iquad = iquad;
+      res = darray_specdata_push_back(&specdata, &spectral_data);
       if(res != RES_OK) {
-        htrdr_log_err(sky->htrdr, "could not create the octree of the cloud "
-          "properties for the band %lu.\n", (unsigned long)ctx.iband);
+        htrdr_log_err(sky->htrdr,
+          "could not register the quadrature point %lu of the spectral band "
+          "%lu.\n", (unsigned long)iquad, (unsigned long)iband);
         goto error;
       }
+    }
+  }
 
-      /* Fetch the octree descriptor for future use */
-      SVX(tree_get_desc
-        (sky->clouds[i][iquad].octree, &sky->clouds[i][iquad].octree_desc));
+  /* Build the octrees sequentially when the grid cache is enabled and in
+   * parallel otherwise: with the cache enabled, the threads are spent on
+   * generating the cached grids rather than on building the octrees. */
+  if(sky->htrdr->cache_grids) {
+    omp_set_num_threads(1);
+  } else {
+    omp_set_num_threads((int)sky->htrdr->nthreads);
+    if(sky->htrdr->mpi_rank == 0) {
+      fetch_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+      print_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+    }
+  }
+  #pragma omp parallel for schedule(dynamic, 1/*chunksize*/)
+  for(ispecdata=0;
+      (size_t)ispecdata<darray_specdata_size_get(&specdata);
+      ++ispecdata) {
+    struct svx_voxel_desc vox_desc = SVX_VOXEL_DESC_NULL;
+    struct build_tree_context ctx = BUILD_TREE_CONTEXT_NULL;
+    const size_t iband = darray_specdata_data_get(&specdata)[ispecdata].iband;
+    const size_t iquad = darray_specdata_data_get(&specdata)[ispecdata].iquad;
+    const size_t id = iband - sky->sw_bands_range[0];
+    int32_t pcent;
+    size_t n;
+    res_T res_local = RES_OK;
+
+    if(ATOMIC_GET(&res) != RES_OK) continue;
+
+    /* Setup the build context */
+    ctx.sky = sky;
+    ctx.vxsz[0] = vxsz[0];
+    ctx.vxsz[1] = vxsz[1];
+    ctx.vxsz[2] = vxsz[2];
+    ctx.tau_threshold = optical_thickness_threshold;
+    ctx.iband = iband;
+    ctx.quadrature_range[0] = iquad;
+    ctx.quadrature_range[1] = iquad;
+
+    /* Setup the voxel descriptor */
+    vox_desc.get = cloud_vox_get;
+    vox_desc.merge = cloud_vox_merge;
+    vox_desc.challenge_merge = cloud_vox_challenge_merge;
+    vox_desc.context = &ctx;
+    vox_desc.size = sizeof(float) * NFLOATS_PER_VOXEL;
+
+    /* Compute grid of voxels data */
+    if(sky->htrdr->cache_grids) {
+      res_local = setup_cloud_grid(sky, nvoxs, iband, iquad, htcp_filename,
+        htgop_filename, htmie_filename, force_cache_update, &ctx.grid);
+      if(res_local != RES_OK) continue;
+    }
 
-      if(ctx.grid) {
-        htrdr_grid_ref_put(ctx.grid);
-        ctx.grid = NULL;
+    /* Create the octree */
+    res_local = svx_octree_create(sky->htrdr->svx, low, upp, nvoxs, &vox_desc,
+      &sky->clouds[id][iquad].octree);
+    if(ctx.grid) htrdr_grid_ref_put(ctx.grid);
+    if(res_local != RES_OK) {
+      htrdr_log_err(sky->htrdr, "could not create the octree of the cloud "
+        "properties for the band %lu.\n", (unsigned long)ctx.iband);
+      ATOMIC_SET(&res, res_local);
+      continue;
+    }
+
+    /* Fetch the octree descriptor for future use */
+    SVX(tree_get_desc
+      (sky->clouds[id][iquad].octree, &sky->clouds[id][iquad].octree_desc));
+
+    if(!sky->htrdr->cache_grids) {
+      /* Update the progress message */
+      n = (size_t)ATOMIC_INCR(&nbuilt_octrees);
+      pcent = (int32_t)(n * 100 / darray_specdata_size_get(&specdata));
+
+      #pragma omp critical
+      if(pcent > sky->htrdr->mpi_progress_octree[0]) {
+        sky->htrdr->mpi_progress_octree[0] = pcent;
+        if(sky->htrdr->mpi_rank == 0) {
+          update_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+        } else { /* Send the progress percentage to the master process */
+          send_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE, pcent);
+        }
      }
    }
  }
+
+  if(!sky->htrdr->cache_grids && sky->htrdr->mpi_rank == 0) {
+    while(total_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE) != 100) {
+      update_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+      sleep(1);
+    }
+    fprintf(stderr,
+      "\n"); /* Add a new line after the progress statuses */
+  }
+
 exit:
-  return res;
+  darray_specdata_release(&specdata);
+  return (res_T)res;
 error:
-  if(ctx.grid) htrdr_grid_ref_put(ctx.grid);
   clean_clouds(sky);
   darray_split_clear(&sky->svx2htcp_z);
   goto exit;
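
The parallel loop above flattens the (band, quadrature point) pairs into a single work list, hands the items out one at a time with schedule(dynamic, 1) because their costs are uneven, and uses a shared status so that the remaining iterations bail out after the first failure. The standalone sketch below illustrates that pattern with hypothetical work_item and process() placeholders rather than the htrdr types.

#include <omp.h>
#include <stddef.h>
#include <stdio.h>

struct work_item { size_t iband, iquad; };

static int
process(const struct work_item* item)
{
  (void)item; /* a real job would build one octree per (band, quadrature) */
  return 0; /* 0 on success */
}

static int
run_work_list(const struct work_item* items, const long count)
{
  int err = 0;
  long i;
  /* Dynamic scheduling keeps the threads busy until the list is exhausted. */
  #pragma omp parallel for schedule(dynamic, 1)
  for(i = 0; i < count; ++i) {
    int local;
    #pragma omp atomic read
    local = err;
    if(local) continue; /* a previous item failed: skip the remaining work */
    local = process(items + i);
    if(local) {
      #pragma omp atomic write
      err = local;
    }
  }
  return err;
}

int
main(void)
{
  struct work_item items[4] = {{0,0}, {0,1}, {1,0}, {1,1}};
  printf("status: %d\n", run_work_list(items, 4));
  return 0;
}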
"\n"); /* Add a new line after the progress statuses */ + } + exit: - return res; + darray_specdata_release(&specdata); + return (res_T)res; error: - if(ctx.grid) htrdr_grid_ref_put(ctx.grid); clean_clouds(sky); darray_split_clear(&sky->svx2htcp_z); goto exit; diff --git a/src/htrdr_slab.c b/src/htrdr_slab.c @@ -41,7 +41,7 @@ htrdr_slab_trace_ray double cell_sz[3]; /* Size of a cell */ double t_max[3], t_delta[2], t_min_z; size_t istep; - int xy[2]; /* 2D index of the repeated cell */ + int64_t xy[2]; /* 2D index of the repeated cell */ int incr[2]; /* Index increment */ res_T res = RES_OK; ASSERT(htrdr && org && dir && range && cell_low && cell_upp && trace_cell); @@ -64,8 +64,8 @@ htrdr_slab_trace_ray * non duplicated cell */ pos[0] = org[0] + t_min_z*dir[0]; pos[1] = org[1] + t_min_z*dir[1]; - xy[0] = (int)floor((pos[0] - cell_low[0]) / cell_sz[0]); - xy[1] = (int)floor((pos[1] - cell_low[1]) / cell_sz[1]); + xy[0] = (int64_t)floor((pos[0] - cell_low[0]) / cell_sz[0]); + xy[1] = (int64_t)floor((pos[1] - cell_low[1]) / cell_sz[1]); /* Define the 2D index increment wrt dir sign */ incr[0] = dir[0] < 0 ? -1 : 1; diff --git a/src/htrdr_solve.h b/src/htrdr_solve.h @@ -48,8 +48,11 @@ extern LOCAL_SYM res_T htrdr_draw_radiance_sw (struct htrdr* htrdr, const struct htrdr_camera* cam, + const size_t width, /* Image width */ + const size_t height, /* Image height */ const size_t spp, /* #samples per pixel, i.e. #realisations */ - struct htrdr_buffer* buf); /* Buffer of struct htrdr_accum[3] */ + /* Buffer of struct htrdr_accum[3]. May be NULL on on non master processes */ + struct htrdr_buffer* buf); extern LOCAL_SYM int htrdr_ground_filter