stardis-solver

Solve coupled heat transfers
git clone git://git.meso-star.fr/stardis-solver.git
Log | Files | Refs | README | LICENSE

commit 0c3169117baa442269a8855036894c3884a53a9c
parent dc29b53107ce603be7562ded7c873b71a1b20206
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Thu, 14 Dec 2023 15:01:53 +0100

Merge branch 'feature_solve_probe_list' into develop

Diffstat:
M.gitignore | 1+
MMakefile | 1+
Msrc/sdis.c | 173+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Msrc/sdis.h | 40++++++++++++++++++++++++++++++++++++++++
Msrc/sdis_c.h | 33+++++++++++++++++++++++++++++++--
Msrc/sdis_device.c | 12++++++++++++
Msrc/sdis_scene.c | 9++++++++-
Msrc/sdis_solve.c | 14++++++++++++++
Msrc/sdis_solve_camera.c | 2+-
Msrc/sdis_solve_probe_Xd.h | 381++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc/test_sdis_device.c | 8++++++++
Msrc/test_sdis_scene.c | 6++++++
Msrc/test_sdis_solve_probe_list.c | 378++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
13 files changed, 998 insertions(+), 60 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -9,5 +9,6 @@ test* .config .config_test .test +rng_state tags *.pc diff --git a/Makefile b/Makefile @@ -255,6 +255,7 @@ test_all: test clean_test: @$(SHELL) make.sh clean_test $(TEST_SRC) $(TEST_SRC_MPI) $(TEST_SRC_LONG) + rm -f rng_state ################################################################################ # Regular tests diff --git a/src/sdis.c b/src/sdis.c @@ -453,36 +453,54 @@ free_process_progress(struct sdis_device* dev, int32_t progress[]) } size_t -compute_process_realisations_count +compute_process_index_range (const struct sdis_device* dev, - const size_t nrealisations) + const size_t nindices, + size_t range[2]) { #ifndef SDIS_ENABLE_MPI - (void)dev, (void)nrealisations; - return nrealisations; + (void)dev; + range[0] = 0; + range[1] = nindices; /* Upper bound is _exclusive_ */ #else - size_t per_process_nrealisations = 0; - size_t remaining_nrealisations = 0; ASSERT(dev); - if(!dev->use_mpi) return nrealisations; - - /* Compute minimum the number of realisations on each process */ - per_process_nrealisations = nrealisations / (size_t)dev->mpi_nprocs; - - /* Define the remaining number of realisations that are not handle by one - * process */ - remaining_nrealisations = - nrealisations - - per_process_nrealisations * (size_t)dev->mpi_nprocs; - - /* Distribute the remaining realisations onto the processes */ - if((size_t)dev->mpi_rank >= remaining_nrealisations) { - return per_process_nrealisations; + if(!dev->use_mpi) { + range[0] = 0; + range[1] = nindices; } else { - return per_process_nrealisations + 1; + size_t per_process_indices = 0; + size_t remaining_indices = 0; + + /* Compute the minimum number of indices on each process */ + per_process_indices = nindices / (size_t)dev->mpi_nprocs; + + range[0] = per_process_indices * (size_t)dev->mpi_rank; + range[1] = range[0] + per_process_indices; /* Upper bound is _exclusive */ + ASSERT(range[0] <= range[1]); + + /* Set the remaining number of indexes that are not managed by one process */ + remaining_indices = + nindices - per_process_indices * (size_t)dev->mpi_nprocs; + + /* Distribute the remaining indices among the processes. Each process whose + * rank is lower than the number of remaining indices takes an additional + * index. To ensure continuity of indices per process, subsequent processes + * shift their initial rank accordingly, i.e. process 1 shifts its indices + * by 1, process 2 shifts them by 2 and so on until there are no more + * indices to distribute. From then on, subsequent processes simply shift + * their index range by the number of remaining indices that have been + * distributed. */ + if((size_t)dev->mpi_rank < remaining_indices) { + range[0] += (size_t)dev->mpi_rank; + range[1] += (size_t)dev->mpi_rank + 1/* Take one more index */; + } else { + range[0] += remaining_indices; + range[1] += remaining_indices; + } } #endif + return range[1] - range[0]; } #ifndef SDIS_ENABLE_MPI @@ -568,6 +586,119 @@ error: #ifndef SDIS_ENABLE_MPI res_T +gather_accumulators_list + (struct sdis_device* dev, + const enum mpi_sdis_message msg, + const size_t nprobes, /* Total number of probes */ + const size_t process_probes[2], /* Ids of the probes managed by the process */ + struct accum* per_probe_acc) /* List of per probe accumulators */ +{ + (void)dev, (void)msg, (void) nprobes; + (void)process_probes, (void)per_probe_acc; + return RES_OK; +} +#else +res_T +gather_accumulators_list + (struct sdis_device* dev, + const enum mpi_sdis_message msg, + const size_t nprobes, /* Total number of probes */ + const size_t process_probes[2], /* Range of probes managed by the process */ + struct accum* per_probe_acc) /* List of per probe accumulators */ +{ + struct accum_list { + size_t size; + /* Simulate a C99 flexible array */ + ALIGN(16) struct accum accums[1/*Dummy element*/]; + }* accum_list = NULL; + size_t max_per_process_nprobes = 0; /* Maximum #probes per process */ + size_t process_nprobes = 0; /* Number of process probes */ + size_t msg_sz = 0; /* Size in bytes of the message to send */ + res_T res = RES_OK; + + /* Check pre-conditions */ + ASSERT(dev); + ASSERT(process_nprobes == 0 || (process_probes && per_probe_acc)); + + /* Without MPI, do nothing since per_probe_acc already has all the + * accumulators */ + if(!dev->use_mpi) goto exit; + + /* Defines the maximum number of probes managed by a process. In fact, it's + * the number of probes divided by the number of processes, plus one to manage + * the remainder of the entire division: the remaining probes are distributed + * between the processes */ + max_per_process_nprobes = nprobes/(size_t)dev->mpi_nprocs + 1; + + /* Number of probes */ + process_nprobes = process_probes[1] - process_probes[0]; + + /* Allocate the array into which the data to be collected is copied */ + msg_sz = + sizeof(struct accum_list) + + sizeof(struct accum)*max_per_process_nprobes + - 1/*Dummy element */; + if(msg_sz > INT_MAX) { + log_err(dev, "%s: invalid MPI message size %lu.\n", + FUNC_NAME, (unsigned long)msg_sz); + res = RES_BAD_ARG; + goto error; + } + + accum_list = MEM_CALLOC(dev->allocator, 1, msg_sz); + if(!accum_list) { + log_err(dev, + "%s: unable to allocate the temporary list of accumulators.\n", + FUNC_NAME); + res = RES_MEM_ERR; + goto error; + } + + /* Non master process */ + if(dev->mpi_rank != 0) { + + /* Setup the message to be sent */ + accum_list->size = process_nprobes; + memcpy(accum_list->accums, per_probe_acc, + sizeof(struct accum)*process_nprobes); + + mutex_lock(dev->mpi_mutex); + MPI(Send(accum_list, (int)msg_sz, MPI_CHAR, 0/*Dst*/, msg, MPI_COMM_WORLD)); + mutex_unlock(dev->mpi_mutex); + + /* Master process */ + } else { + size_t gathered_nprobes = process_nprobes; + int iproc; + + FOR_EACH(iproc, 1, dev->mpi_nprocs) { + MPI_Request req; + + /* Asynchronously receive the accumulator of `iproc' */ + mutex_lock(dev->mpi_mutex); + MPI(Irecv + (accum_list, (int)msg_sz, MPI_CHAR, iproc, msg, MPI_COMM_WORLD, &req)); + mutex_unlock(dev->mpi_mutex); + + mpi_waiting_for_request(dev, &req); + + memcpy(per_probe_acc+gathered_nprobes, accum_list->accums, + sizeof(struct accum)*accum_list->size); + + gathered_nprobes += accum_list->size; + } + } + +exit: + if(accum_list) MEM_RM(dev->allocator, accum_list); + return res; +error: + goto exit; +} +#endif + +#ifndef SDIS_ENABLE_MPI +res_T gather_green_functions (struct sdis_scene* scn, struct ssp_rng_proxy* rng_proxy, diff --git a/src/sdis.h b/src/sdis.h @@ -466,6 +466,24 @@ struct sdis_solve_probe_args { static const struct sdis_solve_probe_args SDIS_SOLVE_PROBE_ARGS_DEFAULT = SDIS_SOLVE_PROBE_ARGS_DEFAULT__; +struct sdis_solve_probe_list_args { + struct sdis_solve_probe_args* probes; /* List of probes to compute */ + size_t nprobes; /* Total number of probes */ + + /* State/type of the RNG to use for the list of probes to calculate. + * The state/type defines per probe is ignored */ + struct ssp_rng* rng_state; /* Initial RNG state. May be NULL */ + enum ssp_rng_type rng_type; /* RNG type to use if `rng_state' is NULL */ +}; +#define SDIS_SOLVE_PROBE_LIST_ARGS_DEFAULT__ { \ + NULL, /* List of probes */ \ + 0, /* #probes */ \ + NULL, /* RNG state */ \ + SSP_RNG_THREEFRY /* RNG type */ \ +} +static const struct sdis_solve_probe_list_args +SDIS_SOLVE_PROBE_LIST_ARGS_DEFAULT = SDIS_SOLVE_PROBE_LIST_ARGS_DEFAULT__; + /* Arguments of a probe simulation */ struct sdis_solve_probe_boundary_args { size_t nrealisations; /* #realisations */ @@ -712,6 +730,11 @@ sdis_device_ref_put (struct sdis_device* dev); SDIS_API res_T +sdis_device_is_mpi_used + (struct sdis_device* dev, + int* is_mpi_used); + +SDIS_API res_T sdis_device_get_mpi_rank (struct sdis_device* dev, int* rank); @@ -1089,6 +1112,11 @@ sdis_scene_get_medium_spread const struct sdis_medium* mdm, double* spread); +SDIS_API res_T +sdis_scene_get_device + (struct sdis_scene* scn, + struct sdis_device** device); + /******************************************************************************* * An estimator stores the state of a simulation ******************************************************************************/ @@ -1330,6 +1358,18 @@ sdis_solve_probe const struct sdis_solve_probe_args* args, struct sdis_estimator** estimator); +/* Calculate temperature for a list of probe points. Unlike its + * single-probe counterpart, this function parallelizes the list of + * probes, rather than calculating a single probe. Calling this function + * is therefore more advantageous in terms of load distribution when the + * number of probe points to be evaluated is large compared to the cost + * of calculating a single probe point. */ +SDIS_API res_T +sdis_solve_probe_list + (struct sdis_scene* scn, + const struct sdis_solve_probe_list_args* args, + struct sdis_estimator_buffer** buf); + SDIS_API res_T sdis_solve_probe_boundary (struct sdis_scene* scn, diff --git a/src/sdis_c.h b/src/sdis_c.h @@ -85,11 +85,25 @@ free_process_progress (struct sdis_device* dev, int32_t progress[]); -/* Compute the number of realisations for the current process */ +/* Calculate the index range of the current process. It returns the size of the + * range. The overall_count is the number of calculations to parallelize between + * processes. For example, it may be the number of realisations of one + * calculation, or the total number of probe calculations. */ extern LOCAL_SYM size_t +compute_process_index_range + (const struct sdis_device* dev, + const size_t overall_count, + size_t range[2]); /* [lower, upper[ */ + +/* Return the number of realisations for the current process */ +static INLINE size_t compute_process_realisations_count (const struct sdis_device* dev, - const size_t overall_realisations_count); + const size_t overall_realisations_count) +{ + size_t range[2]; + return compute_process_index_range(dev, overall_realisations_count, range); +} /* Gather the accumulators and sum them in acc. With MPI, non master processes * store in acc the gathering of their per thread accumulators that are sent to @@ -102,6 +116,21 @@ gather_accumulators const struct accum* per_thread_acc, struct accum* acc); +/* Collect accumulators evaluated over multiple processes, with each accumulator + * storing a complete Monte Carlo calculation. Without MPI, nothing happens + * since the per_probe_acc variable already stores the entire list of + * accumulators. With MPI, non-master processes send their list of accumulators + * to the master process which saves them in the per_probe_acc, after its + * accumulators that it has managed, sorted against the identifiers of the + * probes listed in process_probes. */ +extern LOCAL_SYM res_T +gather_accumulators_list + (struct sdis_device* dev, + const enum mpi_sdis_message msg, + const size_t nprobes, /* Total number of probes */ + const size_t process_probes[2], /* Ids of the probes managed by the process */ + struct accum* per_probe_acc); /* List of per probe accumulators */ + /* Gather the green functions. With MPI, non master processes store in green * the gathering of their per thread green functions and sent the result to the * master process. The master process gathers both per thread green functions diff --git a/src/sdis_device.c b/src/sdis_device.c @@ -373,6 +373,18 @@ sdis_device_ref_put(struct sdis_device* dev) } res_T +sdis_device_is_mpi_used(struct sdis_device* dev, int* is_mpi_used) +{ + if(!dev || !is_mpi_used) return RES_BAD_ARG; +#ifndef SDIS_ENABLE_MPI + *is_mpi_used = 0; +#else + *is_mpi_used = dev->use_mpi; +#endif + return RES_OK; +} + +res_T sdis_device_get_mpi_rank(struct sdis_device* dev, int* rank) { #ifndef SDIS_ENABLE_MPI diff --git a/src/sdis_scene.c b/src/sdis_scene.c @@ -410,6 +410,14 @@ error: goto exit; } +res_T +sdis_scene_get_device(struct sdis_scene* scn, struct sdis_device** device) +{ + if(!scn || !device) return RES_BAD_ARG; + *device = scn->dev; + return RES_OK; +} + /******************************************************************************* * Local miscellaneous function ******************************************************************************/ @@ -556,4 +564,3 @@ exit: error: goto exit; } - diff --git a/src/sdis_solve.c b/src/sdis_solve.c @@ -57,6 +57,20 @@ sdis_solve_probe } res_T +sdis_solve_probe_list + (struct sdis_scene* scn, + const struct sdis_solve_probe_list_args args[], + struct sdis_estimator_buffer** out_buf) +{ + if(!scn) return RES_BAD_ARG; + if(scene_is_2d(scn)) { + return solve_probe_list_2d(scn, args, out_buf); + } else { + return solve_probe_list_3d(scn, args, out_buf); + } +} + +res_T sdis_solve_probe_green_function (struct sdis_scene* scn, const struct sdis_solve_probe_args* args, diff --git a/src/sdis_solve_camera.c b/src/sdis_solve_camera.c @@ -495,7 +495,7 @@ sdis_solve_camera char buffer[128]; /* Temporary buffer used to store formated time */ /* Stardis variables */ - struct sdis_estimator_buffer* buf= NULL; + struct sdis_estimator_buffer* buf = NULL; struct sdis_medium* medium = NULL; /* Random number generators */ diff --git a/src/sdis_solve_probe_Xd.h b/src/sdis_solve_probe_Xd.h @@ -16,6 +16,7 @@ #include "sdis_c.h" #include "sdis_device_c.h" #include "sdis_estimator_c.h" +#include "sdis_estimator_buffer_c.h" #include "sdis_log.h" #include "sdis_green.h" #include "sdis_misc.h" @@ -66,8 +67,183 @@ check_solve_probe_args(const struct sdis_solve_probe_args* args) return RES_OK; } +static INLINE res_T +check_solve_probe_list_args + (struct sdis_device* dev, + const struct sdis_solve_probe_list_args* args) +{ + size_t iprobe = 0; + + if(!args) return RES_BAD_ARG; + + /* Check the list of probes */ + if(!args->probes || !args->nprobes) { + return RES_BAD_ARG; + } + + /* Check the RNG type */ + if(!args->rng_state && args->rng_type >= SSP_RNG_TYPES_COUNT__) { + return RES_BAD_ARG; + } + + FOR_EACH(iprobe, 0, args->nprobes) { + const res_T res = check_solve_probe_args(args->probes+iprobe); + if(res != RES_OK) return res; + + if(args->probes[iprobe].register_paths != SDIS_HEAT_PATH_NONE) { + log_warn(dev, + "Unable to save paths for probe %lu. " + "Saving path is not supported when solving multiple probes\n", + (unsigned long)iprobe); + } + } + + return RES_OK; +} + +static res_T +setup_estimator_buffer + (struct sdis_device* dev, + struct ssp_rng_proxy* rng_proxy, + const struct sdis_solve_probe_list_args* solve_args, + const struct accum* per_probe_acc_temp, + const struct accum* per_probe_acc_time, + struct sdis_estimator_buffer** out_estim_buffer) +{ + /* Accumulators throughout the buffer */ + struct accum acc_temp = ACCUM_NULL; + struct accum acc_time = ACCUM_NULL; + size_t nrealisations = 0; + + struct sdis_estimator_buffer* estim_buf = NULL; + size_t iprobe = 0; + res_T res = RES_OK; + + ASSERT(dev && rng_proxy && solve_args); + ASSERT(per_probe_acc_time && per_probe_acc_time && out_estim_buffer); + + res = estimator_buffer_create(dev, solve_args->nprobes, 1, &estim_buf); + if(res != RES_OK) { + log_err(dev, "Unable to allocate the estimator buffer.\n"); + goto error; + } + + FOR_EACH(iprobe, 0, solve_args->nprobes) { + const struct sdis_solve_probe_args* probe = NULL; + const struct accum* probe_acc_temp = NULL; + const struct accum* probe_acc_time = NULL; + struct sdis_estimator* estim = NULL; + + /* Get probe data */ + probe = solve_args->probes + iprobe; + probe_acc_temp = per_probe_acc_temp + iprobe; + probe_acc_time = per_probe_acc_time + iprobe; + ASSERT(probe_acc_temp->count == probe_acc_time->count); + + /* Setup probe estimator */ + estim = estimator_buffer_grab(estim_buf, iprobe, 0); + estimator_setup_realisations_count + (estim, probe->nrealisations, probe_acc_temp->count); + estimator_setup_temperature + (estim, probe_acc_temp->sum, probe_acc_temp->sum2); + estimator_setup_realisation_time + (estim, probe_acc_time->sum, probe_acc_time->sum2); + + /* Update global accumulators */ + acc_temp.sum += probe_acc_temp->sum; + acc_temp.sum2 += probe_acc_temp->sum2; + acc_temp.count += probe_acc_temp->count; + acc_time.sum += probe_acc_time->sum; + acc_time.sum2 += probe_acc_time->sum2; + acc_time.count += probe_acc_time->count; + nrealisations += probe->nrealisations; + } + + ASSERT(acc_temp.count == acc_time.count); + + /* Setup global estimator */ + estimator_buffer_setup_realisations_count + (estim_buf, nrealisations, acc_temp.count); + estimator_buffer_setup_temperature + (estim_buf, acc_temp.sum, acc_temp.sum2); + estimator_buffer_setup_realisation_time + (estim_buf, acc_time.sum, acc_time.sum2); + + res = estimator_buffer_save_rng_state(estim_buf, rng_proxy); + if(res != RES_OK) goto error; + +exit: + *out_estim_buffer = estim_buf; + return res; +error: + goto exit; +} + #endif /* SDIS_SOLVE_PROBE_XD_H */ +static res_T +XD(solve_one_probe) + (struct sdis_scene* scn, + struct ssp_rng* rng, + const struct sdis_solve_probe_args* args, + struct accum* acc_temp, + struct accum* acc_time) +{ + struct sdis_medium* medium = NULL; /* Medium in which the probe lies */ + size_t irealisation = 0; + res_T res = RES_OK; + ASSERT(scn && rng && check_solve_probe_args(args) == RES_OK); + ASSERT(acc_temp && acc_time); + + *acc_temp = ACCUM_NULL; + *acc_time = ACCUM_NULL; + + /* Retrieve the medium in which the submitted position lies */ + res = scene_get_medium(scn, args->position, NULL, &medium); + if(res != RES_OK) goto error; + + FOR_EACH(irealisation, 0, args->nrealisations) { + struct probe_realisation_args realis_args = PROBE_REALISATION_ARGS_NULL; + double w = NaN; /* MC weight */ + double usec = 0; /* Time of a realisation */ + double time = 0; /* Sampled observation time */ + struct time t0, t1; /* Register the time spent solving a realisation */ + + /* Begin time registration of the realisation */ + time_current(&t0); + + /* Sample observation time */ + time = sample_time(rng, args->time_range); + + /* Run a realisation */ + realis_args.rng = rng; + realis_args.medium = medium; + realis_args.time = time; + realis_args.picard_order = args->picard_order; + realis_args.irealisation = irealisation; + dX(set)(realis_args.position, args->position); + res = XD(probe_realisation)(scn, &realis_args, &w); + if(res != RES_OK) goto error; + + /* Stop time registration */ + time_sub(&t0, time_current(&t1), &t0); + usec = (double)time_val(&t0, TIME_NSEC) * 0.001; + + /* Update MC weights */ + acc_temp->sum += w; + acc_temp->sum2 += w*w; + acc_temp->count += 1; + acc_time->sum += usec; + acc_time->sum2 += usec*usec; + acc_time->count += 1; + } + +exit: + return res; +error: + goto exit; +} + /******************************************************************************* * Generic solve function ******************************************************************************/ @@ -316,7 +492,7 @@ XD(solve_probe) if(res != RES_OK) goto error; \ } (void)0 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TEMP, acc_temp); - GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TEMP, acc_time); + GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TIME, acc_time); #undef GATHER_ACCUMS time_sub(&time0, time_current(&time1), &time0); @@ -369,5 +545,206 @@ error: goto exit; } -#include "sdis_Xd_end.h" +static res_T +XD(solve_probe_list) + (struct sdis_scene* scn, + const struct sdis_solve_probe_list_args* args, + struct sdis_estimator_buffer** out_estim_buf) +{ + /* Time registration */ + struct time time0, time1; + char buf[128]; /* Temporary buffer used to store formated time */ + + /* Device variables */ + struct mem_allocator* allocator = NULL; + + /* Stardis variables */ + struct sdis_estimator_buffer* estim_buf = NULL; + + /* Random Number generator */ + struct ssp_rng_proxy* rng_proxy = NULL; + struct ssp_rng** per_thread_rng = NULL; + + /* Probe variables */ + size_t process_probes[2] = {0, 0}; /* Probes range managed by the process */ + size_t process_nprobes = 0; /* Number of probes managed by the process */ + size_t nprobes = 0; + struct accum* per_probe_acc_temp = NULL; + struct accum* per_probe_acc_time = NULL; + + /* Miscellaneous */ + int32_t* progress = NULL; /* Per process progress bar */ + int pcent_progress = 1; /* Percentage requiring progress update */ + int is_master_process = 0; + int64_t i = 0; + ATOMIC nsolved_probes = 0; + ATOMIC res = RES_OK; + /* Check input arguments */ + if(!scn || !out_estim_buf) { res = RES_BAD_ARG; goto error; } + res = check_solve_probe_list_args(scn->dev, args); + if(res != RES_OK) goto error; + res = XD(scene_check_dimensionality)(scn); + if(res != RES_OK) goto error; + +#ifdef SDIS_ENABLE_MPI + is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0; +#endif + + allocator = scn->dev->allocator; + + /* Update the progress bar every percent if escape sequences are allowed in + * log messages or only every 10 percent when only plain text is allowed. + * This reduces the number of lines of plain text printed */ + pcent_progress = scn->dev->no_escape_sequence ? 10 : 1; + + /* Create the per thread RNGs */ + res = create_per_thread_rng + (scn->dev, args->rng_state, args->rng_type, &rng_proxy, &per_thread_rng); + if(res != RES_OK) goto error; + + /* Allocate the per process progress status */ + res = alloc_process_progress(scn->dev, &progress); + if(res != RES_OK) goto error; + + /* Synchronise the processes */ + process_barrier(scn->dev); + + /* Define the range of probes to manage in this process */ + process_nprobes = compute_process_index_range + (scn->dev, args->nprobes, process_probes); + + #define PROGRESS_MSG "Solving probes: " + print_progress(scn->dev, progress, PROGRESS_MSG); + + /* If there is no work to be done on this process (i.e. no probe to + * calculate), simply print its completion and go straight to the + * synchronization barrier.*/ + if(process_nprobes == 0) { + progress[0] = 100; + print_progress_update(scn->dev, progress, PROGRESS_MSG); + goto post_sync; + } + + /* Begin time registration of the computation */ + time_current(&time0); + + /* Allocate the list of accumulators per probe. On the master process, + * allocate a complete list in which the accumulators of all processes will be + * stored. */ + nprobes = is_master_process ? args->nprobes : process_nprobes; + per_probe_acc_temp = MEM_CALLOC(allocator, nprobes, sizeof(*per_probe_acc_temp)); + per_probe_acc_time = MEM_CALLOC(allocator, nprobes, sizeof(*per_probe_acc_time)); + if(!per_probe_acc_temp || !per_probe_acc_time) { + log_err(scn->dev, "Unable to allocate the list of accumulators per probe.\n"); + res = RES_MEM_ERR; + goto error; + } + + /* Here we go! Calculation of probe list */ + omp_set_num_threads((int)scn->dev->nthreads); + #pragma omp parallel for schedule(static) + for(i = 0; i < (int64_t)process_nprobes; ++i) { + /* Thread */ + const int ithread = omp_get_thread_num(); /* Thread ID */ + struct ssp_rng* rng = per_thread_rng[ithread]; /* RNG of the thread */ + + /* Probe */ + struct accum* probe_acc_temp = NULL; + struct accum* probe_acc_time = NULL; + const struct sdis_solve_probe_args* probe_args = NULL; /* Solve args */ + const size_t iprobe = process_probes[0] + (size_t)i; /* Probe ID */ + + /* Misc */ + size_t n = 0; /* Number of solved probes */ + int pcent = 0; /* Current progress */ + res_T res_local = RES_OK; + + if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occured */ + + /* Retrieve the probe arguments */ + probe_args = &args->probes[iprobe]; + + /* Retrieve the probe accumulators */ + probe_acc_temp = &per_probe_acc_temp[i]; + probe_acc_time = &per_probe_acc_time[i]; + + res_local = XD(solve_one_probe) + (scn, rng, probe_args, probe_acc_temp, probe_acc_time); + if(res_local != RES_OK) { + ATOMIC_SET(&res, res_local); + continue; + } + + /* Update progress */ + n = (size_t)ATOMIC_INCR(&nsolved_probes); + pcent = (int)((double)n * 100.0 / (double)process_nprobes + 0.5/*round*/); + + #pragma omp critical + if(pcent/pcent_progress > progress[0]/pcent_progress) { + progress[0] = pcent; + print_progress_update(scn->dev, progress, PROGRESS_MSG); + } + } + +post_sync: + /* Synchronise processes */ + process_barrier(scn->dev); + + res = gather_res_T(scn->dev, (res_T)res); + if(res != RES_OK) goto error; + + print_progress_completion(scn->dev, progress, PROGRESS_MSG); + #undef PROGRESS_MSG + + /* Report computation time */ + time_sub(&time0, time_current(&time1), &time0); + time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); + log_info(scn->dev, "%lu probes solved in %s.\n", + (unsigned long)args->nprobes, buf); + + /* Gather the RNG proxy sequence IDs and ensure that the RNG proxy state of + * the master process is greater than the RNG proxy state of all other + * processes */ + res = gather_rng_proxy_sequence_id(scn->dev, rng_proxy); + if(res != RES_OK) goto error; + + time_current(&time0); + + /* Gather the list of accumulators */ + #define GATHER_ACCUMS_LIST(Msg, Acc) { \ + res = gather_accumulators_list \ + (scn->dev, Msg, args->nprobes, process_probes, per_probe_acc_##Acc); \ + if(res != RES_OK) goto error; \ + } (void)0 + GATHER_ACCUMS_LIST(MPI_SDIS_MSG_ACCUM_TEMP, temp); + GATHER_ACCUMS_LIST(MPI_SDIS_MSG_ACCUM_TIME, time); + #undef GATHER_ACCUMS_LIST + + time_sub(&time0, time_current(&time1), &time0); + time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); + log_info(scn->dev, "Probes accumulator gathered in %s.\n", buf); + + if(is_master_process) { + res = setup_estimator_buffer(scn->dev, rng_proxy, args, per_probe_acc_temp, + per_probe_acc_time, &estim_buf); + if(res != RES_OK) goto error; + } + +exit: + if(per_thread_rng) release_per_thread_rng(scn->dev, per_thread_rng); + if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); + if(per_probe_acc_temp) MEM_RM(allocator, per_probe_acc_temp); + if(per_probe_acc_time) MEM_RM(allocator, per_probe_acc_time); + if(progress) free_process_progress(scn->dev, progress); + if(out_estim_buf) *out_estim_buf = estim_buf; + return (res_T)res; +error: + if(estim_buf) { + SDIS(estimator_buffer_ref_put(estim_buf)); + estim_buf = NULL; + } + goto exit; +} + +#include "sdis_Xd_end.h" diff --git a/src/test_sdis_device.c b/src/test_sdis_device.c @@ -37,6 +37,7 @@ main(int argc, char** argv) struct logger logger; struct mem_allocator allocator; struct sdis_device* dev; + int is_mpi_used; #ifdef SDIS_ENABLE_MPI int provided; #endif @@ -84,6 +85,9 @@ main(int argc, char** argv) args.nthreads_hint = SDIS_NTHREADS_DEFAULT; OK(sdis_device_create(&args, &dev)); + BA(sdis_device_is_mpi_used(NULL, &is_mpi_used)); + BA(sdis_device_is_mpi_used(dev, NULL)); + OK(sdis_device_ref_put(dev)); args.use_mpi = 1; @@ -91,10 +95,14 @@ main(int argc, char** argv) #ifndef SDIS_ENABLE_MPI OK(sdis_device_create(&args, &dev)); + OK(sdis_device_is_mpi_used(dev, &is_mpi_used)); + CHK(!is_mpi_used); OK(sdis_device_ref_put(dev)); #else CHK(MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided) == MPI_SUCCESS); OK(sdis_device_create(&args, &dev)); + OK(sdis_device_is_mpi_used(dev, &is_mpi_used)); + CHK(is_mpi_used); CHK(MPI_Finalize() == MPI_SUCCESS); OK(sdis_device_ref_put(dev)); #endif diff --git a/src/test_sdis_scene.c b/src/test_sdis_scene.c @@ -99,6 +99,7 @@ test_scene_3d(struct sdis_device* dev, struct sdis_interface* interf) struct senc3d_scene* scn3d; struct sdis_scene_create_args scn_args = SDIS_SCENE_CREATE_ARGS_DEFAULT; struct sdis_scene* scn = NULL; + struct sdis_device* dev2 = NULL; size_t ntris, npos; size_t iprim; size_t i; @@ -163,6 +164,11 @@ test_scene_3d(struct sdis_device* dev, struct sdis_interface* interf) OK(sdis_scene_get_dimension(scn, &dim)); CHK(dim == SDIS_SCENE_3D); + BA(sdis_scene_get_device(NULL, &dev2)); + BA(sdis_scene_get_device(scn, NULL)); + OK(sdis_scene_get_device(scn, &dev2)); + CHK(dev == dev2); + BA(sdis_scene_get_aabb(NULL, lower, upper)); BA(sdis_scene_get_aabb(scn, NULL, upper)); BA(sdis_scene_get_aabb(scn, lower, NULL)); diff --git a/src/test_sdis_solve_probe_list.c b/src/test_sdis_solve_probe_list.c @@ -16,8 +16,15 @@ #include "sdis.h" #include "test_sdis_utils.h" +#include <rsys/float3.h> + #include <star/s3d.h> #include <star/s3dut.h> +#include <star/ssp.h> + +#ifndef SDIS_ENABLE_MPI + #include <mpi.h> +#endif /* * The system is a trilinear profile of steady state temperature, i.e. at each @@ -44,8 +51,8 @@ static double trilinear_profile(const double pos[3]) { /* Range in X, Y and Z in which the trilinear profile is defined */ - const double lower = -10; - const double upper = +10; + const double lower = -3; + const double upper = +3; /* Upper temperature limit in X, Y and Z [K]. Lower temperature limit is * implicitly 0 */ @@ -67,17 +74,28 @@ trilinear_profile(const double pos[3]) return a*x + b*y + c*z; } -static double -compute_delta(struct s3d_scene_view* view) +static INLINE float +rand_canonic(void) { - float S = 0; /* Surface */ - float V = 0; /* Volume */ - - OK(s3d_scene_view_compute_area(view, &S)); - OK(s3d_scene_view_compute_volume(view, &V)); - CHK(S > 0 && V > 0); + return (float)rand() / (float)((unsigned)RAND_MAX+1); +} - return (4.0*V/S)/20.0; +static INLINE void +check_intersection + (const double val0, + const double eps0, + const double val1, + const double eps1) +{ + double interval0[2], interval1[2]; + double intersection[2]; + interval0[0] = val0 - eps0; + interval0[1] = val0 + eps0; + interval1[0] = val1 - eps1; + interval1[1] = val1 + eps1; + intersection[0] = MMAX(interval0[0], interval1[0]); + intersection[1] = MMIN(interval0[1], interval1[1]); + CHK(intersection[0] <= intersection[1]); } /******************************************************************************* @@ -156,6 +174,66 @@ create_view(struct s3dut_mesh* mesh) return view; } +static double +view_compute_delta(struct s3d_scene_view* view) +{ + float S = 0; /* Surface */ + float V = 0; /* Volume */ + + OK(s3d_scene_view_compute_area(view, &S)); + OK(s3d_scene_view_compute_volume(view, &V)); + CHK(S > 0 && V > 0); + + return (4.0*V/S)/30.0; +} + +static void +view_sample_position + (struct s3d_scene_view* view, + const double delta, + double pos[3]) +{ + /* Ray variables */ + const float dir[3] = {0, 1, 0}; + const float range[2] = {0, FLT_MAX}; + float org[3]; + + /* View variables */ + float low[3]; + float upp[3]; + + OK(s3d_scene_view_get_aabb(view, low, upp)); + + /* Sample a position in the supershape by uniformly sampling a position within + * its bounding box and rejecting it if it is not in the supershape or if it + * is too close to its boundaries. */ + for(;;) { + struct s3d_hit hit = S3D_HIT_NULL; + float N[3] = {0, 0, 0}; /* Normal */ + + pos[0] = low[0] + rand_canonic()*(upp[0] - low[0]); + pos[1] = low[1] + rand_canonic()*(upp[1] - low[1]); + pos[2] = low[2] + rand_canonic()*(upp[2] - low[2]); + + org[0] = (float)pos[0]; + org[1] = (float)pos[1]; + org[2] = (float)pos[2]; + OK(s3d_scene_view_trace_ray(view, org, dir, range, NULL, &hit)); + + if(S3D_HIT_NONE(&hit)) continue; + + f3_normalize(N, hit.normal); + if(f3_dot(N, dir) > 0) continue; + + OK(s3d_scene_view_closest_point(view, org, (float)INF, NULL, &hit)); + CHK(!S3D_HIT_NONE(&hit)); + + /* Sample position is in the super shape and is not too close to its + * boundaries */ + if(hit.distance > delta) break; + } +} + /******************************************************************************* * Scene, i.e. the system to simulate ******************************************************************************/ @@ -251,11 +329,11 @@ create_solid(struct sdis_device* sdis, struct s3d_scene_view* view) struct sdis_solid_shader shader = SDIS_SOLID_SHADER_NULL; struct sdis_medium* solid = NULL; struct sdis_data* data = NULL; - double* delta = NULL; + double* pdelta = NULL; OK(sdis_data_create(sdis, sizeof(double), ALIGNOF(double), NULL, &data)); - delta = sdis_data_get(data); - *delta = compute_delta(view); + pdelta = sdis_data_get(data); + *pdelta = view_compute_delta(view); shader.calorific_capacity = solid_get_calorific_capacity; shader.thermal_conductivity = solid_get_thermal_conductivity; @@ -263,6 +341,8 @@ create_solid(struct sdis_device* sdis, struct s3d_scene_view* view) shader.delta = solid_get_delta; shader.temperature = solid_get_temperature; OK(sdis_solid_create(sdis, &shader, data, &solid)); + + OK(sdis_data_ref_put(data)); return solid; } @@ -315,26 +395,254 @@ create_interface * Validations ******************************************************************************/ static void -check_probe(struct sdis_scene* scn) +check_probe_list_api + (struct sdis_scene* scn, + const struct sdis_solve_probe_list_args* in_args) +{ + struct sdis_solve_probe_list_args args = *in_args; + struct sdis_estimator_buffer* estim_buf = NULL; + + /* Check API */ + BA(sdis_solve_probe_list(NULL, &args, &estim_buf)); + BA(sdis_solve_probe_list(scn, NULL, &estim_buf)); + BA(sdis_solve_probe_list(scn, &args, NULL)); + args.nprobes = 0; + BA(sdis_solve_probe_list(scn, &args, &estim_buf)); + args.nprobes = in_args->nprobes; + args.probes = NULL; + BA(sdis_solve_probe_list(scn, &args, &estim_buf)); +} + +/* Check the estimators against the analytical solution */ +static void +check_estimator_buffer + (const struct sdis_estimator_buffer* estim_buf, + const struct sdis_solve_probe_list_args* args) { - struct sdis_solve_probe_args args = SDIS_SOLVE_PROBE_ARGS_DEFAULT; struct sdis_mc T = SDIS_MC_NULL; - struct sdis_estimator* estimator = NULL; - double ref = 0; /* Analytical reference */ - - args.nrealisations = 100000; - args.position[0] = 0; - args.position[1] = 0; - args.position[2] = 0; - OK(sdis_solve_probe(scn, &args, &estimator)); - OK(sdis_estimator_get_temperature(estimator, &T)); - - ref = trilinear_profile(args.position); - printf("T(%g, %g, %g) = %g ~ %g +/- %g\n", - SPLIT3(args.position), ref, T.E, T.SE); - CHK(eq_eps(ref, T.E, 3*T.SE)); - - OK(sdis_estimator_ref_put(estimator)); + size_t iprobe = 0; + + /* Variables used to check the global estimation results */ + size_t total_nrealisations = 0; + size_t total_nrealisations_ref = 0; + size_t total_nfailures = 0; + size_t total_nfailures_ref = 0; + double sum = 0; /* Global sum of weights */ + double sum2 = 0; /* Global sum of squared weights */ + double N = 0; /* Number of (successful) realisations */ + double E = 0; /* Expected value */ + double V = 0; /* Variance */ + double SE = 0; /* Standard Error */ + + /* Check the results */ + FOR_EACH(iprobe, 0, args->nprobes) { + const struct sdis_estimator* estimator = NULL; + size_t probe_nrealisations = 0; + size_t probe_nfailures = 0; + double probe_sum = 0; + double probe_sum2 = 0; + double ref = 0; + + /* Fetch result */ + OK(sdis_estimator_buffer_at(estim_buf, iprobe, 0, &estimator)); + OK(sdis_estimator_get_temperature(estimator, &T)); + OK(sdis_estimator_get_realisation_count(estimator, &probe_nrealisations)); + OK(sdis_estimator_get_failure_count(estimator, &probe_nfailures)); + + /* Check probe estimation */ + ref = trilinear_profile(args->probes[iprobe].position); + printf("T(%g, %g, %g) = %g ~ %g +/- %g\n", + SPLIT3(args->probes[iprobe].position), ref, T.E, T.SE); + CHK(eq_eps(ref, T.E, 3*T.SE)); + + /* Check miscellaneous results */ + CHK(probe_nrealisations == args->probes[iprobe].nrealisations); + + /* Compute the sum of weights and sum of squared weights of the probe */ + probe_sum = T.E * (double)probe_nrealisations; + probe_sum2 = (T.V + T.E*T.E) * (double)probe_nrealisations; + + /* Update the global estimation */ + total_nrealisations_ref += probe_nrealisations; + total_nfailures_ref += probe_nfailures; + sum += probe_sum; + sum2 += probe_sum2; + } + + /* Check the overall estimate of the estimate buffer. This estimate is not + * really a result expected by the caller since the probes are all + * independent. But to be consistent with the estimate buffer API, we need to + * provide it. And so we check its validity */ + OK(sdis_estimator_buffer_get_temperature(estim_buf, &T)); + OK(sdis_estimator_buffer_get_realisation_count(estim_buf, &total_nrealisations)); + OK(sdis_estimator_buffer_get_failure_count(estim_buf, &total_nfailures)); + + CHK(total_nrealisations == total_nrealisations_ref); + CHK(total_nfailures == total_nfailures_ref); + + N = (double)total_nrealisations_ref; + E = sum / N; + V = sum2 / N - E*E; + SE = sqrt(V/N); + check_intersection(E, SE, T.E, T.SE); +} + +/* Check that the buffers store statistically independent estimates */ +static void +check_estimator_buffer_combination + (const struct sdis_estimator_buffer* estim_buf1, + const struct sdis_estimator_buffer* estim_buf2, + const struct sdis_solve_probe_list_args* args) +{ + size_t iprobe; + + /* Check that the 2 series of estimates are compatible but not identical */ + FOR_EACH(iprobe, 0, args->nprobes) { + const struct sdis_estimator* estimator1 = NULL; + const struct sdis_estimator* estimator2 = NULL; + size_t nrealisations1 = 0; + size_t nrealisations2 = 0; + struct sdis_mc T1 = SDIS_MC_NULL; + struct sdis_mc T2 = SDIS_MC_NULL; + double sum = 0; /* Sum of weights */ + double sum2 = 0; /* Sum of squared weights */ + double E = 0; /* Expected value */ + double V = 0; /* Variance */ + double SE = 0; /* Standard Error */ + double N = 0; /* Number of realisations */ + double ref = 0; /* Analytical solution */ + + /* Retrieve the estimation results */ + OK(sdis_estimator_buffer_at(estim_buf1, iprobe, 0, &estimator1)); + OK(sdis_estimator_buffer_at(estim_buf2, iprobe, 0, &estimator2)); + OK(sdis_estimator_get_temperature(estimator1, &T1)); + OK(sdis_estimator_get_temperature(estimator2, &T2)); + + /* Estimates are not identical... */ + CHK(T1.E != T2.E); + CHK(T1.SE != T2.SE); + + /* ... but are compatible ... */ + check_intersection(T1.E, 3*T1.SE, T2.E, 3*T2.SE); + + /* We can combine the 2 estimates since they must be numerically + * independent. We can therefore verify that their combination is valid with + * respect to the analytical solution */ + OK(sdis_estimator_get_realisation_count(estimator1, &nrealisations1)); + OK(sdis_estimator_get_realisation_count(estimator2, &nrealisations2)); + + sum = + T1.E*(double)nrealisations1 + + T2.E*(double)nrealisations2; + sum2 = + (T1.V + T1.E*T1.E)*(double)nrealisations1 + + (T2.V + T2.E*T2.E)*(double)nrealisations2; + N = (double)(nrealisations1 + nrealisations2); + E = sum / N; + V = sum2 / N - E*E; + SE = sqrt(V/N); + + ref = trilinear_profile(args->probes[iprobe].position); + printf("T(%g, %g, %g) = %g ~ %g +/- %g\n", + SPLIT3(args->probes[iprobe].position), ref, E, SE); + CHK(eq_eps(ref, E, 3*SE)); + } +} + +static void +check_probe_list + (struct sdis_scene* scn, + struct s3d_scene_view* view, + const int is_master_process) +{ + #define NPROBES 10 + + /* RNG state */ + const char* rng_state_filename = "rng_state"; + struct ssp_rng* rng = NULL; + enum ssp_rng_type rng_type = SSP_RNG_TYPE_NULL; + + /* Probe variables */ + struct sdis_solve_probe_args probes[NPROBES]; + struct sdis_solve_probe_list_args args = SDIS_SOLVE_PROBE_LIST_ARGS_DEFAULT; + size_t iprobe = 0; + + /* Estimations */ + struct sdis_estimator_buffer* estim_buf = NULL; + struct sdis_estimator_buffer* estim_buf2 = NULL; + + /* Misc */ + double delta = 0; + FILE* fp = NULL; + + delta = view_compute_delta(view); + + /* Setup the list of probes to calculate */ + args.probes = probes; + args.nprobes = NPROBES; + FOR_EACH(iprobe, 0, NPROBES) { + probes[iprobe] = SDIS_SOLVE_PROBE_ARGS_DEFAULT; + probes[iprobe].nrealisations = 10000; + view_sample_position(view, delta, probes[iprobe].position); + } + + check_probe_list_api(scn, &args); + + /* Solve the probes */ + OK(sdis_solve_probe_list(scn, &args, &estim_buf)); + + if(!is_master_process) { + CHK(estim_buf == NULL); + } else { + check_estimator_buffer(estim_buf, &args); + + /* Check the use of a non-default rng_state. Run the calculations using the + * final RNG state from the previous estimate. Thus, the estimates are + * statically independent and can be combined. To do this, write the RNG + * state to a file so that it is read by all processes for the next test. */ + OK(sdis_estimator_buffer_get_rng_state(estim_buf, &rng)); + OK(ssp_rng_get_type(rng, &rng_type)); + CHK(fp = fopen(rng_state_filename, "w")); + CHK(fwrite(&rng_type, sizeof(rng_type), 1, fp) == 1); + OK(ssp_rng_write(rng, fp)); + CHK(fclose(fp) == 0); + } + +#ifdef SDIS_ENABLE_MPI + /* Synchronize processes. Wait for the master process to write RNG state to + * be used */ + { + struct sdis_device* sdis = NULL; + int is_mpi_used = 0; + OK(sdis_scene_get_device(scn, &sdis)); + OK(sdis_device_is_mpi_used(sdis, &is_mpi_used)); + if(is_mpi_used) { + CHK(MPI_Barrier(MPI_COMM_WORLD) == MPI_SUCCESS); + } + } +#endif + + /* Read the RNG state */ + CHK(fp = fopen(rng_state_filename, "r")); + CHK(fread(&rng_type, sizeof(rng_type), 1, fp) == 1); + OK(ssp_rng_create(NULL, rng_type, &rng)); + OK(ssp_rng_read(rng, fp)); + CHK(fclose(fp) == 0); + + /* Run a new calculation using the read RNG state */ + args.rng_state = rng; + OK(sdis_solve_probe_list(scn, &args, &estim_buf2)); + OK(ssp_rng_ref_put(rng)); + + if(!is_master_process) { + CHK(estim_buf2 == NULL); + } else { + check_estimator_buffer_combination(estim_buf, estim_buf2, &args); + OK(sdis_estimator_buffer_ref_put(estim_buf)); + OK(sdis_estimator_buffer_ref_put(estim_buf2)); + } + + #undef NPROBES } /******************************************************************************* @@ -367,14 +675,18 @@ main(int argc, char** argv) interf = create_interface(sdis, solid, dummy); scn = create_scene(sdis, super_shape, interf); - check_probe(scn); + check_probe_list(scn, view, is_master_process); OK(s3dut_mesh_ref_put(super_shape)); + OK(s3d_scene_view_ref_put(view)); OK(sdis_medium_ref_put(solid)); OK(sdis_medium_ref_put(dummy)); OK(sdis_interface_ref_put(interf)); OK(sdis_scene_ref_put(scn)); free_default_device(sdis); + + CHK(mem_allocated_size() == 0); + return 0; }