commit 4181cf76fe1b24f83d864084bccf6f9cc5774084
parent b01c461881e959632e9ad29f713b908f2c1ba2a4
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Thu, 16 Dec 2021 16:37:09 +0100
Add and test the MPI support to the function sdis_solve_boundary_flux
Diffstat:
3 files changed, 192 insertions(+), 130 deletions(-)
diff --git a/src/sdis_solve_boundary_Xd.h b/src/sdis_solve_boundary_Xd.h
@@ -37,7 +37,7 @@ static const struct XD(boundary_context) XD(BOUNDARY_CONTEXT_NULL) = {
};
/*******************************************************************************
- * Help functions
+ * Non generic helper functions
******************************************************************************/
#ifndef SDIS_SOLVE_BOUNDARY_XD
#define SDIS_SOLVE_BOUNDARY_XD
@@ -74,8 +74,43 @@ check_solve_boundary_args(const struct sdis_solve_boundary_args* args)
return RES_OK;
}
+static INLINE res_T
+check_solve_boundary_flux_args(const struct sdis_solve_boundary_flux_args* args)
+{
+ if(!args) return RES_BAD_ARG;
+
+ /* Check #realisations */
+ if(!args->nrealisations || args->nrealisations > INT64_MAX) {
+ return RES_BAD_ARG;
+ }
+
+ /* Check the list of primitives */
+ if(!args->primitives || !args->nprimitives) {
+ return RES_BAD_ARG;
+ }
+
+ /* Check time range */
+ if(args->time_range[0] < 0 || args->time_range[1] < args->time_range[0]) {
+ return RES_BAD_ARG;
+ }
+ if(args->time_range[1] > DBL_MAX
+ && args->time_range[0] != args->time_range[1]) {
+ return RES_BAD_ARG;
+ }
+
+ /* Check picard order */
+ if(args->picard_order < 1) {
+ return RES_BAD_ARG;
+ }
+
+ return RES_OK;
+}
+
#endif /* SDIS_SOLVE_BOUNDARY_XD */
+/*******************************************************************************
+ * Helper functions
+ ******************************************************************************/
static INLINE void
XD(boundary_get_indices)(const unsigned iprim, unsigned ids[DIM], void* context)
{
@@ -496,59 +531,59 @@ XD(solve_boundary_flux)
const struct sdis_solve_boundary_flux_args* args,
struct sdis_estimator** out_estimator)
{
+ /* Time registration */
+ struct time time0, time1;
+ char buf[128]; /* Temporary buffer used to store formated time */
+
+ /* Stardis variables */
+ struct sdis_estimator* estimator = NULL;
+
+ /* XD variables */
struct XD(boundary_context) ctx = XD(BOUNDARY_CONTEXT_NULL);
struct sXd(vertex_data) vdata = SXD_VERTEX_DATA_NULL;
struct sXd(scene)* scene = NULL;
struct sXd(shape)* shape = NULL;
struct sXd(scene_view)* view = NULL;
- struct sdis_estimator* estimator = NULL;
+
+ /* Random number generator */
struct ssp_rng_proxy* rng_proxy = NULL;
- struct ssp_rng** rngs = NULL;
- struct accum* acc_tp = NULL; /* Per thread temperature accumulator */
- struct accum* acc_ti = NULL; /* Per thread realisation time accumulator */
- struct accum* acc_fl = NULL; /* Per thread flux accumulator */
- struct accum* acc_fc = NULL; /* Per thread convective flux accumulator */
- struct accum* acc_fr = NULL; /* Per thread radiative flux accumulator */
- struct accum* acc_fi = NULL; /* Per thread imposed flux accumulator */
+ struct ssp_rng** per_thread_rng = NULL;
+
+ /* Per thread accumulators */
+ struct accum* per_thread_acc_tp = NULL; /* Temperature accumulator */
+ struct accum* per_thread_acc_ti = NULL; /* Realisation time accumulator */
+ struct accum* per_thread_acc_fl = NULL; /* Flux accumulator */
+ struct accum* per_thread_acc_fc = NULL; /* Convective flux accumulator */
+ struct accum* per_thread_acc_fr = NULL; /* Radiative flux accumulator */
+ struct accum* per_thread_acc_fi = NULL; /* Imposed flux accumulator */
+
+ /* Gathered accumulators */
+ struct accum acc_tp = ACCUM_NULL;
+ struct accum acc_ti = ACCUM_NULL;
+ struct accum acc_fl = ACCUM_NULL;
+ struct accum acc_fc = ACCUM_NULL;
+ struct accum acc_fr = ACCUM_NULL;
+ struct accum acc_fi = ACCUM_NULL;
+
+ /* Miscellaneous */
size_t nrealisations = 0;
int64_t irealisation;
size_t i;
- size_t view_nprims;
- int progress = 0;
+ int32_t* progress = NULL; /* Per process progress bar */
+ int is_master_process = 1;
ATOMIC nsolved_realisations = 0;
ATOMIC res = RES_OK;
- if(!scn
- || !args
- || !args->nrealisations
- || args->nrealisations > INT64_MAX
- || !args->primitives
- || args->time_range[0] < 0
- || args->time_range[1] < args->time_range[0]
- || (args->time_range[1] > DBL_MAX && args->time_range[0] != args->time_range[1])
- || !args->nprimitives
- || !out_estimator) {
- res = RES_BAD_ARG;
- goto error;
- }
+ if(!scn || !out_estimator) { res = RES_BAD_ARG; goto error; }
-#if SDIS_XD_DIMENSION == 2
- if(scene_is_2d(scn) == 0) { res = RES_BAD_ARG; goto error; }
-#else
- if(scene_is_2d(scn) != 0) { res = RES_BAD_ARG; goto error; }
-#endif
+ res = check_solve_boundary_flux_args(args);
+ if(res != RES_OK) goto error;
+ res = XD(scene_check_dimensionality)(scn);
+ if(res != RES_OK) goto error;
- SXD(scene_view_primitives_count(scn->sXd(view), &view_nprims));
FOR_EACH(i, 0, args->nprimitives) {
- if(args->primitives[i] >= view_nprims) {
- log_err(scn->dev,
- "%s: invalid primitive identifier `%lu'. It must be in the [0 %lu] range.\n",
- FUNC_NAME,
- (unsigned long)args->primitives[i],
- (unsigned long)scene_get_primitives_count(scn)-1);
- res = RES_BAD_ARG;
- goto error;
- }
+ res = scene_check_primitive_index(scn, args->primitives[i]);
+ if(res != RES_OK) goto error;
}
/* Create the Star-XD shape of the boundary */
@@ -586,43 +621,49 @@ XD(solve_boundary_flux)
res = sXd(scene_view_create)(scene, SXD_SAMPLE, &view);
if(res != RES_OK) goto error;
- /* Create the proxy RNG */
- if(args->rng_state) {
- res = ssp_rng_proxy_create_from_rng(scn->dev->allocator, args->rng_state,
- scn->dev->nthreads, &rng_proxy);
- if(res != RES_OK) goto error;
- } else {
- res = ssp_rng_proxy_create(scn->dev->allocator, SSP_RNG_MT19937_64,
- scn->dev->nthreads, &rng_proxy);
- if(res != RES_OK) goto error;
- }
+#ifdef SDIS_ENABLE_MPI
+ is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0;
+#endif
- /* Create the per thread RNG */
- rngs = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*rngs));
- if(!rngs) { res = RES_MEM_ERR; goto error; }
- FOR_EACH(i, 0, scn->dev->nthreads) {
- res = ssp_rng_proxy_create_rng(rng_proxy, i, rngs + i);
- if(res != RES_OK) goto error;
- }
+ /* Create the per thread RNGs */
+ res = create_per_thread_rng
+ (scn->dev, args->rng_state, &rng_proxy, &per_thread_rng);
+ if(res != RES_OK) goto error;
+
+ /* Allocate the per process progress status */
+ res = alloc_process_progress(scn->dev, &progress);
+ if(res != RES_OK) goto error;
/* Create the per thread accumulator */
#define ALLOC_ACCUMS(Dst) { \
Dst = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*Dst)); \
if(!Dst) { res = RES_MEM_ERR; goto error; } \
} (void)0
- ALLOC_ACCUMS(acc_tp);
- ALLOC_ACCUMS(acc_ti);
- ALLOC_ACCUMS(acc_fc);
- ALLOC_ACCUMS(acc_fl);
- ALLOC_ACCUMS(acc_fr);
- ALLOC_ACCUMS(acc_fi);
+ ALLOC_ACCUMS(per_thread_acc_tp);
+ ALLOC_ACCUMS(per_thread_acc_ti);
+ ALLOC_ACCUMS(per_thread_acc_fc);
+ ALLOC_ACCUMS(per_thread_acc_fl);
+ ALLOC_ACCUMS(per_thread_acc_fr);
+ ALLOC_ACCUMS(per_thread_acc_fi);
#undef ALLOC_ACCUMS
- /* Create the estimator */
- res = estimator_create(scn->dev, SDIS_ESTIMATOR_FLUX, &estimator);
- if(res != RES_OK) goto error;
+ if(is_master_process) {
+ /* Create the estimator */
+ res = estimator_create(scn->dev, SDIS_ESTIMATOR_FLUX, &estimator);
+ if(res != RES_OK) goto error;
+ }
+
+ /* Synchronise the processes */
+ process_barrier(scn->dev);
+
+ #define PROGRESS_MSG "Solving surface flux: "
+ print_progress(scn->dev, progress, PROGRESS_MSG);
- nrealisations = args->nrealisations;
+ /* Begin time registration of the computation */
+ time_current(&time0);
+
+ /* Here we go! Launch the Monte Carlo estimation */
+ nrealisations = compute_process_realisations_count(scn->dev, args->nrealisations);
omp_set_num_threads((int)scn->dev->nthreads);
#pragma omp parallel for schedule(static)
for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) {
@@ -630,13 +671,13 @@ XD(solve_boundary_flux)
BOUNDARY_FLUX_REALISATION_ARGS_NULL;
struct time t0, t1;
const int ithread = omp_get_thread_num();
- struct ssp_rng* rng = rngs[ithread];
- struct accum* acc_temp = &acc_tp[ithread];
- struct accum* acc_time = &acc_ti[ithread];
- struct accum* acc_flux = &acc_fl[ithread];
- struct accum* acc_fcon = &acc_fc[ithread];
- struct accum* acc_frad = &acc_fr[ithread];
- struct accum* acc_fimp = &acc_fi[ithread];
+ struct ssp_rng* rng = per_thread_rng[ithread];
+ struct accum* acc_temp = &per_thread_acc_tp[ithread];
+ struct accum* acc_time = &per_thread_acc_ti[ithread];
+ struct accum* acc_flux = &per_thread_acc_fl[ithread];
+ struct accum* acc_fcon = &per_thread_acc_fc[ithread];
+ struct accum* acc_frad = &per_thread_acc_fr[ithread];
+ struct accum* acc_fimp = &per_thread_acc_fi[ithread];
struct sXd(primitive) prim;
struct sdis_interface_fragment frag = SDIS_INTERFACE_FRAGMENT_NULL;
const struct sdis_interface* interf;
@@ -782,52 +823,77 @@ XD(solve_boundary_flux)
n = (size_t)ATOMIC_INCR(&nsolved_realisations);
pcent = (int)((double)n * 100.0 / (double)nrealisations + 0.5/*round*/);
#pragma omp critical
- if(pcent > progress) {
- progress = pcent;
- log_info(scn->dev, "Solving boundary flux: %3d%%\r", progress);
+ if(pcent > progress[0]) {
+ progress[0] = pcent;
+ print_progress_update(scn->dev, progress, PROGRESS_MSG);
}
}
+ /* Synchronise processes */
+ process_barrier(scn->dev);
+
+ res = gather_res_T(scn->dev, (res_T)res);
if(res != RES_OK) goto error;
- /* Add a new line after the progress status */
- log_info(scn->dev, "Solving boundary flux: %3d%%\n", progress);
-
- /* Redux the per thread accumulators */
- sum_accums(acc_tp, scn->dev->nthreads, &acc_tp[0]);
- sum_accums(acc_ti, scn->dev->nthreads, &acc_ti[0]);
- sum_accums(acc_fc, scn->dev->nthreads, &acc_fc[0]);
- sum_accums(acc_fr, scn->dev->nthreads, &acc_fr[0]);
- sum_accums(acc_fl, scn->dev->nthreads, &acc_fl[0]);
- sum_accums(acc_fi, scn->dev->nthreads, &acc_fi[0]);
- ASSERT(acc_tp[0].count == acc_fl[0].count);
- ASSERT(acc_tp[0].count == acc_ti[0].count);
- ASSERT(acc_tp[0].count == acc_fr[0].count);
- ASSERT(acc_tp[0].count == acc_fc[0].count);
- ASSERT(acc_tp[0].count == acc_fi[0].count);
+ print_progress_update(scn->dev, progress, PROGRESS_MSG);
+ log_info(scn->dev, "\n");
+ #undef PROGRESS_MSG
+
+ /* Report computation time */
+ time_sub(&time0, time_current(&time1), &time0);
+ time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf));
+ log_info(scn->dev, "Surface flux solved in %s.\n", buf);
- /* Setup the estimated values */
- estimator_setup_realisations_count(estimator, nrealisations, acc_tp[0].count);
- estimator_setup_temperature(estimator, acc_tp[0].sum, acc_tp[0].sum2);
- estimator_setup_realisation_time(estimator, acc_ti[0].sum, acc_ti[0].sum2);
- estimator_setup_flux(estimator, FLUX_CONVECTIVE, acc_fc[0].sum, acc_fc[0].sum2);
- estimator_setup_flux(estimator, FLUX_RADIATIVE, acc_fr[0].sum, acc_fr[0].sum2);
- estimator_setup_flux(estimator, FLUX_IMPOSED, acc_fi[0].sum, acc_fi[0].sum2);
- estimator_setup_flux(estimator, FLUX_TOTAL, acc_fl[0].sum, acc_fl[0].sum2);
-
- res = estimator_save_rng_state(estimator, rng_proxy);
+ /* Gather the RNG proxy sequence IDs and ensure that the RNG proxy state of
+ * the master process is greater than the RNG proxy state of all other
+ * processes */
+ res = gather_rng_proxy_sequence_id(scn->dev, rng_proxy);
if(res != RES_OK) goto error;
-exit:
- if(rngs) {
- FOR_EACH(i, 0, scn->dev->nthreads) { if(rngs[i]) SSP(rng_ref_put(rngs[i])); }
- MEM_RM(scn->dev->allocator, rngs);
+ time_current(&time0);
+ #define GATHER_ACCUMS(Msg, Acc) { \
+ res = gather_accumulators(scn->dev, Msg, per_thread_##Acc, &Acc); \
+ if(res != RES_OK) goto error; \
+ } (void)0
+ GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TEMP, acc_tp);
+ GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TIME, acc_ti);
+ GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_FLUX_CONVECTIVE, acc_fc);
+ GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_FLUX_IMPOSED, acc_fi);
+ GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_FLUX_RADIATIVE, acc_fr);
+ GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_FLUX_TOTAL, acc_fl);
+ #undef GATHER_ACCUMS
+
+ time_sub(&time0, time_current(&time1), &time0);
+ time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf));
+ log_info(scn->dev, "Accumulators gathered in %s.\n", buf);
+
+ /* Setup the estimated values */
+ if(is_master_process) {
+ ASSERT(acc_tp.count == acc_fl.count);
+ ASSERT(acc_tp.count == acc_ti.count);
+ ASSERT(acc_tp.count == acc_fr.count);
+ ASSERT(acc_tp.count == acc_fc.count);
+ ASSERT(acc_tp.count == acc_fi.count);
+ estimator_setup_realisations_count(estimator, args->nrealisations, acc_tp.count);
+ estimator_setup_temperature(estimator, acc_tp.sum, acc_tp.sum2);
+ estimator_setup_realisation_time(estimator, acc_ti.sum, acc_ti.sum2);
+ estimator_setup_flux(estimator, FLUX_CONVECTIVE, acc_fc.sum, acc_fc.sum2);
+ estimator_setup_flux(estimator, FLUX_RADIATIVE, acc_fr.sum, acc_fr.sum2);
+ estimator_setup_flux(estimator, FLUX_IMPOSED, acc_fi.sum, acc_fi.sum2);
+ estimator_setup_flux(estimator, FLUX_TOTAL, acc_fl.sum, acc_fl.sum2);
+
+ res = estimator_save_rng_state(estimator, rng_proxy);
+ if(res != RES_OK) goto error;
}
- if(acc_tp) MEM_RM(scn->dev->allocator, acc_tp);
- if(acc_ti) MEM_RM(scn->dev->allocator, acc_ti);
- if(acc_fc) MEM_RM(scn->dev->allocator, acc_fc);
- if(acc_fr) MEM_RM(scn->dev->allocator, acc_fr);
- if(acc_fl) MEM_RM(scn->dev->allocator, acc_fl);
- if(acc_fi) MEM_RM(scn->dev->allocator, acc_fi);
+
+exit:
+ if(per_thread_rng) release_per_thread_rng(scn->dev, per_thread_rng);
+ if(per_thread_acc_tp) MEM_RM(scn->dev->allocator, per_thread_acc_tp);
+ if(per_thread_acc_ti) MEM_RM(scn->dev->allocator, per_thread_acc_ti);
+ if(per_thread_acc_fc) MEM_RM(scn->dev->allocator, per_thread_acc_fc);
+ if(per_thread_acc_fr) MEM_RM(scn->dev->allocator, per_thread_acc_fr);
+ if(per_thread_acc_fl) MEM_RM(scn->dev->allocator, per_thread_acc_fl);
+ if(per_thread_acc_fi) MEM_RM(scn->dev->allocator, per_thread_acc_fi);
+ if(progress) free_process_progress(scn->dev, progress);
if(scene) SXD(scene_ref_put(scene));
if(shape) SXD(shape_ref_put(shape));
if(view) SXD(scene_view_ref_put(view));
@@ -835,10 +901,7 @@ exit:
if(out_estimator) *out_estimator = estimator;
return (res_T)res;
error:
- if(estimator) {
- SDIS(estimator_ref_put(estimator));
- estimator = NULL;
- }
+ if(estimator) { SDIS(estimator_ref_put(estimator)); estimator = NULL; }
goto exit;
}
diff --git a/src/sdis_solve_probe_boundary_Xd.h b/src/sdis_solve_probe_boundary_Xd.h
@@ -395,10 +395,6 @@ XD(solve_probe_boundary_flux)
struct time time0, time1;
char buf[128]; /* Temporary buffer used to store formated time */
- /* Device variables */
- struct mem_allocator* allocator = NULL;
- size_t nthreads = 0;
-
/* Stardis variables */
const struct sdis_interface* interf = NULL;
const struct sdis_medium* fmd = NULL;
@@ -467,9 +463,6 @@ XD(solve_probe_boundary_flux)
is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0;
#endif
- nthreads = scn->dev->nthreads;
- allocator = scn->dev->allocator;
-
/* Create the per thread RNGs */
res = create_per_thread_rng
(scn->dev, args->rng_state, &rng_proxy, &per_thread_rng);
@@ -481,7 +474,7 @@ XD(solve_probe_boundary_flux)
/* Create the per thread accumulators */
#define ALLOC_ACCUMS(Dst) { \
- Dst = MEM_CALLOC(allocator, nthreads, sizeof(*Dst)); \
+ Dst = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*Dst)); \
if(!Dst) { res = RES_MEM_ERR; goto error; } \
} (void)0
ALLOC_ACCUMS(per_thread_acc_tp);
diff --git a/src/test_sdis_solve_boundary_flux.c b/src/test_sdis_solve_boundary_flux.c
@@ -465,10 +465,14 @@ main(int argc, char** argv)
prims[0] = 6;
OK(SOLVE(box_scn, &bound_args, &estimator));
- /* Average temperature on the right side of the box */
- printf("Average values of the right side of the box = ");
- check_estimator(estimator, N, analyticT, analyticCF, analyticRF, analyticTF);
- OK(sdis_estimator_ref_put(estimator));
+ if(!is_master_process) {
+ CHK(estimator == NULL);
+ } else {
+ /* Average temperature on the right side of the box */
+ printf("Average values of the right side of the box = ");
+ check_estimator(estimator, N, analyticT, analyticCF, analyticRF, analyticTF);
+ OK(sdis_estimator_ref_put(estimator));
+ }
/* Average temperature on the right side of the square */
prims[0] = 4;
@@ -476,9 +480,11 @@ main(int argc, char** argv)
BA(SOLVE(square_scn, &bound_args, &estimator));
prims[0] = 3;
OK(SOLVE(square_scn, &bound_args, &estimator));
- printf("Average values of the right side of the square = ");
- check_estimator(estimator, N, analyticT, analyticCF, analyticRF, analyticTF);
- OK(sdis_estimator_ref_put(estimator));
+ if(is_master_process) {
+ printf("Average values of the right side of the square = ");
+ check_estimator(estimator, N, analyticT, analyticCF, analyticRF, analyticTF);
+ OK(sdis_estimator_ref_put(estimator));
+ }
/* Flux computation on Dirichlet boundaries is not available yet.
* Once available, the expected total flux is the same we expect on the right