stardis-solver

Solve coupled heat transfers
git clone git://git.meso-star.fr/stardis-solver.git
Log | Files | Refs | README | LICENSE

commit 8c9afb07f5524a5f4057182f54710804d766c0ee
parent 7430763d68f3ef2e1254248eedaf77c6d9183c8b
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Fri,  8 Mar 2019 17:43:32 +0100

Do not use OpenMP to perform accumulation reduction

Diffstat:
Msrc/sdis_Xd_begin.h | 26++++++++++++++++++++++++++
Msrc/sdis_solve_boundary_Xd.h | 128+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
Msrc/sdis_solve_medium_Xd.h | 14+++++---------
Msrc/sdis_solve_probe_Xd.h | 40+++++++++++++++++++++++-----------------
Msrc/sdis_solve_probe_boundary_Xd.h | 148++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
5 files changed, 223 insertions(+), 133 deletions(-)

diff --git a/src/sdis_Xd_begin.h b/src/sdis_Xd_begin.h @@ -22,6 +22,14 @@ struct green_path_handle; struct sdis_heat_path; +struct accum { + double sum; /* Sum of MC weights */ + double sum2; /* Sum of square MC weights */ + size_t count; /* #accumulated MC weights */ +}; +#define ACCUM_NULL__ {0,0,0} +static const struct accum ACCUM_NULL = ACCUM_NULL__; + struct rwalk_context { struct green_path_handle* green_path; struct sdis_heat_path* heat_path; @@ -31,6 +39,24 @@ struct rwalk_context { #define RWALK_CONTEXT_NULL__ {NULL, NULL, 0, 0} static const struct rwalk_context RWALK_CONTEXT_NULL = RWALK_CONTEXT_NULL__; +static INLINE void +sum_accums + (const struct accum accums[], + const size_t naccums, + struct accum* accum) +{ + struct accum acc = ACCUM_NULL; + size_t i; + ASSERT(accums && naccums && accum); + + FOR_EACH(i, 0, naccums) { + acc.sum += accums[i].sum; + acc.sum2 += accums[i].sum2; + acc.count += accums[i].count; + } + *accum = acc; +} + #endif /* SDIS_XD_BEGIN_H */ #ifdef SDIS_XD_BEGIN_H__ diff --git a/src/sdis_solve_boundary_Xd.h b/src/sdis_solve_boundary_Xd.h @@ -93,10 +93,9 @@ XD(solve_boundary) struct sdis_estimator* estimator = NULL; struct ssp_rng_proxy* rng_proxy = NULL; struct ssp_rng** rngs = NULL; + struct accum* accums = NULL; size_t i; - size_t N = 0; /* #realisations that do not fail */ size_t view_nprims; - double weight=0, sqr_weight=0; int64_t irealisation; ATOMIC res = RES_OK; @@ -174,16 +173,21 @@ XD(solve_boundary) if(res != RES_OK) goto error; } + /* Create the per thread accumulator */ + accums = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*accums)); + if(!accums) { res = RES_MEM_ERR; goto error; } + /* Create the estimator */ res = estimator_create(scn->dev, SDIS_ESTIMATOR_TEMPERATURE, &estimator); if(res != RES_OK) goto error; omp_set_num_threads((int)scn->dev->nthreads); - #pragma omp parallel for schedule(static) reduction(+:weight,sqr_weight,N) + #pragma omp parallel for schedule(static) for(irealisation=0; irealisation<(int64_t)nrealisations; ++irealisation) { const int ithread = omp_get_thread_num(); - struct sXd(primitive) prim; struct ssp_rng* rng = rngs[ithread]; + struct accum* accum = &accums[ithread]; + struct sXd(primitive) prim; enum sdis_side side; size_t iprim; double w = NaN; @@ -226,29 +230,32 @@ XD(solve_boundary) /* Update the MC accumulators */ if(res_local == RES_OK) { - weight += w; - sqr_weight += w*w; - ++N; + accum->sum += w; + accum->sum2 += w*w; + ++accum->count; } else if(res_local != RES_BAD_OP) { ATOMIC_SET(&res, res_local); continue; } } + sum_accums(accums, scn->dev->nthreads, &accums[0]); + /* Setup the estimated temperature */ - estimator_setup_realisations_count(estimator, nrealisations, N); - estimator_setup_temperature(estimator, weight, sqr_weight); + estimator_setup_realisations_count(estimator, nrealisations, accums[0].count); + estimator_setup_temperature(estimator, accums[0].sum, accums[0].sum2); exit: + if(rngs) { + FOR_EACH(i, 0, scn->dev->nthreads) {if(rngs[i]) SSP(rng_ref_put(rngs[i]));} + MEM_RM(scn->dev->allocator, rngs); + } + if(accums) MEM_RM(scn->dev->allocator, accums); if(scene) SXD(scene_ref_put(scene)); if(shape) SXD(shape_ref_put(shape)); if(view) SXD(scene_view_ref_put(view)); if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); if(out_estimator) *out_estimator = estimator; - if(rngs) { - FOR_EACH(i, 0, scn->dev->nthreads) {if(rngs[i]) SSP(rng_ref_put(rngs[i]));} - MEM_RM(scn->dev->allocator, rngs); - } return (res_T)res; error: if(estimator) { @@ -278,12 +285,11 @@ XD(solve_boundary_flux) struct sdis_estimator* estimator = NULL; struct ssp_rng_proxy* rng_proxy = NULL; struct ssp_rng** rngs = NULL; - double weight_t = 0, sqr_weight_t = 0; - double weight_fc = 0, sqr_weight_fc = 0; - double weight_fr = 0, sqr_weight_fr = 0; - double weight_f = 0, sqr_weight_f = 0; + struct accum* acc_tp = NULL; /* Per thread temperature accumulator */ + struct accum* acc_fl = NULL; /* Per thread flux accumulator */ + struct accum* acc_fc = NULL; /* Per thread convective flux accumulator */ + struct accum* acc_fr = NULL; /* Per thread radiative flux accumulator */ size_t i; - size_t N = 0; /* #realisations that do not fail */ size_t view_nprims; int64_t irealisation; ATOMIC res = RES_OK; @@ -363,17 +369,31 @@ XD(solve_boundary_flux) if(res != RES_OK) goto error; } + /* Create the per thread accumulator */ + #define ALLOC_ACCUMS(Dst) { \ + Dst = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*Dst)); \ + if(!Dst) { res = RES_MEM_ERR; goto error; } \ + } (void)0 + ALLOC_ACCUMS(acc_tp); + ALLOC_ACCUMS(acc_fc); + ALLOC_ACCUMS(acc_fl); + ALLOC_ACCUMS(acc_fr); + #undef ALLOC_ACCUMS + /* Create the estimator */ res = estimator_create(scn->dev, SDIS_ESTIMATOR_FLUX, &estimator); if(res != RES_OK) goto error; omp_set_num_threads((int)scn->dev->nthreads); - #pragma omp parallel for schedule(static) reduction(+:weight_t,sqr_weight_t,\ - weight_fc,sqr_weight_fc,weight_fr,sqr_weight_fr,weight_f,sqr_weight_f,N) + #pragma omp parallel for schedule(static) for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) { const int ithread = omp_get_thread_num(); - struct sXd(primitive) prim; struct ssp_rng* rng = rngs[ithread]; + struct accum* acc_temp = &acc_tp[ithread]; + struct accum* acc_flux = &acc_fl[ithread]; + struct accum* acc_fcon = &acc_fc[ithread]; + struct accum* acc_frad = &acc_fr[ithread]; + struct sXd(primitive) prim; struct sdis_interface_fragment frag = SDIS_INTERFACE_FRAGMENT_NULL; const struct sdis_interface* interf; const struct sdis_medium *fmd, *bmd; @@ -444,48 +464,66 @@ XD(solve_boundary_flux) if(hc > 0) flux_mask |= FLUX_FLAG_CONVECTIVE; res_local = XD(boundary_flux_realisation)(scn, rng, iprim, uv, time, solid_side, fp_to_meter, Tarad, Tref, flux_mask, T_brf); - if(res_local != RES_OK) { - if(res_local != RES_BAD_OP) { - ATOMIC_SET(&res, res_local); - continue; - } - } else { + if(res_local == RES_OK) { const double Tboundary = T_brf[0]; const double Tradiative = T_brf[1]; const double Tfluid = T_brf[2]; const double w_conv = hc * (Tboundary - Tfluid); const double w_rad = hr * (Tboundary - Tradiative); const double w_total = w_conv + w_rad; - weight_t += Tboundary; - sqr_weight_t += Tboundary * Tboundary; - weight_fc += w_conv; - sqr_weight_fc += w_conv * w_conv; - weight_fr += w_rad; - sqr_weight_fr += w_rad * w_rad; - weight_f += w_total; - sqr_weight_f += w_total * w_total; - ++N; + /* Temperature */ + acc_temp->sum += Tboundary; + acc_temp->sum2 += Tboundary*Tboundary; + ++acc_temp->count; + /* Overwall flux */ + acc_flux->sum += w_total; + acc_flux->sum2 += w_total*w_total; + ++acc_flux->count; + /* Convective flux */ + acc_fcon->sum += w_conv; + acc_fcon->sum2 += w_conv*w_conv; + ++acc_fcon->count; + /* Radiative flux */ + acc_frad->sum += w_rad; + acc_frad->sum2 += w_rad*w_rad; + ++acc_frad->count; + } else if(res_local != RES_BAD_OP) { + ATOMIC_SET(&res, res_local); + continue; } } if(res != RES_OK) goto error; + /* Redux the per thread accumulators */ + sum_accums(acc_tp, scn->dev->nthreads, &acc_tp[0]); + sum_accums(acc_fc, scn->dev->nthreads, &acc_fc[0]); + sum_accums(acc_fr, scn->dev->nthreads, &acc_fr[0]); + sum_accums(acc_fl, scn->dev->nthreads, &acc_fl[0]); + ASSERT(acc_tp[0].count == acc_fl[0].count); + ASSERT(acc_tp[0].count == acc_fr[0].count); + ASSERT(acc_tp[0].count == acc_fc[0].count); + /* Setup the estimated values */ - estimator_setup_realisations_count(estimator, nrealisations, N); - estimator_setup_temperature(estimator, weight_t, sqr_weight_t); - estimator_setup_flux(estimator, FLUX_CONVECTIVE, weight_fc, sqr_weight_fc); - estimator_setup_flux(estimator, FLUX_RADIATIVE, weight_fr, sqr_weight_fr); - estimator_setup_flux(estimator, FLUX_TOTAL, weight_f, sqr_weight_f); + estimator_setup_realisations_count(estimator, nrealisations, acc_tp[0].count); + estimator_setup_temperature(estimator, acc_tp[0].sum, acc_tp[0].sum2); + estimator_setup_flux(estimator, FLUX_CONVECTIVE, acc_fc[0].sum, acc_fc[0].sum2); + estimator_setup_flux(estimator, FLUX_RADIATIVE, acc_fr[0].sum, acc_fr[0].sum2); + estimator_setup_flux(estimator, FLUX_TOTAL, acc_fl[0].sum, acc_fl[0].sum2); exit: + if(rngs) { + FOR_EACH(i, 0, scn->dev->nthreads) { if(rngs[i]) SSP(rng_ref_put(rngs[i])); } + MEM_RM(scn->dev->allocator, rngs); + } + if(acc_tp) MEM_RM(scn->dev->allocator, acc_tp); + if(acc_fc) MEM_RM(scn->dev->allocator, acc_fc); + if(acc_fr) MEM_RM(scn->dev->allocator, acc_fr); + if(acc_fl) MEM_RM(scn->dev->allocator, acc_fl); if(scene) SXD(scene_ref_put(scene)); if(shape) SXD(shape_ref_put(shape)); if(view) SXD(scene_view_ref_put(view)); if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); if(out_estimator) *out_estimator = estimator; - if(rngs) { - FOR_EACH(i, 0, scn->dev->nthreads) { if(rngs[i]) SSP(rng_ref_put(rngs[i])); } - MEM_RM(scn->dev->allocator, rngs); - } return (res_T)res; error: if(estimator) { diff --git a/src/sdis_solve_medium_Xd.h b/src/sdis_solve_medium_Xd.h @@ -211,7 +211,7 @@ XD(solve_medium) struct ssp_rng_proxy* rng_proxy = NULL; struct ssp_rng** rngs = NULL; struct sdis_estimator* estimator = NULL; - struct accum { double sum, sum2; size_t naccums; }* accums = NULL; + struct accum* accums = NULL; int64_t irealisation; int cumul_is_init = 0; size_t i; @@ -300,7 +300,7 @@ XD(solve_medium) } else { accum->sum += weight; accum->sum2 += weight*weight; - ++accum->naccums; + ++accum->count; } /* Finalize the registered path */ @@ -321,15 +321,11 @@ XD(solve_medium) if(res != RES_OK) goto error; /* Merge the per thread accumulators into the accumulator of the thread 0 */ - FOR_EACH(i, 1, scn->dev->nthreads) { - accums[0].sum += accums[i].sum; - accums[0].sum2 += accums[i].sum2; - accums[0].naccums += accums[i].naccums; - } - ASSERT(accums[0].naccums <= nrealisations); + sum_accums(accums, scn->dev->nthreads, &accums[0]); + ASSERT(accums[0].count <= nrealisations); /* Setup the estimated temperature */ - estimator_setup_realisations_count(estimator, nrealisations, accums[0].naccums); + estimator_setup_realisations_count(estimator, nrealisations, accums[0].count); estimator_setup_temperature(estimator, accums[0].sum, accums[0].sum2); exit: diff --git a/src/sdis_solve_probe_Xd.h b/src/sdis_solve_probe_Xd.h @@ -47,11 +47,8 @@ XD(solve_probe) struct sdis_green_function** greens = NULL; struct ssp_rng_proxy* rng_proxy = NULL; struct ssp_rng** rngs = NULL; - double weight = 0; - double sqr_weight = 0; - const int64_t rcount = (int64_t)nrealisations; + struct accum* accums = NULL; int64_t irealisation = 0; - size_t N = 0; /* #realisations that do not fail */ size_t i; ATOMIC res = RES_OK; @@ -91,6 +88,10 @@ XD(solve_probe) if(res != RES_OK) goto error; } + /* Create the per thread accumulator */ + accums = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*accums)); + if(!accums) { res = RES_MEM_ERR; goto error; } + /* Retrieve the medium in which the submitted position lies */ res = scene_get_medium(scn, position, NULL, &medium); if(res != RES_OK) goto error; @@ -114,17 +115,18 @@ XD(solve_probe) /* Here we go! Launch the Monte Carlo estimation */ omp_set_num_threads((int)scn->dev->nthreads); - #pragma omp parallel for schedule(static) reduction(+:weight,sqr_weight,N) - for(irealisation = 0; irealisation < rcount; ++irealisation) { - res_T res_local; - double w = NaN; - double time; + #pragma omp parallel for schedule(static) + for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) { const int ithread = omp_get_thread_num(); struct ssp_rng* rng = rngs[ithread]; + struct accum* accum = &accums[ithread]; struct green_path_handle* pgreen_path = NULL; struct green_path_handle green_path = GREEN_PATH_HANDLE_NULL; struct sdis_heat_path* pheat_path = NULL; struct sdis_heat_path heat_path; + double w = NaN; + double time; + res_T res_local; if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */ @@ -146,12 +148,13 @@ XD(solve_probe) res_local = XD(probe_realisation)((size_t)irealisation, scn, rng, medium, position, time, fp_to_meter, Tarad, Tref, pgreen_path, pheat_path, &w); - if(res_local != RES_OK) { - if(res_local != RES_BAD_OP) { ATOMIC_SET(&res, res_local); continue; } - } else { - weight += w; - sqr_weight += w*w; - ++N; + if(res_local == RES_OK) { + accum->sum += w; + accum->sum2 += w*w; + ++accum->count; + } else if(res_local != RES_BAD_OP) { + ATOMIC_SET(&res, res_local); + continue; } if(pheat_path) { @@ -172,8 +175,10 @@ XD(solve_probe) /* Setup the estimated temperature */ if(out_estimator) { - estimator_setup_realisations_count(estimator, nrealisations, N); - estimator_setup_temperature(estimator, weight, sqr_weight); + struct accum acc; + sum_accums(accums, scn->dev->nthreads, &acc); + estimator_setup_realisations_count(estimator, nrealisations, acc.count); + estimator_setup_temperature(estimator, acc.sum, acc.sum2); } if(out_green) { @@ -202,6 +207,7 @@ exit: } MEM_RM(scn->dev->allocator, greens); } + if(accums) MEM_RM(scn->dev->allocator, accums); if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); if(out_green) *out_green = green; if(out_estimator) *out_estimator = estimator; diff --git a/src/sdis_solve_probe_boundary_Xd.h b/src/sdis_solve_probe_boundary_Xd.h @@ -44,11 +44,8 @@ XD(solve_probe_boundary) struct sdis_estimator* estimator = NULL; struct ssp_rng_proxy* rng_proxy = NULL; struct ssp_rng** rngs = NULL; - double weight = 0; - double sqr_weight = 0; - const int64_t rcount = (int64_t)nrealisations; + struct accum* accums = NULL; int64_t irealisation = 0; - size_t N = 0; /* #realisations that do not fail */ size_t i; ATOMIC res = RES_OK; @@ -115,28 +112,30 @@ XD(solve_probe_boundary) /* Create the per thread RNG */ rngs = MEM_CALLOC (scn->dev->allocator, scn->dev->nthreads, sizeof(struct ssp_rng*)); - if(!rngs) { - res = RES_MEM_ERR; - goto error; - } + if(!rngs) { res = RES_MEM_ERR; goto error; } FOR_EACH(i, 0, scn->dev->nthreads) { res = ssp_rng_proxy_create_rng(rng_proxy, i, rngs+i); if(res != RES_OK) goto error; } + /* Create the per thread accumulator */ + accums = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*accums)); + if(!accums) { res = RES_MEM_ERR; goto error; } + /* Create the estimator */ res = estimator_create(scn->dev, SDIS_ESTIMATOR_TEMPERATURE, &estimator); if(res != RES_OK) goto error; /* Here we go! Launch the Monte Carlo estimation */ omp_set_num_threads((int)scn->dev->nthreads); - #pragma omp parallel for schedule(static) reduction(+:weight,sqr_weight,N) - for(irealisation = 0; irealisation < rcount; ++irealisation) { - res_T res_local; - double w = NaN; - double time; + #pragma omp parallel for schedule(static) + for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) { const int ithread = omp_get_thread_num(); struct ssp_rng* rng = rngs[ithread]; + struct accum* accum = &accums[ithread]; + double w = NaN; + double time; + res_T res_local; if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */ @@ -144,22 +143,21 @@ XD(solve_probe_boundary) res_local = XD(boundary_realisation) (scn, rng, iprim, uv, time, side, fp_to_meter, Tarad, Tref, &w); - if(res_local != RES_OK) { - if(res_local != RES_BAD_OP) { - ATOMIC_SET(&res, res_local); - continue; - } - } else { - weight += w; - sqr_weight += w*w; - ++N; + if(res_local == RES_OK) { + accum->sum += w; + accum->sum2 += w*w; + ++accum->count; + } else if(res_local != RES_BAD_OP) { + ATOMIC_SET(&res, res_local); + continue; } } if(res != RES_OK) goto error; /* Setup the estimated temperature */ - estimator_setup_realisations_count(estimator, nrealisations, N); - estimator_setup_temperature(estimator, weight, sqr_weight); + sum_accums(accums, scn->dev->nthreads, &accums[0]); + estimator_setup_realisations_count(estimator, nrealisations, accums[0].count); + estimator_setup_temperature(estimator, accums[0].sum, accums[0].sum2); exit: if(rngs) { @@ -168,6 +166,7 @@ exit: } MEM_RM(scn->dev->allocator, rngs); } + if(accums) MEM_RM(scn->dev->allocator, accums); if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); if(out_estimator) *out_estimator = estimator; return (res_T)res; @@ -198,13 +197,11 @@ XD(solve_probe_boundary_flux) const struct sdis_medium *fmd, *bmd; enum sdis_side solid_side, fluid_side; struct sdis_interface_fragment frag; - double weight_t = 0, sqr_weight_t = 0; - double weight_fc = 0, sqr_weight_fc = 0; - double weight_fr = 0, sqr_weight_fr = 0; - double weight_f= 0, sqr_weight_f = 0; - const int64_t rcount = (int64_t)nrealisations; + struct accum* acc_tp = NULL; /* Per thread temperature accumulator */ + struct accum* acc_fl = NULL; /* Per thread flux accumulator */ + struct accum* acc_fc = NULL; /* Per thread convective flux accumulator */ + struct accum* acc_fr = NULL; /* Per thread radiative flux accumulator */ int64_t irealisation = 0; - size_t N = 0; /* #realisations that do not fail */ size_t i; ATOMIC res = RES_OK; @@ -279,15 +276,23 @@ XD(solve_probe_boundary_flux) /* Create the per thread RNG */ rngs = MEM_CALLOC (scn->dev->allocator, scn->dev->nthreads, sizeof(struct ssp_rng*)); - if(!rngs) { - res = RES_MEM_ERR; - goto error; - } + if(!rngs) { res = RES_MEM_ERR; goto error; } FOR_EACH(i, 0, scn->dev->nthreads) { res = ssp_rng_proxy_create_rng(rng_proxy, i, rngs + i); if(res != RES_OK) goto error; } + /* Create the per thread accumulator */ + #define ALLOC_ACCUMS(Dst) { \ + Dst = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*Dst)); \ + if(!Dst) { res = RES_MEM_ERR; goto error; } \ + } (void)0 + ALLOC_ACCUMS(acc_tp); + ALLOC_ACCUMS(acc_fc); + ALLOC_ACCUMS(acc_fl); + ALLOC_ACCUMS(acc_fr); + #undef ALLOC_ACCUMS + /* Prebuild the interface fragment */ res = XD(build_interface_fragment) (&frag, scn, (unsigned)iprim, uv, fluid_side); @@ -299,15 +304,18 @@ XD(solve_probe_boundary_flux) /* Here we go! Launch the Monte Carlo estimation */ omp_set_num_threads((int)scn->dev->nthreads); - #pragma omp parallel for schedule(static) reduction(+:weight_t,sqr_weight_t,\ - weight_fc,sqr_weight_fc,weight_fr,sqr_weight_fr,weight_f,sqr_weight_f,N) - for(irealisation = 0; irealisation < rcount; ++irealisation) { - res_T res_local; - double T_brf[3] = { 0, 0, 0 }; + #pragma omp parallel for schedule(static) + for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) { const int ithread = omp_get_thread_num(); struct ssp_rng* rng = rngs[ithread]; + struct accum* acc_temp = &acc_tp[ithread]; + struct accum* acc_flux = &acc_fl[ithread]; + struct accum* acc_fcon = &acc_fc[ithread]; + struct accum* acc_frad = &acc_fr[ithread]; double time, epsilon, hc, hr; int flux_mask = 0; + double T_brf[3] = { 0, 0, 0 }; + res_T res_local; if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */ @@ -325,45 +333,61 @@ XD(solve_probe_boundary_flux) if(hc > 0) flux_mask |= FLUX_FLAG_CONVECTIVE; res_local = XD(boundary_flux_realisation)(scn, rng, iprim, uv, time, solid_side, fp_to_meter, Tarad, Tref, flux_mask, T_brf); - if(res_local != RES_OK) { - if(res_local != RES_BAD_OP) { - ATOMIC_SET(&res, res_local); - continue; - } - } else { + if(res_local == RES_OK) { const double Tboundary = T_brf[0]; const double Tradiative = T_brf[1]; const double Tfluid = T_brf[2]; const double w_conv = hc * (Tboundary - Tfluid); const double w_rad = hr * (Tboundary - Tradiative); const double w_total = w_conv + w_rad; - weight_t += Tboundary; - sqr_weight_t += Tboundary * Tboundary; - weight_fc += w_conv; - sqr_weight_fc += w_conv * w_conv; - weight_fr += w_rad; - sqr_weight_fr += w_rad * w_rad; - weight_f += w_total; - sqr_weight_f += w_total * w_total; - ++N; + /* Temperature */ + acc_temp->sum += Tboundary; + acc_temp->sum2 += Tboundary*Tboundary; + ++acc_temp->count; + /* Overwall flux */ + acc_flux->sum += w_total; + acc_flux->sum2 += w_total*w_total; + ++acc_flux->count; + /* Convective flux */ + acc_fcon->sum += w_conv; + acc_fcon->sum2 += w_conv*w_conv; + ++acc_fcon->count; + /* Radiative flux */ + acc_frad->sum += w_rad; + acc_frad->sum2 += w_rad*w_rad; + ++acc_frad->count; + } else if(res_local != RES_BAD_OP) { + ATOMIC_SET(&res, res_local); + continue; } } if(res != RES_OK) goto error; + /* Redux the per thread accumulators */ + sum_accums(acc_tp, scn->dev->nthreads, &acc_tp[0]); + sum_accums(acc_fc, scn->dev->nthreads, &acc_fc[0]); + sum_accums(acc_fr, scn->dev->nthreads, &acc_fr[0]); + sum_accums(acc_fl, scn->dev->nthreads, &acc_fl[0]); + ASSERT(acc_tp[0].count == acc_fl[0].count); + ASSERT(acc_tp[0].count == acc_fr[0].count); + ASSERT(acc_tp[0].count == acc_fc[0].count); + /* Setup the estimated values */ - estimator_setup_realisations_count(estimator, nrealisations, N); - estimator_setup_temperature(estimator, weight_t, sqr_weight_t); - estimator_setup_flux(estimator, FLUX_CONVECTIVE, weight_fc, sqr_weight_fc); - estimator_setup_flux(estimator, FLUX_RADIATIVE, weight_fr, sqr_weight_fr); - estimator_setup_flux(estimator, FLUX_TOTAL, weight_f, sqr_weight_f); + estimator_setup_realisations_count(estimator, nrealisations, acc_tp[0].count); + estimator_setup_temperature(estimator, acc_tp[0].sum, acc_tp[0].sum2); + estimator_setup_flux(estimator, FLUX_CONVECTIVE, acc_fc[0].sum, acc_fc[0].sum2); + estimator_setup_flux(estimator, FLUX_RADIATIVE, acc_fr[0].sum, acc_fr[0].sum2); + estimator_setup_flux(estimator, FLUX_TOTAL, acc_fl[0].sum, acc_fl[0].sum2); exit: if(rngs) { - FOR_EACH(i, 0, scn->dev->nthreads) { - if(rngs[i]) SSP(rng_ref_put(rngs[i])); - } + FOR_EACH(i, 0, scn->dev->nthreads) {if(rngs[i]) SSP(rng_ref_put(rngs[i]));} MEM_RM(scn->dev->allocator, rngs); } + if(acc_tp) MEM_RM(scn->dev->allocator, acc_tp); + if(acc_fc) MEM_RM(scn->dev->allocator, acc_fc); + if(acc_fr) MEM_RM(scn->dev->allocator, acc_fr); + if(acc_fl) MEM_RM(scn->dev->allocator, acc_fl); if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); if(out_estimator) *out_estimator = estimator; return (res_T)res;