commit 8c9afb07f5524a5f4057182f54710804d766c0ee
parent 7430763d68f3ef2e1254248eedaf77c6d9183c8b
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Fri, 8 Mar 2019 17:43:32 +0100
Do not use OpenMP to perform accumulation reduction
Diffstat:
5 files changed, 223 insertions(+), 133 deletions(-)
diff --git a/src/sdis_Xd_begin.h b/src/sdis_Xd_begin.h
@@ -22,6 +22,14 @@
struct green_path_handle;
struct sdis_heat_path;
+struct accum {
+ double sum; /* Sum of MC weights */
+ double sum2; /* Sum of square MC weights */
+ size_t count; /* #accumulated MC weights */
+};
+#define ACCUM_NULL__ {0,0,0}
+static const struct accum ACCUM_NULL = ACCUM_NULL__;
+
struct rwalk_context {
struct green_path_handle* green_path;
struct sdis_heat_path* heat_path;
@@ -31,6 +39,24 @@ struct rwalk_context {
#define RWALK_CONTEXT_NULL__ {NULL, NULL, 0, 0}
static const struct rwalk_context RWALK_CONTEXT_NULL = RWALK_CONTEXT_NULL__;
+static INLINE void
+sum_accums
+ (const struct accum accums[],
+ const size_t naccums,
+ struct accum* accum)
+{
+ struct accum acc = ACCUM_NULL;
+ size_t i;
+ ASSERT(accums && naccums && accum);
+
+ FOR_EACH(i, 0, naccums) {
+ acc.sum += accums[i].sum;
+ acc.sum2 += accums[i].sum2;
+ acc.count += accums[i].count;
+ }
+ *accum = acc;
+}
+
#endif /* SDIS_XD_BEGIN_H */
#ifdef SDIS_XD_BEGIN_H__
diff --git a/src/sdis_solve_boundary_Xd.h b/src/sdis_solve_boundary_Xd.h
@@ -93,10 +93,9 @@ XD(solve_boundary)
struct sdis_estimator* estimator = NULL;
struct ssp_rng_proxy* rng_proxy = NULL;
struct ssp_rng** rngs = NULL;
+ struct accum* accums = NULL;
size_t i;
- size_t N = 0; /* #realisations that do not fail */
size_t view_nprims;
- double weight=0, sqr_weight=0;
int64_t irealisation;
ATOMIC res = RES_OK;
@@ -174,16 +173,21 @@ XD(solve_boundary)
if(res != RES_OK) goto error;
}
+ /* Create the per thread accumulator */
+ accums = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*accums));
+ if(!accums) { res = RES_MEM_ERR; goto error; }
+
/* Create the estimator */
res = estimator_create(scn->dev, SDIS_ESTIMATOR_TEMPERATURE, &estimator);
if(res != RES_OK) goto error;
omp_set_num_threads((int)scn->dev->nthreads);
- #pragma omp parallel for schedule(static) reduction(+:weight,sqr_weight,N)
+ #pragma omp parallel for schedule(static)
for(irealisation=0; irealisation<(int64_t)nrealisations; ++irealisation) {
const int ithread = omp_get_thread_num();
- struct sXd(primitive) prim;
struct ssp_rng* rng = rngs[ithread];
+ struct accum* accum = &accums[ithread];
+ struct sXd(primitive) prim;
enum sdis_side side;
size_t iprim;
double w = NaN;
@@ -226,29 +230,32 @@ XD(solve_boundary)
/* Update the MC accumulators */
if(res_local == RES_OK) {
- weight += w;
- sqr_weight += w*w;
- ++N;
+ accum->sum += w;
+ accum->sum2 += w*w;
+ ++accum->count;
} else if(res_local != RES_BAD_OP) {
ATOMIC_SET(&res, res_local);
continue;
}
}
+ sum_accums(accums, scn->dev->nthreads, &accums[0]);
+
/* Setup the estimated temperature */
- estimator_setup_realisations_count(estimator, nrealisations, N);
- estimator_setup_temperature(estimator, weight, sqr_weight);
+ estimator_setup_realisations_count(estimator, nrealisations, accums[0].count);
+ estimator_setup_temperature(estimator, accums[0].sum, accums[0].sum2);
exit:
+ if(rngs) {
+ FOR_EACH(i, 0, scn->dev->nthreads) {if(rngs[i]) SSP(rng_ref_put(rngs[i]));}
+ MEM_RM(scn->dev->allocator, rngs);
+ }
+ if(accums) MEM_RM(scn->dev->allocator, accums);
if(scene) SXD(scene_ref_put(scene));
if(shape) SXD(shape_ref_put(shape));
if(view) SXD(scene_view_ref_put(view));
if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
if(out_estimator) *out_estimator = estimator;
- if(rngs) {
- FOR_EACH(i, 0, scn->dev->nthreads) {if(rngs[i]) SSP(rng_ref_put(rngs[i]));}
- MEM_RM(scn->dev->allocator, rngs);
- }
return (res_T)res;
error:
if(estimator) {
@@ -278,12 +285,11 @@ XD(solve_boundary_flux)
struct sdis_estimator* estimator = NULL;
struct ssp_rng_proxy* rng_proxy = NULL;
struct ssp_rng** rngs = NULL;
- double weight_t = 0, sqr_weight_t = 0;
- double weight_fc = 0, sqr_weight_fc = 0;
- double weight_fr = 0, sqr_weight_fr = 0;
- double weight_f = 0, sqr_weight_f = 0;
+ struct accum* acc_tp = NULL; /* Per thread temperature accumulator */
+ struct accum* acc_fl = NULL; /* Per thread flux accumulator */
+ struct accum* acc_fc = NULL; /* Per thread convective flux accumulator */
+ struct accum* acc_fr = NULL; /* Per thread radiative flux accumulator */
size_t i;
- size_t N = 0; /* #realisations that do not fail */
size_t view_nprims;
int64_t irealisation;
ATOMIC res = RES_OK;
@@ -363,17 +369,31 @@ XD(solve_boundary_flux)
if(res != RES_OK) goto error;
}
+ /* Create the per thread accumulator */
+ #define ALLOC_ACCUMS(Dst) { \
+ Dst = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*Dst)); \
+ if(!Dst) { res = RES_MEM_ERR; goto error; } \
+ } (void)0
+ ALLOC_ACCUMS(acc_tp);
+ ALLOC_ACCUMS(acc_fc);
+ ALLOC_ACCUMS(acc_fl);
+ ALLOC_ACCUMS(acc_fr);
+ #undef ALLOC_ACCUMS
+
/* Create the estimator */
res = estimator_create(scn->dev, SDIS_ESTIMATOR_FLUX, &estimator);
if(res != RES_OK) goto error;
omp_set_num_threads((int)scn->dev->nthreads);
- #pragma omp parallel for schedule(static) reduction(+:weight_t,sqr_weight_t,\
- weight_fc,sqr_weight_fc,weight_fr,sqr_weight_fr,weight_f,sqr_weight_f,N)
+ #pragma omp parallel for schedule(static)
for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) {
const int ithread = omp_get_thread_num();
- struct sXd(primitive) prim;
struct ssp_rng* rng = rngs[ithread];
+ struct accum* acc_temp = &acc_tp[ithread];
+ struct accum* acc_flux = &acc_fl[ithread];
+ struct accum* acc_fcon = &acc_fc[ithread];
+ struct accum* acc_frad = &acc_fr[ithread];
+ struct sXd(primitive) prim;
struct sdis_interface_fragment frag = SDIS_INTERFACE_FRAGMENT_NULL;
const struct sdis_interface* interf;
const struct sdis_medium *fmd, *bmd;
@@ -444,48 +464,66 @@ XD(solve_boundary_flux)
if(hc > 0) flux_mask |= FLUX_FLAG_CONVECTIVE;
res_local = XD(boundary_flux_realisation)(scn, rng, iprim, uv, time,
solid_side, fp_to_meter, Tarad, Tref, flux_mask, T_brf);
- if(res_local != RES_OK) {
- if(res_local != RES_BAD_OP) {
- ATOMIC_SET(&res, res_local);
- continue;
- }
- } else {
+ if(res_local == RES_OK) {
const double Tboundary = T_brf[0];
const double Tradiative = T_brf[1];
const double Tfluid = T_brf[2];
const double w_conv = hc * (Tboundary - Tfluid);
const double w_rad = hr * (Tboundary - Tradiative);
const double w_total = w_conv + w_rad;
- weight_t += Tboundary;
- sqr_weight_t += Tboundary * Tboundary;
- weight_fc += w_conv;
- sqr_weight_fc += w_conv * w_conv;
- weight_fr += w_rad;
- sqr_weight_fr += w_rad * w_rad;
- weight_f += w_total;
- sqr_weight_f += w_total * w_total;
- ++N;
+ /* Temperature */
+ acc_temp->sum += Tboundary;
+ acc_temp->sum2 += Tboundary*Tboundary;
+ ++acc_temp->count;
+ /* Overall flux */
+ acc_flux->sum += w_total;
+ acc_flux->sum2 += w_total*w_total;
+ ++acc_flux->count;
+ /* Convective flux */
+ acc_fcon->sum += w_conv;
+ acc_fcon->sum2 += w_conv*w_conv;
+ ++acc_fcon->count;
+ /* Radiative flux */
+ acc_frad->sum += w_rad;
+ acc_frad->sum2 += w_rad*w_rad;
+ ++acc_frad->count;
+ } else if(res_local != RES_BAD_OP) {
+ ATOMIC_SET(&res, res_local);
+ continue;
}
}
if(res != RES_OK) goto error;
+ /* Reduce the per thread accumulators */
+ sum_accums(acc_tp, scn->dev->nthreads, &acc_tp[0]);
+ sum_accums(acc_fc, scn->dev->nthreads, &acc_fc[0]);
+ sum_accums(acc_fr, scn->dev->nthreads, &acc_fr[0]);
+ sum_accums(acc_fl, scn->dev->nthreads, &acc_fl[0]);
+ ASSERT(acc_tp[0].count == acc_fl[0].count);
+ ASSERT(acc_tp[0].count == acc_fr[0].count);
+ ASSERT(acc_tp[0].count == acc_fc[0].count);
+
/* Setup the estimated values */
- estimator_setup_realisations_count(estimator, nrealisations, N);
- estimator_setup_temperature(estimator, weight_t, sqr_weight_t);
- estimator_setup_flux(estimator, FLUX_CONVECTIVE, weight_fc, sqr_weight_fc);
- estimator_setup_flux(estimator, FLUX_RADIATIVE, weight_fr, sqr_weight_fr);
- estimator_setup_flux(estimator, FLUX_TOTAL, weight_f, sqr_weight_f);
+ estimator_setup_realisations_count(estimator, nrealisations, acc_tp[0].count);
+ estimator_setup_temperature(estimator, acc_tp[0].sum, acc_tp[0].sum2);
+ estimator_setup_flux(estimator, FLUX_CONVECTIVE, acc_fc[0].sum, acc_fc[0].sum2);
+ estimator_setup_flux(estimator, FLUX_RADIATIVE, acc_fr[0].sum, acc_fr[0].sum2);
+ estimator_setup_flux(estimator, FLUX_TOTAL, acc_fl[0].sum, acc_fl[0].sum2);
exit:
+ if(rngs) {
+ FOR_EACH(i, 0, scn->dev->nthreads) { if(rngs[i]) SSP(rng_ref_put(rngs[i])); }
+ MEM_RM(scn->dev->allocator, rngs);
+ }
+ if(acc_tp) MEM_RM(scn->dev->allocator, acc_tp);
+ if(acc_fc) MEM_RM(scn->dev->allocator, acc_fc);
+ if(acc_fr) MEM_RM(scn->dev->allocator, acc_fr);
+ if(acc_fl) MEM_RM(scn->dev->allocator, acc_fl);
if(scene) SXD(scene_ref_put(scene));
if(shape) SXD(shape_ref_put(shape));
if(view) SXD(scene_view_ref_put(view));
if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
if(out_estimator) *out_estimator = estimator;
- if(rngs) {
- FOR_EACH(i, 0, scn->dev->nthreads) { if(rngs[i]) SSP(rng_ref_put(rngs[i])); }
- MEM_RM(scn->dev->allocator, rngs);
- }
return (res_T)res;
error:
if(estimator) {
diff --git a/src/sdis_solve_medium_Xd.h b/src/sdis_solve_medium_Xd.h
@@ -211,7 +211,7 @@ XD(solve_medium)
struct ssp_rng_proxy* rng_proxy = NULL;
struct ssp_rng** rngs = NULL;
struct sdis_estimator* estimator = NULL;
- struct accum { double sum, sum2; size_t naccums; }* accums = NULL;
+ struct accum* accums = NULL;
int64_t irealisation;
int cumul_is_init = 0;
size_t i;
@@ -300,7 +300,7 @@ XD(solve_medium)
} else {
accum->sum += weight;
accum->sum2 += weight*weight;
- ++accum->naccums;
+ ++accum->count;
}
/* Finalize the registered path */
@@ -321,15 +321,11 @@ XD(solve_medium)
if(res != RES_OK) goto error;
/* Merge the per thread accumulators into the accumulator of the thread 0 */
- FOR_EACH(i, 1, scn->dev->nthreads) {
- accums[0].sum += accums[i].sum;
- accums[0].sum2 += accums[i].sum2;
- accums[0].naccums += accums[i].naccums;
- }
- ASSERT(accums[0].naccums <= nrealisations);
+ sum_accums(accums, scn->dev->nthreads, &accums[0]);
+ ASSERT(accums[0].count <= nrealisations);
/* Setup the estimated temperature */
- estimator_setup_realisations_count(estimator, nrealisations, accums[0].naccums);
+ estimator_setup_realisations_count(estimator, nrealisations, accums[0].count);
estimator_setup_temperature(estimator, accums[0].sum, accums[0].sum2);
exit:
diff --git a/src/sdis_solve_probe_Xd.h b/src/sdis_solve_probe_Xd.h
@@ -47,11 +47,8 @@ XD(solve_probe)
struct sdis_green_function** greens = NULL;
struct ssp_rng_proxy* rng_proxy = NULL;
struct ssp_rng** rngs = NULL;
- double weight = 0;
- double sqr_weight = 0;
- const int64_t rcount = (int64_t)nrealisations;
+ struct accum* accums = NULL;
int64_t irealisation = 0;
- size_t N = 0; /* #realisations that do not fail */
size_t i;
ATOMIC res = RES_OK;
@@ -91,6 +88,10 @@ XD(solve_probe)
if(res != RES_OK) goto error;
}
+ /* Create the per thread accumulator */
+ accums = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*accums));
+ if(!accums) { res = RES_MEM_ERR; goto error; }
+
/* Retrieve the medium in which the submitted position lies */
res = scene_get_medium(scn, position, NULL, &medium);
if(res != RES_OK) goto error;
@@ -114,17 +115,18 @@ XD(solve_probe)
/* Here we go! Launch the Monte Carlo estimation */
omp_set_num_threads((int)scn->dev->nthreads);
- #pragma omp parallel for schedule(static) reduction(+:weight,sqr_weight,N)
- for(irealisation = 0; irealisation < rcount; ++irealisation) {
- res_T res_local;
- double w = NaN;
- double time;
+ #pragma omp parallel for schedule(static)
+ for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) {
const int ithread = omp_get_thread_num();
struct ssp_rng* rng = rngs[ithread];
+ struct accum* accum = &accums[ithread];
struct green_path_handle* pgreen_path = NULL;
struct green_path_handle green_path = GREEN_PATH_HANDLE_NULL;
struct sdis_heat_path* pheat_path = NULL;
struct sdis_heat_path heat_path;
+ double w = NaN;
+ double time;
+ res_T res_local;
if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */
@@ -146,12 +148,13 @@ XD(solve_probe)
res_local = XD(probe_realisation)((size_t)irealisation, scn, rng, medium,
position, time, fp_to_meter, Tarad, Tref, pgreen_path, pheat_path, &w);
- if(res_local != RES_OK) {
- if(res_local != RES_BAD_OP) { ATOMIC_SET(&res, res_local); continue; }
- } else {
- weight += w;
- sqr_weight += w*w;
- ++N;
+ if(res_local == RES_OK) {
+ accum->sum += w;
+ accum->sum2 += w*w;
+ ++accum->count;
+ } else if(res_local != RES_BAD_OP) {
+ ATOMIC_SET(&res, res_local);
+ continue;
}
if(pheat_path) {
@@ -172,8 +175,10 @@ XD(solve_probe)
/* Setup the estimated temperature */
if(out_estimator) {
- estimator_setup_realisations_count(estimator, nrealisations, N);
- estimator_setup_temperature(estimator, weight, sqr_weight);
+ struct accum acc;
+ sum_accums(accums, scn->dev->nthreads, &acc);
+ estimator_setup_realisations_count(estimator, nrealisations, acc.count);
+ estimator_setup_temperature(estimator, acc.sum, acc.sum2);
}
if(out_green) {
@@ -202,6 +207,7 @@ exit:
}
MEM_RM(scn->dev->allocator, greens);
}
+ if(accums) MEM_RM(scn->dev->allocator, accums);
if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
if(out_green) *out_green = green;
if(out_estimator) *out_estimator = estimator;
diff --git a/src/sdis_solve_probe_boundary_Xd.h b/src/sdis_solve_probe_boundary_Xd.h
@@ -44,11 +44,8 @@ XD(solve_probe_boundary)
struct sdis_estimator* estimator = NULL;
struct ssp_rng_proxy* rng_proxy = NULL;
struct ssp_rng** rngs = NULL;
- double weight = 0;
- double sqr_weight = 0;
- const int64_t rcount = (int64_t)nrealisations;
+ struct accum* accums = NULL;
int64_t irealisation = 0;
- size_t N = 0; /* #realisations that do not fail */
size_t i;
ATOMIC res = RES_OK;
@@ -115,28 +112,30 @@ XD(solve_probe_boundary)
/* Create the per thread RNG */
rngs = MEM_CALLOC
(scn->dev->allocator, scn->dev->nthreads, sizeof(struct ssp_rng*));
- if(!rngs) {
- res = RES_MEM_ERR;
- goto error;
- }
+ if(!rngs) { res = RES_MEM_ERR; goto error; }
FOR_EACH(i, 0, scn->dev->nthreads) {
res = ssp_rng_proxy_create_rng(rng_proxy, i, rngs+i);
if(res != RES_OK) goto error;
}
+ /* Create the per thread accumulator */
+ accums = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*accums));
+ if(!accums) { res = RES_MEM_ERR; goto error; }
+
/* Create the estimator */
res = estimator_create(scn->dev, SDIS_ESTIMATOR_TEMPERATURE, &estimator);
if(res != RES_OK) goto error;
/* Here we go! Launch the Monte Carlo estimation */
omp_set_num_threads((int)scn->dev->nthreads);
- #pragma omp parallel for schedule(static) reduction(+:weight,sqr_weight,N)
- for(irealisation = 0; irealisation < rcount; ++irealisation) {
- res_T res_local;
- double w = NaN;
- double time;
+ #pragma omp parallel for schedule(static)
+ for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) {
const int ithread = omp_get_thread_num();
struct ssp_rng* rng = rngs[ithread];
+ struct accum* accum = &accums[ithread];
+ double w = NaN;
+ double time;
+ res_T res_local;
if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */
@@ -144,22 +143,21 @@ XD(solve_probe_boundary)
res_local = XD(boundary_realisation)
(scn, rng, iprim, uv, time, side, fp_to_meter, Tarad, Tref, &w);
- if(res_local != RES_OK) {
- if(res_local != RES_BAD_OP) {
- ATOMIC_SET(&res, res_local);
- continue;
- }
- } else {
- weight += w;
- sqr_weight += w*w;
- ++N;
+ if(res_local == RES_OK) {
+ accum->sum += w;
+ accum->sum2 += w*w;
+ ++accum->count;
+ } else if(res_local != RES_BAD_OP) {
+ ATOMIC_SET(&res, res_local);
+ continue;
}
}
if(res != RES_OK) goto error;
/* Setup the estimated temperature */
- estimator_setup_realisations_count(estimator, nrealisations, N);
- estimator_setup_temperature(estimator, weight, sqr_weight);
+ sum_accums(accums, scn->dev->nthreads, &accums[0]);
+ estimator_setup_realisations_count(estimator, nrealisations, accums[0].count);
+ estimator_setup_temperature(estimator, accums[0].sum, accums[0].sum2);
exit:
if(rngs) {
@@ -168,6 +166,7 @@ exit:
}
MEM_RM(scn->dev->allocator, rngs);
}
+ if(accums) MEM_RM(scn->dev->allocator, accums);
if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
if(out_estimator) *out_estimator = estimator;
return (res_T)res;
@@ -198,13 +197,11 @@ XD(solve_probe_boundary_flux)
const struct sdis_medium *fmd, *bmd;
enum sdis_side solid_side, fluid_side;
struct sdis_interface_fragment frag;
- double weight_t = 0, sqr_weight_t = 0;
- double weight_fc = 0, sqr_weight_fc = 0;
- double weight_fr = 0, sqr_weight_fr = 0;
- double weight_f= 0, sqr_weight_f = 0;
- const int64_t rcount = (int64_t)nrealisations;
+ struct accum* acc_tp = NULL; /* Per thread temperature accumulator */
+ struct accum* acc_fl = NULL; /* Per thread flux accumulator */
+ struct accum* acc_fc = NULL; /* Per thread convective flux accumulator */
+ struct accum* acc_fr = NULL; /* Per thread radiative flux accumulator */
int64_t irealisation = 0;
- size_t N = 0; /* #realisations that do not fail */
size_t i;
ATOMIC res = RES_OK;
@@ -279,15 +276,23 @@ XD(solve_probe_boundary_flux)
/* Create the per thread RNG */
rngs = MEM_CALLOC
(scn->dev->allocator, scn->dev->nthreads, sizeof(struct ssp_rng*));
- if(!rngs) {
- res = RES_MEM_ERR;
- goto error;
- }
+ if(!rngs) { res = RES_MEM_ERR; goto error; }
FOR_EACH(i, 0, scn->dev->nthreads) {
res = ssp_rng_proxy_create_rng(rng_proxy, i, rngs + i);
if(res != RES_OK) goto error;
}
+ /* Create the per thread accumulator */
+ #define ALLOC_ACCUMS(Dst) { \
+ Dst = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*Dst)); \
+ if(!Dst) { res = RES_MEM_ERR; goto error; } \
+ } (void)0
+ ALLOC_ACCUMS(acc_tp);
+ ALLOC_ACCUMS(acc_fc);
+ ALLOC_ACCUMS(acc_fl);
+ ALLOC_ACCUMS(acc_fr);
+ #undef ALLOC_ACCUMS
+
/* Prebuild the interface fragment */
res = XD(build_interface_fragment)
(&frag, scn, (unsigned)iprim, uv, fluid_side);
@@ -299,15 +304,18 @@ XD(solve_probe_boundary_flux)
/* Here we go! Launch the Monte Carlo estimation */
omp_set_num_threads((int)scn->dev->nthreads);
- #pragma omp parallel for schedule(static) reduction(+:weight_t,sqr_weight_t,\
- weight_fc,sqr_weight_fc,weight_fr,sqr_weight_fr,weight_f,sqr_weight_f,N)
- for(irealisation = 0; irealisation < rcount; ++irealisation) {
- res_T res_local;
- double T_brf[3] = { 0, 0, 0 };
+ #pragma omp parallel for schedule(static)
+ for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) {
const int ithread = omp_get_thread_num();
struct ssp_rng* rng = rngs[ithread];
+ struct accum* acc_temp = &acc_tp[ithread];
+ struct accum* acc_flux = &acc_fl[ithread];
+ struct accum* acc_fcon = &acc_fc[ithread];
+ struct accum* acc_frad = &acc_fr[ithread];
double time, epsilon, hc, hr;
int flux_mask = 0;
+ double T_brf[3] = { 0, 0, 0 };
+ res_T res_local;
if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */
@@ -325,45 +333,61 @@ XD(solve_probe_boundary_flux)
if(hc > 0) flux_mask |= FLUX_FLAG_CONVECTIVE;
res_local = XD(boundary_flux_realisation)(scn, rng, iprim, uv, time,
solid_side, fp_to_meter, Tarad, Tref, flux_mask, T_brf);
- if(res_local != RES_OK) {
- if(res_local != RES_BAD_OP) {
- ATOMIC_SET(&res, res_local);
- continue;
- }
- } else {
+ if(res_local == RES_OK) {
const double Tboundary = T_brf[0];
const double Tradiative = T_brf[1];
const double Tfluid = T_brf[2];
const double w_conv = hc * (Tboundary - Tfluid);
const double w_rad = hr * (Tboundary - Tradiative);
const double w_total = w_conv + w_rad;
- weight_t += Tboundary;
- sqr_weight_t += Tboundary * Tboundary;
- weight_fc += w_conv;
- sqr_weight_fc += w_conv * w_conv;
- weight_fr += w_rad;
- sqr_weight_fr += w_rad * w_rad;
- weight_f += w_total;
- sqr_weight_f += w_total * w_total;
- ++N;
+ /* Temperature */
+ acc_temp->sum += Tboundary;
+ acc_temp->sum2 += Tboundary*Tboundary;
+ ++acc_temp->count;
+ /* Overall flux */
+ acc_flux->sum += w_total;
+ acc_flux->sum2 += w_total*w_total;
+ ++acc_flux->count;
+ /* Convective flux */
+ acc_fcon->sum += w_conv;
+ acc_fcon->sum2 += w_conv*w_conv;
+ ++acc_fcon->count;
+ /* Radiative flux */
+ acc_frad->sum += w_rad;
+ acc_frad->sum2 += w_rad*w_rad;
+ ++acc_frad->count;
+ } else if(res_local != RES_BAD_OP) {
+ ATOMIC_SET(&res, res_local);
+ continue;
}
}
if(res != RES_OK) goto error;
+ /* Reduce the per thread accumulators */
+ sum_accums(acc_tp, scn->dev->nthreads, &acc_tp[0]);
+ sum_accums(acc_fc, scn->dev->nthreads, &acc_fc[0]);
+ sum_accums(acc_fr, scn->dev->nthreads, &acc_fr[0]);
+ sum_accums(acc_fl, scn->dev->nthreads, &acc_fl[0]);
+ ASSERT(acc_tp[0].count == acc_fl[0].count);
+ ASSERT(acc_tp[0].count == acc_fr[0].count);
+ ASSERT(acc_tp[0].count == acc_fc[0].count);
+
/* Setup the estimated values */
- estimator_setup_realisations_count(estimator, nrealisations, N);
- estimator_setup_temperature(estimator, weight_t, sqr_weight_t);
- estimator_setup_flux(estimator, FLUX_CONVECTIVE, weight_fc, sqr_weight_fc);
- estimator_setup_flux(estimator, FLUX_RADIATIVE, weight_fr, sqr_weight_fr);
- estimator_setup_flux(estimator, FLUX_TOTAL, weight_f, sqr_weight_f);
+ estimator_setup_realisations_count(estimator, nrealisations, acc_tp[0].count);
+ estimator_setup_temperature(estimator, acc_tp[0].sum, acc_tp[0].sum2);
+ estimator_setup_flux(estimator, FLUX_CONVECTIVE, acc_fc[0].sum, acc_fc[0].sum2);
+ estimator_setup_flux(estimator, FLUX_RADIATIVE, acc_fr[0].sum, acc_fr[0].sum2);
+ estimator_setup_flux(estimator, FLUX_TOTAL, acc_fl[0].sum, acc_fl[0].sum2);
exit:
if(rngs) {
- FOR_EACH(i, 0, scn->dev->nthreads) {
- if(rngs[i]) SSP(rng_ref_put(rngs[i]));
- }
+ FOR_EACH(i, 0, scn->dev->nthreads) {if(rngs[i]) SSP(rng_ref_put(rngs[i]));}
MEM_RM(scn->dev->allocator, rngs);
}
+ if(acc_tp) MEM_RM(scn->dev->allocator, acc_tp);
+ if(acc_fc) MEM_RM(scn->dev->allocator, acc_fc);
+ if(acc_fr) MEM_RM(scn->dev->allocator, acc_fr);
+ if(acc_fl) MEM_RM(scn->dev->allocator, acc_fl);
if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
if(out_estimator) *out_estimator = estimator;
return (res_T)res;