sdis_solve_probe_boundary_Xd.h (37061B)
1 /* Copyright (C) 2016-2025 |Méso|Star> (contact@meso-star.com) 2 * 3 * This program is free software: you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation, either version 3 of the License, or 6 * (at your option) any later version. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program. If not, see <http://www.gnu.org/licenses/>. */ 15 16 #include "sdis_c.h" 17 #include "sdis_device_c.h" 18 #include "sdis_estimator_c.h" 19 #include "sdis_interface_c.h" 20 #include "sdis_log.h" 21 #include "sdis_green.h" 22 #include "sdis_medium_c.h" 23 #include "sdis_misc.h" 24 #include "sdis_realisation.h" 25 #include "sdis_scene_c.h" 26 #include "sdis_heat_path_boundary_c.h" /* check_Tref_<2d|3d> */ 27 28 #include <rsys/clock_time.h> 29 #include <star/ssp.h> 30 #include <omp.h> 31 32 #include "sdis_Xd_begin.h" 33 34 /******************************************************************************* 35 * Helper function 36 ******************************************************************************/ 37 #ifndef SDIS_SOLVE_PROBE_BOUNDARY_XD_H 38 #define SDIS_SOLVE_PROBE_BOUNDARY_XD_H 39 40 static INLINE res_T 41 check_solve_probe_boundary_args 42 (const struct sdis_solve_probe_boundary_args* args) 43 { 44 if(!args) return RES_BAD_ARG; 45 46 /* Check #realisations */ 47 if(!args->nrealisations || args->nrealisations > INT64_MAX) { 48 return RES_BAD_ARG; 49 } 50 51 /* Check side */ 52 if((unsigned)args->side >= SDIS_SIDE_NULL__) { 53 return RES_BAD_ARG; 54 } 55 56 /* Check time range */ 57 if(args->time_range[0] < 0 || args->time_range[1] < args->time_range[0]) { 58 return RES_BAD_ARG; 59 } 60 if(args->time_range[1] > DBL_MAX 61 && args->time_range[0] != args->time_range[1]) { 62 return RES_BAD_ARG; 63 } 64 65 /* Check picard order */ 66 if(args->picard_order < 1) { 67 return RES_BAD_ARG; 68 } 69 70 /* Check the RNG type */ 71 if(!args->rng_state && args->rng_type >= SSP_RNG_TYPES_COUNT__) { 72 return RES_BAD_ARG; 73 } 74 75 /* Check the diffusion algorithm */ 76 if((unsigned)args->diff_algo >= SDIS_DIFFUSION_ALGORITHMS_COUNT__) { 77 return RES_BAD_ARG; 78 } 79 80 return RES_OK; 81 } 82 83 static INLINE res_T 84 check_solve_probe_boundary_list_args 85 (struct sdis_device* dev, 86 const struct sdis_solve_probe_boundary_list_args* args) 87 { 88 size_t iprobe = 0; 89 90 if(!args) return RES_BAD_ARG; 91 92 /* Check the list of probes */ 93 if(!args->probes || !args->nprobes) { 94 return RES_BAD_ARG; 95 } 96 97 /* Check the RNG type */ 98 if(!args->rng_state && args->rng_type >= SSP_RNG_TYPES_COUNT__) { 99 return RES_BAD_ARG; 100 } 101 102 FOR_EACH(iprobe, 0, args->nprobes) { 103 const res_T res = check_solve_probe_boundary_args(args->probes+iprobe); 104 if(res != RES_OK) return res; 105 106 if(args->probes[iprobe].register_paths != SDIS_HEAT_PATH_NONE) { 107 log_warn(dev, 108 "Unable to save paths for probe boundary %lu. " 109 "Saving path is not supported when solving multiple probes\n", 110 (unsigned long)iprobe); 111 } 112 } 113 114 return RES_OK; 115 } 116 117 static INLINE res_T 118 check_solve_probe_boundary_flux_args 119 (const struct sdis_solve_probe_boundary_flux_args* args) 120 { 121 if(!args) return RES_BAD_ARG; 122 123 /* Check #realisations */ 124 if(!args->nrealisations || args->nrealisations > INT64_MAX) { 125 return RES_BAD_ARG; 126 } 127 128 /* Check time range */ 129 if(args->time_range[0] < 0 || args->time_range[1] < args->time_range[0]) { 130 return RES_BAD_ARG; 131 } 132 if(args->time_range[1] > DBL_MAX 133 && args->time_range[0] != args->time_range[1]) { 134 return RES_BAD_ARG; 135 } 136 137 /* Check picard order */ 138 if(args->picard_order < 1) { 139 return RES_BAD_ARG; 140 } 141 142 /* Check the RNG type */ 143 if(!args->rng_state && args->rng_type >= SSP_RNG_TYPES_COUNT__) { 144 return RES_BAD_ARG; 145 } 146 147 /* Check the diffusion algorithm */ 148 if((unsigned)args->diff_algo >= SDIS_DIFFUSION_ALGORITHMS_COUNT__) { 149 return RES_BAD_ARG; 150 } 151 152 return RES_OK; 153 } 154 155 #endif /* SDIS_SOLVE_PROBE_BOUNDARY_XD_H */ 156 157 static res_T 158 XD(solve_one_probe_boundary) 159 (struct sdis_scene* scn, 160 struct ssp_rng* rng, 161 const struct sdis_solve_probe_boundary_args* args, 162 struct accum* acc_temp, 163 struct accum* acc_time) 164 { 165 size_t irealisation = 0; 166 res_T res = RES_OK; 167 ASSERT(scn && rng && check_solve_probe_boundary_args(args) == RES_OK); 168 169 *acc_temp = ACCUM_NULL; 170 *acc_time = ACCUM_NULL; 171 172 FOR_EACH(irealisation, 0, args->nrealisations) { 173 struct boundary_realisation_args realis_args = BOUNDARY_REALISATION_ARGS_NULL; 174 double w = NaN; /* MC weight */ 175 double usec = 0; /* Time of a realisation */ 176 double time = 0; /* Sampled observation time */ 177 struct time t0, t1; /* Register the time spent solving a realisation */ 178 179 /* Begin time registration of the realisation */ 180 time_current(&t0); 181 182 /* Sample observation time */ 183 time = sample_time(rng, args->time_range); 184 185 /* Run a realisation */ 186 realis_args.rng = rng; 187 realis_args.iprim = args->iprim; 188 realis_args.time = time; 189 realis_args.picard_order = args->picard_order; 190 realis_args.side = args->side; 191 realis_args.irealisation = irealisation; 192 realis_args.diff_algo = args->diff_algo; 193 realis_args.uv[0] = args->uv[0]; 194 #if SDIS_XD_DIMENSION == 3 195 realis_args.uv[1] = args->uv[1]; 196 #endif 197 res = XD(boundary_realisation)(scn, &realis_args, &w); 198 if(res != RES_OK && res != RES_BAD_OP) goto error; 199 200 switch(res) { 201 /* Reject the realisation */ 202 case RES_BAD_OP: 203 res = RES_OK; 204 break; 205 206 /* Update the accumulators */ 207 case RES_OK: 208 /* Stop time registration */ 209 time_sub(&t0, time_current(&t1), &t0); 210 usec = (double)time_val(&t0, TIME_NSEC) * 0.001; 211 212 /* Update MC weights */ 213 acc_temp->sum += w; 214 acc_temp->sum2 += w*w; 215 acc_temp->count += 1; 216 acc_time->sum += usec; 217 acc_time->sum2 += usec*usec; 218 acc_time->count += 1; 219 break; 220 221 default: FATAL("Unreachable code\n"); break; 222 } 223 } 224 225 exit: 226 return res; 227 error: 228 goto exit; 229 } 230 231 /******************************************************************************* 232 * Local functions 233 ******************************************************************************/ 234 static res_T 235 XD(solve_probe_boundary) 236 (struct sdis_scene* scn, 237 const struct sdis_solve_probe_boundary_args* args, 238 struct sdis_green_function** out_green, 239 struct sdis_estimator** out_estimator) 240 { 241 /* Time registration */ 242 struct time time0, time1; 243 char buf[128]; /* Temporary buffer used to store formated time */ 244 245 /* Device variables */ 246 struct mem_allocator* allocator = NULL; 247 size_t nthreads = 0; 248 249 /* Stardis variables */ 250 struct sdis_estimator* estimator = NULL; 251 struct sdis_green_function* green = NULL; 252 struct sdis_green_function** per_thread_green = NULL; 253 254 /* Random number generator */ 255 struct ssp_rng_proxy* rng_proxy = NULL; 256 struct ssp_rng** per_thread_rng = NULL; 257 258 /* Miscellaneous */ 259 struct accum* per_thread_acc_temp = NULL; 260 struct accum* per_thread_acc_time = NULL; 261 size_t nrealisations = 0; 262 int64_t irealisation = 0; 263 int32_t* progress = NULL; /* Per process progress bar */ 264 int pcent_progress = 1; /* Percentage requiring progress update */ 265 int register_paths = SDIS_HEAT_PATH_NONE; 266 int is_master_process = 1; 267 ATOMIC nsolved_realisations = 0; 268 ATOMIC res = RES_OK; 269 270 if(!scn) { res = RES_BAD_ARG; goto error; } 271 if(!out_estimator && !out_green) { res = RES_BAD_ARG; goto error; } 272 res = check_solve_probe_boundary_args(args); 273 if(res != RES_OK) goto error; 274 res = XD(check_primitive_uv)(scn->dev, args->uv); 275 if(res != RES_OK) goto error; 276 res = XD(scene_check_dimensionality)(scn); 277 if(res != RES_OK) goto error; 278 res = scene_check_primitive_index(scn, args->iprim); 279 if(res != RES_OK) goto error; 280 281 if(out_green && args->picard_order != 1) { 282 log_err(scn->dev, "%s: the evaluation of the green function does not make " 283 "sense when dealing with the non-linearities of the system; i.e. picard " 284 "order must be set to 1 while it is currently set to %lu.\n", 285 FUNC_NAME, (unsigned long)args->picard_order); 286 res = RES_BAD_ARG; 287 goto error; 288 } 289 290 #ifdef SDIS_ENABLE_MPI 291 is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0; 292 #endif 293 294 nthreads = scn->dev->nthreads; 295 allocator = scn->dev->allocator; 296 297 /* Update the progress bar every percent if escape sequences are allowed in 298 * log messages or only every 10 percent when only plain text is allowed. 299 * This reduces the number of lines of plain text printed */ 300 pcent_progress = scn->dev->no_escape_sequence ? 10 : 1; 301 302 /* Create the per thread RNGs */ 303 res = create_per_thread_rng 304 (scn->dev, args->rng_state, args->rng_type, &rng_proxy, &per_thread_rng); 305 if(res != RES_OK) goto error; 306 307 /* Allocate the per process progress status */ 308 res = alloc_process_progress(scn->dev, &progress); 309 if(res != RES_OK) goto error; 310 311 /* Create the per thread accumulators */ 312 per_thread_acc_temp = MEM_CALLOC(allocator, nthreads, sizeof(struct accum)); 313 per_thread_acc_time = MEM_CALLOC(allocator, nthreads, sizeof(struct accum)); 314 if(!per_thread_acc_temp) { res = RES_MEM_ERR; goto error; } 315 if(!per_thread_acc_time) { res = RES_MEM_ERR; goto error; } 316 317 /* Create the per thread green function */ 318 if(out_green) { 319 res = create_per_thread_green_function 320 (scn, args->signature, &per_thread_green); 321 if(res != RES_OK) goto error; 322 } 323 324 /* Create the estimator on the master process only. No estimator is needed 325 * for non master process */ 326 if(out_estimator && is_master_process) { 327 res = estimator_create(scn->dev, SDIS_ESTIMATOR_TEMPERATURE, &estimator); 328 if(res != RES_OK) goto error; 329 } 330 331 /* Synchronise the processes */ 332 process_barrier(scn->dev); 333 334 #define PROGRESS_MSG "Solving surface probe temperature: " 335 print_progress(scn->dev, progress, PROGRESS_MSG); 336 337 /* Begin time registration of the computation */ 338 time_current(&time0); 339 340 /* Here we go! Launch the Monte Carlo estimation */ 341 nrealisations = compute_process_realisations_count(scn->dev, args->nrealisations); 342 register_paths = out_estimator && is_master_process 343 ? args->register_paths : SDIS_HEAT_PATH_NONE; 344 omp_set_num_threads((int)scn->dev->nthreads); 345 #pragma omp parallel for schedule(static) 346 for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) { 347 struct boundary_realisation_args realis_args = BOUNDARY_REALISATION_ARGS_NULL; 348 struct time t0, t1; 349 const int ithread = omp_get_thread_num(); 350 struct ssp_rng* rng = per_thread_rng[ithread]; 351 struct accum* acc_temp = &per_thread_acc_temp[ithread]; 352 struct accum* acc_time = &per_thread_acc_time[ithread]; 353 struct green_path_handle* pgreen_path = NULL; 354 struct green_path_handle green_path = GREEN_PATH_HANDLE_NULL; 355 struct sdis_heat_path* pheat_path = NULL; 356 struct sdis_heat_path heat_path; 357 double w = NaN; 358 double time; 359 size_t n; 360 int pcent; 361 res_T res_local = RES_OK; 362 res_T res_simul = RES_OK; 363 364 if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */ 365 366 /* Begin time registration */ 367 time_current(&t0); 368 369 time = sample_time(rng, args->time_range); 370 if(out_green) { 371 res_local = green_function_create_path 372 (per_thread_green[ithread], &green_path); 373 if(res_local != RES_OK) { ATOMIC_SET(&res, res_local); goto error_it; } 374 pgreen_path = &green_path; 375 } 376 377 if(register_paths) { 378 heat_path_init(scn->dev->allocator, &heat_path); 379 pheat_path = &heat_path; 380 } 381 382 /* Invoke the boundary realisation */ 383 realis_args.rng = rng; 384 realis_args.iprim = args->iprim; 385 realis_args.time = time; 386 realis_args.picard_order = args->picard_order; 387 realis_args.side = args->side; 388 realis_args.green_path = pgreen_path; 389 realis_args.heat_path = pheat_path; 390 realis_args.irealisation = (size_t)irealisation; 391 realis_args.diff_algo = args->diff_algo; 392 realis_args.uv[0] = args->uv[0]; 393 #if SDIS_XD_DIMENSION == 3 394 realis_args.uv[1] = args->uv[1]; 395 #endif 396 res_simul = XD(boundary_realisation)(scn, &realis_args, &w); 397 398 /* Handle fatal error */ 399 if(res_simul != RES_OK && res_simul != RES_BAD_OP) { 400 ATOMIC_SET(&res, res_simul); 401 goto error_it; 402 } 403 404 if(pheat_path) { 405 pheat_path->status = res_simul == RES_OK 406 ? SDIS_HEAT_PATH_SUCCESS 407 : SDIS_HEAT_PATH_FAILURE; 408 409 /* Check if the path must be saved regarding the register_paths mask */ 410 if(!(register_paths & (int)pheat_path->status)) { 411 heat_path_release(pheat_path); 412 pheat_path = NULL; 413 } else { 414 /* Register the sampled path */ 415 res_local = estimator_add_and_release_heat_path(estimator, pheat_path); 416 if(res_local != RES_OK) { 417 ATOMIC_SET(&res, res_local); 418 goto error_it; 419 } 420 pheat_path = NULL; 421 } 422 } 423 424 /* Stop time registration */ 425 time_sub(&t0, time_current(&t1), &t0); 426 427 /* Update accumulators */ 428 if(res_simul == RES_OK) { 429 const double usec = (double)time_val(&t0, TIME_NSEC) * 0.001; 430 acc_temp->sum += w; acc_temp->sum2 += w*w; ++acc_temp->count; 431 acc_time->sum += usec; acc_time->sum2 += usec*usec; ++acc_time->count; 432 } 433 434 /* Update progress */ 435 n = (size_t)ATOMIC_INCR(&nsolved_realisations); 436 pcent = (int)((double)n * 100.0 / (double)nrealisations + 0.5/*round*/); 437 #pragma omp critical 438 if(pcent/pcent_progress > progress[0]/pcent_progress) { 439 progress[0] = pcent; 440 print_progress_update(scn->dev, progress, PROGRESS_MSG); 441 } 442 443 exit_it: 444 if(pheat_path) heat_path_release(pheat_path); 445 continue; 446 error_it: 447 goto exit_it; 448 } 449 /* Synchronise processes */ 450 process_barrier(scn->dev); 451 452 res = gather_res_T(scn->dev, (res_T)res); 453 if(res != RES_OK) goto error; 454 455 print_progress_completion(scn->dev, progress, PROGRESS_MSG); 456 #undef PROGRESS_MSG 457 458 /* Report computation time */ 459 time_sub(&time0, time_current(&time1), &time0); 460 time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); 461 log_info(scn->dev, "Surface probe temperature solved in %s.\n", buf); 462 463 /* Gather the RNG proxy sequence IDs and ensure that the RNG proxy state of 464 * the master process is greater than the RNG proxy state of all other 465 * processes */ 466 res = gather_rng_proxy_sequence_id(scn->dev, rng_proxy); 467 if(res != RES_OK) goto error; 468 469 /* Setup the estimated temperature and per realisation time */ 470 if(out_estimator) { 471 struct accum acc_temp; 472 struct accum acc_time; 473 474 time_current(&time0); 475 476 #define GATHER_ACCUMS(Msg, Acc) { \ 477 res = gather_accumulators(scn->dev, Msg, per_thread_##Acc, &Acc); \ 478 if(res != RES_OK) goto error; \ 479 } (void)0 480 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TEMP, acc_temp); 481 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TIME, acc_time); 482 #undef GATHER_ACCUMS 483 484 time_sub(&time0, time_current(&time1), &time0); 485 time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); 486 log_info(scn->dev, "Accumulators gathered in %s.\n", buf); 487 488 /* Return an estimator only on master process */ 489 if(is_master_process) { 490 ASSERT(acc_temp.count == acc_time.count); 491 estimator_setup_realisations_count(estimator, args->nrealisations, acc_temp.count); 492 estimator_setup_temperature(estimator, acc_temp.sum, acc_temp.sum2); 493 estimator_setup_realisation_time(estimator, acc_time.sum, acc_time.sum2); 494 res = estimator_save_rng_state(estimator, rng_proxy); 495 if(res != RES_OK) goto error; 496 } 497 } 498 499 if(out_green) { 500 time_current(&time0); 501 502 res = gather_green_functions 503 (scn, rng_proxy, per_thread_green, per_thread_acc_time, &green); 504 if(res != RES_OK) goto error; 505 506 time_sub(&time0, time_current(&time1), &time0); 507 time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); 508 log_info(scn->dev, "Green functions gathered in %s.\n", buf); 509 510 /* Return a green function only on master process */ 511 if(!is_master_process) { 512 SDIS(green_function_ref_put(green)); 513 green = NULL; 514 } 515 } 516 517 exit: 518 if(per_thread_rng) release_per_thread_rng(scn->dev, per_thread_rng); 519 if(per_thread_green) release_per_thread_green_function(scn, per_thread_green); 520 if(progress) free_process_progress(scn->dev, progress); 521 if(per_thread_acc_temp) MEM_RM(scn->dev->allocator, per_thread_acc_temp); 522 if(per_thread_acc_time) MEM_RM(scn->dev->allocator, per_thread_acc_time); 523 if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); 524 if(out_green) *out_green = green; 525 if(out_estimator) *out_estimator = estimator; 526 return (res_T)res; 527 error: 528 if(estimator) { SDIS(estimator_ref_put(estimator)); estimator = NULL; } 529 if(green) { SDIS(green_function_ref_put(green)); green = NULL; } 530 goto exit; 531 } 532 533 static res_T 534 XD(solve_probe_boundary_list) 535 (struct sdis_scene* scn, 536 const struct sdis_solve_probe_boundary_list_args* args, 537 struct sdis_estimator_buffer** out_estim_buf) 538 { 539 /* Time registration */ 540 struct time time0, time1; 541 char buf[128]; /* Temporary buffer used to store formated time */ 542 543 /* Device variable */ 544 struct mem_allocator* allocator = NULL; 545 546 /* Stardis variables */ 547 struct sdis_estimator_buffer* estim_buf = NULL; 548 549 /* Random Number generator */ 550 struct ssp_rng_proxy* rng_proxy = NULL; 551 struct ssp_rng** per_thread_rng = NULL; 552 553 /* Probe variables */ 554 size_t process_probes[2] = {0, 0}; /* Probes range managed by the process */ 555 size_t process_nprobes = 0; /* Number of probes managed by the process */ 556 size_t nprobes = 0; 557 struct accum* per_probe_acc_temp = NULL; 558 struct accum* per_probe_acc_time = NULL; 559 560 /* Miscellaneous */ 561 int32_t* progress = NULL; /* Per process progress bar */ 562 int is_master_process = 1; 563 int pcent_progress = 1; /* Percentage requiring progress update */ 564 int64_t i = 0; 565 ATOMIC nsolved_probes = 0; 566 ATOMIC res = RES_OK; 567 568 if(!scn || !out_estim_buf) { res = RES_BAD_ARG; goto error; } 569 res = check_solve_probe_boundary_list_args(scn->dev, args); 570 if(res != RES_OK) goto error; 571 res = XD(scene_check_dimensionality)(scn); 572 if(res != RES_OK) goto error; 573 574 #ifdef SDIS_ENABLE_MPI 575 is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0; 576 #endif 577 578 allocator = scn->dev->allocator; 579 580 /* Update the progress bar every percent if escape sequences are allowed in 581 * log messages or only every 10 percent when only plain text is allowed. 582 * This reduces the number of lines of plain text printed */ 583 pcent_progress = scn->dev->no_escape_sequence ? 10 : 1; 584 585 /* Create the per threads RNGs */ 586 res = create_per_thread_rng 587 (scn->dev, args->rng_state, args->rng_type, &rng_proxy, &per_thread_rng); 588 if(res != RES_OK) goto error; 589 590 /* Allocate the per process progress status */ 591 res = alloc_process_progress(scn->dev, &progress); 592 if(res != RES_OK) goto error; 593 594 /* Synchronise the processes */ 595 process_barrier(scn->dev); 596 597 /* Define the range of probes to manage in this process */ 598 process_nprobes = compute_process_index_range 599 (scn->dev, args->nprobes, process_probes); 600 601 #define PROGRESS_MSG "Solving surface probes: " 602 print_progress(scn->dev, progress, PROGRESS_MSG); 603 604 /* If there is no work to be done on this process (i.e. no probe to 605 * calculate), simply print its completion and go straight to the 606 * synchronization barrier.*/ 607 if(process_nprobes == 0) { 608 progress[0] = 100; 609 print_progress_update(scn->dev, progress, PROGRESS_MSG); 610 goto post_sync; 611 } 612 613 /* Allocate the list of accumulators per probe. On the master process, 614 * allocate a complete list in which the accumulators of all processes will be 615 * stored. */ 616 nprobes = is_master_process ? args->nprobes : process_nprobes; 617 per_probe_acc_temp = MEM_CALLOC(allocator, nprobes, sizeof(*per_probe_acc_temp)); 618 per_probe_acc_time = MEM_CALLOC(allocator, nprobes, sizeof(*per_probe_acc_time)); 619 if(!per_probe_acc_temp || !per_probe_acc_time) { 620 log_err(scn->dev, "Unable to allocate the list of accumulators per probe.\n"); 621 res = RES_MEM_ERR; 622 goto error; 623 } 624 625 /* Begin time registration of the computation */ 626 time_current(&time0); 627 628 /* Calculation of probe list */ 629 omp_set_num_threads((int)scn->dev->nthreads); 630 #pragma omp parallel for schedule(static) 631 for(i = 0; i < (int64_t)process_nprobes; ++i) { 632 /* Thread */ 633 const int ithread = omp_get_thread_num(); /* Thread ID */ 634 struct ssp_rng* rng = per_thread_rng[ithread]; /* RNG of the thread */ 635 636 /* Probe */ 637 struct accum* probe_acc_temp = NULL; 638 struct accum* probe_acc_time = NULL; 639 const struct sdis_solve_probe_boundary_args* probe_args = NULL; 640 const size_t iprobe = process_probes[0] + (size_t)i; /* Probe ID */ 641 642 /* Miscellaneous */ 643 size_t n = 0; /* Number of solved probes */ 644 int pcent = 0; /* Current progress */ 645 res_T res_local = RES_OK; 646 647 if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */ 648 649 /* Retrieve the probe arguments */ 650 probe_args = &args->probes[iprobe]; 651 652 /* Retrieve the probe accumulators */ 653 probe_acc_temp = &per_probe_acc_temp[i]; 654 probe_acc_time = &per_probe_acc_time[i]; 655 656 res_local = XD(solve_one_probe_boundary) 657 (scn, rng, probe_args, probe_acc_temp, probe_acc_time); 658 if(res_local != RES_OK) { 659 ATOMIC_SET(&res, res_local); 660 continue; 661 } 662 663 /* Update progress */ 664 n = (size_t)ATOMIC_INCR(&nsolved_probes); 665 pcent = (int)((double)n * 100.0 / (double)process_nprobes + 0.5/*round*/); 666 667 #pragma omp critical 668 if(pcent/pcent_progress > progress[0]/pcent_progress) { 669 progress[0] = pcent; 670 print_progress_update(scn->dev, progress, PROGRESS_MSG); 671 } 672 } 673 674 post_sync: 675 /* Synchronise processes */ 676 process_barrier(scn->dev); 677 678 res = gather_res_T(scn->dev, (res_T)res); 679 if(res != RES_OK) goto error; 680 681 print_progress_completion(scn->dev, progress, PROGRESS_MSG); 682 #undef PROGRESS_MSG 683 684 /* Report computatio time */ 685 time_sub(&time0, time_current(&time1), &time0); 686 time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); 687 log_info(scn->dev, "%lu surface probes solved in %s.\n", 688 (unsigned long)args->nprobes, buf); 689 690 /* Gather the RNG proxy sequence IDs and ensure that the RNG proxy state of 691 * the master process is greater than the RNG proxy state of all other 692 * processes */ 693 res = gather_rng_proxy_sequence_id(scn->dev, rng_proxy); 694 if(res != RES_OK) goto error; 695 696 time_current(&time0); 697 698 /* Gather the list of accumulators */ 699 #define GATHER_ACCUMS_LIST(Msg, Acc) { \ 700 res = gather_accumulators_list \ 701 (scn->dev, Msg, args->nprobes, process_probes, per_probe_acc_##Acc); \ 702 if(res != RES_OK) goto error; \ 703 } (void)0 704 GATHER_ACCUMS_LIST(MPI_SDIS_MSG_ACCUM_TEMP, temp); 705 GATHER_ACCUMS_LIST(MPI_SDIS_MSG_ACCUM_TIME, time); 706 #undef GATHER_ACCUMS_LIST 707 708 time_sub(&time0, time_current(&time1), &time0); 709 time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); 710 log_info(scn->dev, "Probes accumulator gathered in %s.\n", buf); 711 712 if(is_master_process) { 713 res = estimator_buffer_create_from_observable_list_probe_boundary 714 (scn->dev, rng_proxy, args->probes, per_probe_acc_temp, 715 per_probe_acc_time, args->nprobes, &estim_buf); 716 if(res != RES_OK) goto error; 717 } 718 719 exit: 720 if(per_thread_rng) release_per_thread_rng(scn->dev, per_thread_rng); 721 if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); 722 if(per_probe_acc_temp) MEM_RM(allocator, per_probe_acc_temp); 723 if(per_probe_acc_time) MEM_RM(allocator, per_probe_acc_time); 724 if(progress) free_process_progress(scn->dev, progress); 725 if(out_estim_buf) *out_estim_buf = estim_buf; 726 return (res_T)res; 727 error: 728 if(estim_buf) { 729 SDIS(estimator_buffer_ref_put(estim_buf)); 730 estim_buf = NULL; 731 } 732 goto exit; 733 } 734 735 static res_T 736 XD(solve_probe_boundary_flux) 737 (struct sdis_scene* scn, 738 const struct sdis_solve_probe_boundary_flux_args* args, 739 struct sdis_estimator** out_estimator) 740 { 741 /* Time registration */ 742 struct time time0, time1; 743 char buf[128]; /* Temporary buffer used to store formated time */ 744 745 /* Stardis variables */ 746 const struct sdis_interface* interf = NULL; 747 const struct sdis_medium* fmd = NULL; 748 const struct sdis_medium* bmd = NULL; 749 struct sdis_estimator* estimator = NULL; 750 struct sdis_interface_fragment frag = SDIS_INTERFACE_FRAGMENT_NULL; 751 enum sdis_side solid_side = SDIS_SIDE_NULL__; 752 enum sdis_side fluid_side = SDIS_SIDE_NULL__; 753 754 /* Random number generator */ 755 struct ssp_rng_proxy* rng_proxy = NULL; 756 struct ssp_rng** per_thread_rng = NULL; 757 758 /* Per thread accumulators */ 759 struct accum* per_thread_acc_tp = NULL; /* Temperature accumulator */ 760 struct accum* per_thread_acc_ti = NULL; /* Realisation time */ 761 struct accum* per_thread_acc_fl = NULL; /* Flux accumulator */ 762 struct accum* per_thread_acc_fc = NULL; /* Convective flux accumulator */ 763 struct accum* per_thread_acc_fr = NULL; /* Radiative flux accumulator */ 764 struct accum* per_thread_acc_fi = NULL; /* Imposed flux accumulator */ 765 766 /* Gathered accumulator */ 767 struct accum acc_tp = ACCUM_NULL; 768 struct accum acc_ti = ACCUM_NULL; 769 struct accum acc_fl = ACCUM_NULL; 770 struct accum acc_fc = ACCUM_NULL; 771 struct accum acc_fr = ACCUM_NULL; 772 struct accum acc_fi = ACCUM_NULL; 773 774 /* Miscellaneous */ 775 size_t nrealisations = 0; 776 int64_t irealisation = 0; 777 int32_t* progress = NULL; /* Per process progress bar */ 778 int pcent_progress = 1; /* Percentage requiring progress update */ 779 int is_master_process = 1; 780 ATOMIC nsolved_realisations = 0; 781 ATOMIC res = RES_OK; 782 783 if(!scn) { res = RES_BAD_ARG; goto error; } 784 if(!out_estimator) { res = RES_BAD_ARG; goto error; } 785 res = check_solve_probe_boundary_flux_args(args); 786 if(res != RES_OK) goto error; 787 res = XD(check_primitive_uv)(scn->dev, args->uv); 788 if(res != RES_OK) goto error; 789 res = XD(scene_check_dimensionality)(scn); 790 if(res != RES_OK) goto error; 791 res = scene_check_primitive_index(scn, args->iprim); 792 if(res != RES_OK) goto error; 793 794 /* Check medium is fluid on one side and solid on the other */ 795 interf = scene_get_interface(scn, (unsigned)args->iprim); 796 fmd = interface_get_medium(interf, SDIS_FRONT); 797 bmd = interface_get_medium(interf, SDIS_BACK); 798 if(!fmd || !bmd || fmd->type == bmd->type) { 799 log_err(scn->dev, 800 "%s: Attempt to compute a flux at a %s-%s interface.\n", 801 FUNC_NAME, 802 (!fmd ? "undefined" : (fmd->type == SDIS_FLUID ? "fluid" : "solid")), 803 (!bmd ? "undefined" : (bmd->type == SDIS_FLUID ? "fluid" : "solid"))); 804 res = RES_BAD_ARG; 805 goto error; 806 } 807 solid_side = (fmd->type == SDIS_SOLID) ? SDIS_FRONT : SDIS_BACK; 808 fluid_side = (fmd->type == SDIS_FLUID) ? SDIS_FRONT : SDIS_BACK; 809 810 #ifdef SDIS_ENABLE_MPI 811 is_master_process = !scn->dev->use_mpi || scn->dev->mpi_rank == 0; 812 #endif 813 814 /* Update the progress bar every percent if escape sequences are allowed in 815 * log messages or only every 10 percent when only plain text is allowed. 816 * This reduces the number of lines of plain text printed */ 817 pcent_progress = scn->dev->no_escape_sequence ? 10 : 1; 818 819 /* Create the per thread RNGs */ 820 res = create_per_thread_rng 821 (scn->dev, args->rng_state, args->rng_type, &rng_proxy, &per_thread_rng); 822 if(res != RES_OK) goto error; 823 824 /* Allocate the per process progress status */ 825 res = alloc_process_progress(scn->dev, &progress); 826 if(res != RES_OK) goto error; 827 828 /* Create the per thread accumulators */ 829 #define ALLOC_ACCUMS(Dst) { \ 830 Dst = MEM_CALLOC(scn->dev->allocator, scn->dev->nthreads, sizeof(*Dst)); \ 831 if(!Dst) { res = RES_MEM_ERR; goto error; } \ 832 } (void)0 833 ALLOC_ACCUMS(per_thread_acc_tp); 834 ALLOC_ACCUMS(per_thread_acc_ti); 835 ALLOC_ACCUMS(per_thread_acc_fc); 836 ALLOC_ACCUMS(per_thread_acc_fl); 837 ALLOC_ACCUMS(per_thread_acc_fr); 838 ALLOC_ACCUMS(per_thread_acc_fi); 839 #undef ALLOC_ACCUMS 840 841 /* Prebuild the interface fragment */ 842 res = XD(build_interface_fragment) 843 (&frag, scn, (unsigned)args->iprim, args->uv, fluid_side); 844 if(res != RES_OK) goto error; 845 846 if(is_master_process) { 847 /* Create the estimator */ 848 res = estimator_create(scn->dev, SDIS_ESTIMATOR_FLUX, &estimator); 849 if(res != RES_OK) goto error; 850 } 851 852 /* Synchronise the processes */ 853 process_barrier(scn->dev); 854 855 #define PROGRESS_MSG "Solving surface probe flux: " 856 print_progress(scn->dev, progress, PROGRESS_MSG); 857 858 /* Begin time registration of the computation */ 859 time_current(&time0); 860 861 /* Here we go! Launch the Monte Carlo estimation */ 862 nrealisations = compute_process_realisations_count(scn->dev, args->nrealisations); 863 omp_set_num_threads((int)scn->dev->nthreads); 864 #pragma omp parallel for schedule(static) 865 for(irealisation = 0; irealisation < (int64_t)nrealisations; ++irealisation) { 866 struct boundary_flux_realisation_args realis_args = 867 BOUNDARY_FLUX_REALISATION_ARGS_NULL; 868 struct time t0, t1; 869 const int ithread = omp_get_thread_num(); 870 struct sdis_interface_fragment frag_local = frag; 871 struct ssp_rng* rng = per_thread_rng[ithread]; 872 struct accum* acc_temp = &per_thread_acc_tp[ithread]; 873 struct accum* acc_time = &per_thread_acc_ti[ithread]; 874 struct accum* acc_flux = &per_thread_acc_fl[ithread]; 875 struct accum* acc_fcon = &per_thread_acc_fc[ithread]; 876 struct accum* acc_frad = &per_thread_acc_fr[ithread]; 877 struct accum* acc_fimp = &per_thread_acc_fi[ithread]; 878 double time, epsilon, hc, hr, imposed_flux, imposed_temp; 879 int flux_mask = 0; 880 struct bound_flux_result result = BOUND_FLUX_RESULT_NULL__; 881 double Tref = SDIS_TEMPERATURE_NONE; 882 size_t n; 883 int pcent; 884 res_T res_local = RES_OK; 885 res_T res_simul = RES_OK; 886 887 if(ATOMIC_GET(&res) != RES_OK) continue; /* An error occurred */ 888 889 /* Begin time registration */ 890 time_current(&t0); 891 892 time = sample_time(rng, args->time_range); 893 894 /* Compute hr and hc */ 895 frag_local.time = time; 896 frag_local.side = fluid_side; 897 epsilon = interface_side_get_emissivity 898 (interf, SDIS_INTERN_SOURCE_ID, &frag_local); 899 Tref = interface_side_get_reference_temperature(interf, &frag_local); 900 hc = interface_get_convection_coef(interf, &frag_local); 901 if(epsilon <= 0) { 902 hr = 0; 903 } else { 904 res_local = XD(check_Tref)(scn, frag_local.P, Tref, FUNC_NAME); 905 if(res_local != RES_OK) { ATOMIC_SET(&res, &res_local); continue; } 906 hr = 4.0 * BOLTZMANN_CONSTANT * Tref * Tref * Tref * epsilon; 907 } 908 909 frag_local.side = solid_side; 910 imposed_flux = interface_side_get_flux(interf, &frag_local); 911 imposed_temp = interface_side_get_temperature(interf, &frag_local); 912 if(SDIS_TEMPERATURE_IS_KNOWN(imposed_temp)) { 913 /* Flux computation on T boundaries is not supported yet */ 914 log_err(scn->dev, 915 "%s: Attempt to compute a flux at a Dirichlet boundary " 916 "(not available yet).\n", FUNC_NAME); 917 ATOMIC_SET(&res, RES_BAD_ARG); 918 continue; 919 } 920 921 /* Fluid, Radiative and Solid temperatures */ 922 flux_mask = 0; 923 if(hr > 0) flux_mask |= FLUX_FLAG_RADIATIVE; 924 if(hc > 0) flux_mask |= FLUX_FLAG_CONVECTIVE; 925 926 /* Invoke the boundary flux realisation */ 927 realis_args.rng = rng; 928 realis_args.iprim = args->iprim; 929 realis_args.time = time; 930 realis_args.picard_order = args->picard_order; 931 realis_args.solid_side = solid_side; 932 realis_args.flux_mask = flux_mask; 933 realis_args.irealisation = (size_t)irealisation; 934 realis_args.diff_algo = args->diff_algo; 935 realis_args.uv[0] = args->uv[0]; 936 #if SDIS_XD_DIMENSION == 3 937 realis_args.uv[1] = args->uv[1]; 938 #endif 939 res_simul = XD(boundary_flux_realisation)(scn, &realis_args, &result); 940 941 /* Stop time registration */ 942 time_sub(&t0, time_current(&t1), &t0); 943 944 if(res_simul != RES_OK && res_simul != RES_BAD_OP) { 945 ATOMIC_SET(&res, res_simul); 946 continue; 947 } else if(res_simul == RES_OK) { /* Update accumulators */ 948 const double usec = (double)time_val(&t0, TIME_NSEC) * 0.001; 949 /* Convective flux from fluid to solid */ 950 const double w_conv = hc > 0 ? hc * (result.Tfluid - result.Tboundary) : 0; 951 /* Radiative flux from ambient to solid */ 952 const double w_rad = hr > 0 ? hr * (result.Tradiative - result.Tboundary) : 0; 953 /* Imposed flux that goes _into_ the solid */ 954 const double w_imp = (imposed_flux != SDIS_FLUX_NONE) ? imposed_flux : 0; 955 /* Total flux */ 956 const double w_total = w_conv + w_rad + w_imp; 957 /* Temperature */ 958 acc_temp->sum += result.Tboundary; 959 acc_temp->sum2 += result.Tboundary*result.Tboundary; 960 ++acc_temp->count; 961 /* Time */ 962 acc_time->sum += usec; 963 acc_time->sum2 += usec*usec; 964 ++acc_time->count; 965 /* Overwall flux */ 966 acc_flux->sum += w_total; 967 acc_flux->sum2 += w_total*w_total; 968 ++acc_flux->count; 969 /* Convective flux */ 970 acc_fcon->sum += w_conv; 971 acc_fcon->sum2 += w_conv*w_conv; 972 ++acc_fcon->count; 973 /* Radiative flux */ 974 acc_frad->sum += w_rad; 975 acc_frad->sum2 += w_rad*w_rad; 976 ++acc_frad->count; 977 /* Imposed flux */ 978 acc_fimp->sum += w_imp; 979 acc_fimp->sum2 += w_imp*w_imp; 980 ++acc_fimp->count; 981 } 982 983 /* Update progress */ 984 n = (size_t)ATOMIC_INCR(&nsolved_realisations); 985 pcent = (int)((double)n * 100.0 / (double)nrealisations + 0.5/*round*/); 986 #pragma omp critical 987 if(pcent/pcent_progress > progress[0]/pcent_progress) { 988 progress[0] = pcent; 989 print_progress_update(scn->dev, progress, PROGRESS_MSG); 990 } 991 } 992 /* Synchronise processes */ 993 process_barrier(scn->dev); 994 995 res = gather_res_T(scn->dev, (res_T)res); 996 if(res != RES_OK) goto error; 997 998 print_progress_completion(scn->dev, progress, PROGRESS_MSG); 999 #undef PROGRESS_MSG 1000 1001 /* Report computation time */ 1002 time_sub(&time0, time_current(&time1), &time0); 1003 time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); 1004 log_info(scn->dev, "Surface probe flux solved in %s.\n", buf); 1005 1006 /* Gather the RNG proxy sequence IDs and ensure that the RNG proxy state of 1007 * the master process is greater than the RNG proxy state of all other 1008 * processes */ 1009 res = gather_rng_proxy_sequence_id(scn->dev, rng_proxy); 1010 if(res != RES_OK) goto error; 1011 1012 time_current(&time0); 1013 1014 #define GATHER_ACCUMS(Msg, Acc) { \ 1015 res = gather_accumulators(scn->dev, Msg, per_thread_##Acc, &Acc); \ 1016 if(res != RES_OK) goto error; \ 1017 } (void)0 1018 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TEMP, acc_tp); 1019 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_TIME, acc_ti); 1020 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_FLUX_CONVECTIVE, acc_fc); 1021 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_FLUX_IMPOSED, acc_fi); 1022 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_FLUX_RADIATIVE, acc_fr); 1023 GATHER_ACCUMS(MPI_SDIS_MSG_ACCUM_FLUX_TOTAL, acc_fl); 1024 #undef GATHER_ACCUMS 1025 1026 time_sub(&time0, time_current(&time1), &time0); 1027 time_dump(&time0, TIME_ALL, NULL, buf, sizeof(buf)); 1028 log_info(scn->dev, "Accumulators gathered in %s.\n", buf); 1029 1030 /* Setup the estimated values */ 1031 if(is_master_process) { 1032 ASSERT(acc_tp.count == acc_fl.count); 1033 ASSERT(acc_tp.count == acc_ti.count); 1034 ASSERT(acc_tp.count == acc_fr.count); 1035 ASSERT(acc_tp.count == acc_fc.count); 1036 ASSERT(acc_tp.count == acc_fi.count); 1037 estimator_setup_realisations_count(estimator, args->nrealisations, acc_tp.count); 1038 estimator_setup_temperature(estimator, acc_tp.sum, acc_tp.sum2); 1039 estimator_setup_realisation_time(estimator, acc_ti.sum, acc_ti.sum2); 1040 estimator_setup_flux(estimator, FLUX_CONVECTIVE, acc_fc.sum, acc_fc.sum2); 1041 estimator_setup_flux(estimator, FLUX_RADIATIVE, acc_fr.sum, acc_fr.sum2); 1042 estimator_setup_flux(estimator, FLUX_IMPOSED, acc_fi.sum, acc_fi.sum2); 1043 estimator_setup_flux(estimator, FLUX_TOTAL, acc_fl.sum, acc_fl.sum2); 1044 1045 res = estimator_save_rng_state(estimator, rng_proxy); 1046 if(res != RES_OK) goto error; 1047 } 1048 1049 exit: 1050 if(per_thread_rng) release_per_thread_rng(scn->dev, per_thread_rng); 1051 if(per_thread_acc_tp) MEM_RM(scn->dev->allocator, per_thread_acc_tp); 1052 if(per_thread_acc_ti) MEM_RM(scn->dev->allocator, per_thread_acc_ti); 1053 if(per_thread_acc_fc) MEM_RM(scn->dev->allocator, per_thread_acc_fc); 1054 if(per_thread_acc_fr) MEM_RM(scn->dev->allocator, per_thread_acc_fr); 1055 if(per_thread_acc_fl) MEM_RM(scn->dev->allocator, per_thread_acc_fl); 1056 if(per_thread_acc_fi) MEM_RM(scn->dev->allocator, per_thread_acc_fi); 1057 if(progress) free_process_progress(scn->dev, progress); 1058 if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); 1059 if(out_estimator) *out_estimator = estimator; 1060 return (res_T)res; 1061 error: 1062 if(estimator) { SDIS(estimator_ref_put(estimator)); estimator = NULL; } 1063 goto exit; 1064 } 1065 1066 #include "sdis_Xd_end.h"