rnatm_octree.c
/* Copyright (C) 2022, 2023, 2025 Centre National de la Recherche Scientifique
 * Copyright (C) 2022, 2023, 2025 Institut Pierre-Simon Laplace
 * Copyright (C) 2022, 2023, 2025 Institut de Physique du Globe de Paris
 * Copyright (C) 2022, 2023, 2025 |Méso|Star> (contact@meso-star.com)
 * Copyright (C) 2022, 2023, 2025 Observatoire de Paris
 * Copyright (C) 2022, 2023, 2025 Université de Reims Champagne-Ardenne
 * Copyright (C) 2022, 2023, 2025 Université de Versaille Saint-Quentin
 * Copyright (C) 2022, 2023, 2025 Université Paul Sabatier
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>. */

#define _POSIX_C_SOURCE 200112L /* lround */

#include "rnatm_c.h"
#include "rnatm_log.h"
#include "rnatm_voxel_partition.h"
#include "rnatm_octrees_storage.h"

#include <star/sars.h>
#include <star/sck.h>
#include <star/suvm.h>
#include <star/svx.h>

#include <rsys/clock_time.h>
#include <rsys/condition.h>
#include <rsys/cstr.h>
#include <rsys/double3.h>
#include <rsys/float4.h>
#include <rsys/math.h>
#include <rsys/morton.h>
#include <rsys/mutex.h>
#include <rsys/rsys.h>

#include <math.h> /* lround */
#include <omp.h>

#define VOXELIZE_MSG "voxelize atmosphere: %3d%%\r"

/* Structure used to synchronise the voxelization and the build threads */
struct build_sync {
  struct mutex* mutex;
  struct cond* cond;
  size_t ibatch; /* Index of the batch currently consumed by the build thread */
};
#define BUILD_SYNC_NULL__ {NULL, NULL, 0}
static const struct build_sync BUILD_SYNC_NULL = BUILD_SYNC_NULL__;

struct build_octree_context {
  struct pool* pool;
  struct partition* part; /* Current partition */
  size_t iitem; /* Index of the item to handle in the partition */

  /* Optical thickness threshold criterion for merging voxels */
  double tau_threshold;
};
#define BUILD_OCTREE_CONTEXT_NULL__ {NULL, NULL, 0, 0}
static const struct build_octree_context BUILD_OCTREE_CONTEXT_NULL =
  BUILD_OCTREE_CONTEXT_NULL__;

struct radcoefs {
  float ka_min, ka_max;
  float ks_min, ks_max;
};
#define RADCOEFS_NULL__ { \
  FLT_MAX, -FLT_MAX, \
  FLT_MAX, -FLT_MAX \
}

/* Generate the dynamic array of radiative coefficient lists */
#define DARRAY_NAME radcoef_list
#define DARRAY_DATA struct radcoefs*
#include <rsys/dynamic_array.h>

/* Generate the dynamic array of partitions */
#define DARRAY_NAME partition
#define DARRAY_DATA struct partition*
#include <rsys/dynamic_array.h>

struct voxelize_batch_args {
  /* Working data structures */
  struct darray_size_t_list* per_thread_tetra_list;
  struct darray_radcoef_list* per_thread_radcoef_list;
  struct darray_partition* per_thread_dummy_partition;
  struct pool* pool;

  size_t part_def; /* Partition definition */
  size_t nparts[3]; /* #partitions required to cover the entire grid */
  size_t nparts_adjusted; /* #partitions allowing their indexing by morton id */
  size_t nparts_overall; /* Total number of partitions to voxelize */
  size_t nparts_voxelized; /* #partitions already voxelized */

  /* AABB of the atmosphere */
  double atm_low[3];
  double atm_upp[3];

  double vxsz[3]; /* Size of a voxel */

  struct accel_struct* accel_structs;
  size_t batch_size; /* #acceleration structures */
};
#define VOXELIZE_BATCH_ARGS_NULL__ { \
  NULL, /* Per thread tetra list */ \
  NULL, /* Per thread radiative coefs */ \
  NULL, /* Per thread dummy partition */ \
  NULL, /* Partition pool */ \
\
  0, /* Partition definition */ \
  {0,0,0}, /* #partitions to cover the entire grid */ \
  0, /* #partitions adjusted wrt morton indexing */ \
  0, /* Total number of partitions to voxelize */ \
  0, /* #partitions already voxelized */ \
\
  /* AABB of the atmosphere */ \
  { DBL_MAX, DBL_MAX, DBL_MAX}, \
  {-DBL_MAX,-DBL_MAX,-DBL_MAX}, \
\
  {0,0,0}, /* Size of a voxel */ \
\
  NULL, /* Spectral items */ \
  0 /* Batch size */ \
}
static const struct voxelize_batch_args VOXELIZE_BATCH_ARGS_NULL =
  VOXELIZE_BATCH_ARGS_NULL__;

struct voxelize_args {
  struct darray_size_t* tetra_ids;
  struct radcoefs* per_item_radcoefs;

  struct partition* part;
  struct partition* part_dummy;
  size_t part_def; /* Partition definition */

  /* AABB of the partition */
  double part_low[3];
  double part_upp[3];

  double vxsz[3]; /* Size of a voxel */

  const struct accel_struct* accel_structs;
  size_t batch_size; /* #spectral items */
};
#define VOXELIZE_ARGS_NULL__ { \
  NULL, /* Tetra ids */ \
  NULL, /* Radiative coefs */ \
\
  NULL, /* Partition */ \
  NULL, /* Dummy partition */ \
  0, /* Partition definition */ \
\
  /* AABB of the partition */ \
  { DBL_MAX, DBL_MAX, DBL_MAX}, \
  {-DBL_MAX,-DBL_MAX,-DBL_MAX}, \
\
  {0,0,0}, /* Size of a voxel */ \
\
  NULL, /* Spectral items */ \
  0 /* Batch size */ \
}
static const struct voxelize_args VOXELIZE_ARGS_NULL = VOXELIZE_ARGS_NULL__;

/*******************************************************************************
 * Helper functions
 ******************************************************************************/
static INLINE res_T
check_voxelize_batch_args
  (struct rnatm* atm,
   const struct voxelize_batch_args* args)
{
  size_t i;
  if(!args
  || !args->per_thread_tetra_list
  || !args->per_thread_radcoef_list
  || !args->pool
  || !IS_POW2(args->part_def)
  || !args->nparts[0]
  || !args->nparts[1]
  || !args->nparts[2]
  || !IS_POW2(args->nparts_adjusted)
  || args->nparts_adjusted < args->nparts[0]*args->nparts[1]*args->nparts[2]
  || args->atm_low[0] >= args->atm_upp[0]
  || args->atm_low[1] >= args->atm_upp[1]
  || args->atm_low[2] >= args->atm_upp[2]
  || !args->vxsz[0]
  || !args->vxsz[1]
  || !args->vxsz[2]
  || !args->accel_structs
  || !args->batch_size) {
    return RES_BAD_ARG;
  }
  if(atm->nthreads != darray_size_t_list_size_get(args->per_thread_tetra_list)
  || atm->nthreads != darray_radcoef_list_size_get(args->per_thread_radcoef_list)) {
    return RES_BAD_ARG;
  }
  FOR_EACH(i, 0, atm->nthreads) {
    if(!darray_radcoef_list_cdata_get(args->per_thread_radcoef_list)[i]) {
      return RES_BAD_ARG;
    }
  }
  return RES_OK;
}

static INLINE res_T
check_voxelize_args(const struct voxelize_args* args)
{
  if(!args->tetra_ids
  || !args->per_item_radcoefs
  || args->part_low[0] >= args->part_upp[0]
  || args->part_low[1] >= args->part_upp[1]
  || args->part_low[2] >= args->part_upp[2]
  || !IS_POW2(args->part_def)
  || !args->vxsz[0]
  || !args->vxsz[1]
  || !args->vxsz[2]
  || !args->accel_structs
  || !args->batch_size) {
    return RES_BAD_ARG;
  }
  return RES_OK;
}

static INLINE res_T
check_trace_ray_args
  (const struct rnatm* atm,
   const struct rnatm_trace_ray_args* args)
{
  size_t n;
  if(!args) return RES_BAD_ARG;

  /* Invalid band index */
  n = darray_band_size_get(&atm->bands) - 1;
  if(args->iband < darray_band_cdata_get(&atm->bands)[0].index
  || args->iband > darray_band_cdata_get(&atm->bands)[n].index)
    return RES_BAD_ARG;

  return RES_OK;
}

static void
build_sync_release(struct build_sync* sync)
{
  if(sync->mutex) mutex_destroy(sync->mutex);
  if(sync->cond) cond_destroy(sync->cond);
}

static res_T
build_sync_init(struct build_sync* sync)
{
  res_T res = RES_OK;
  ASSERT(sync);

  memset(sync, 0, sizeof(*sync));

  sync->mutex = mutex_create();
  if(!sync->mutex) { res = RES_UNKNOWN_ERR; goto error; }
  sync->cond = cond_create();
  if(!sync->cond) { res = RES_UNKNOWN_ERR; goto error; }
  sync->ibatch = 0;

exit:
  return res;
error:
  build_sync_release(sync);
  goto exit;
}

static FINLINE unsigned
round_pow2(const unsigned val)
{
  const unsigned next_pow2 = (unsigned)round_up_pow2(val);
  if(val == 0 || next_pow2 - val <= next_pow2/4) {
    return next_pow2;
  } else {
    return next_pow2/2;
  }
}
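/* A worked example of the rounding performed by round_pow2: a value is
 * rounded up to the next power of two when it is at least 3/4 of it, and
 * rounded down to the previous power of two otherwise. For instance,
 * round_pow2(6) = 8 since 8-6 <= 8/4, whereas round_pow2(5) = 4 since
 * 8-5 > 8/4; similarly round_pow2(100) = 128. */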
/* Return the number of quadrature points for the range of bands overlapped by
 * the input spectral range */
static INLINE size_t
compute_nquad_pts(const struct rnatm* atm)
{
  size_t nquad_pts = 0;
  size_t iband = 0;
  ASSERT(atm);

  FOR_EACH(iband, 0, darray_band_size_get(&atm->bands)) {
    nquad_pts += darray_band_cdata_get(&atm->bands)[iband].nquad_pts;
  }
  return nquad_pts;
}

static res_T
setup_accel_structs(struct rnatm* atm)
{
  struct accel_struct* accel_structs = NULL;
  size_t istruct;
  size_t naccel_structs;
  size_t i;
  res_T res = RES_OK;
  ASSERT(atm);

  naccel_structs = compute_nquad_pts(atm);

  res = darray_accel_struct_resize(&atm->accel_structs, naccel_structs);
  if(res != RES_OK) {
    log_err(atm,
      "%s: failed to allocate the list of acceleration structures -- %s",
      FUNC_NAME, res_to_cstr(res));
    goto error;
  }
  accel_structs = darray_accel_struct_data_get(&atm->accel_structs);

  istruct = 0;
  FOR_EACH(i, 0, darray_band_size_get(&atm->bands)) {
    const struct band* band = darray_band_cdata_get(&atm->bands)+i;
    size_t iquad_pt;

    FOR_EACH(iquad_pt, 0, band->nquad_pts) {
      ASSERT(istruct < naccel_structs);
      accel_structs[istruct].iband = band->index;
      accel_structs[istruct].iquad_pt = iquad_pt;
      ++istruct;
    }
  }

exit:
  return res;
error:
  darray_accel_struct_clear(&atm->accel_structs);
  goto exit;
}

static INLINE void
register_tetra
  (const struct suvm_primitive* prim,
   const double low[3],
   const double upp[3],
   void* context)
{
  struct darray_size_t* tetra_ids = context;
  ASSERT(prim && low && upp && context);
  ASSERT(low[0] < upp[0]);
  ASSERT(low[1] < upp[1]);
  ASSERT(low[2] < upp[2]);
  (void)low, (void)upp;
  CHK(darray_size_t_push_back(tetra_ids, &prim->iprim) == RES_OK);
}

static res_T
compute_grid_definition(struct rnatm* atm, const struct rnatm_create_args* args)
{
  double low[3];
  double upp[3];
  double sz[3];
  double sz_max;
  double vxsz;
  unsigned def[3];
  int iaxis_max = 0;
  int iaxis_remain[2];
  int i;
  ASSERT(atm && args);

  /* Recover the AABB from the atmosphere. Note that we have already made sure
   * when setting up the meshes of the gas and aerosols that the aerosols are
   * included in the gas. Therefore, the AABB of the gas is the same as the
   * AABB of the atmosphere */
  SUVM(volume_get_aabb(atm->gas.volume, low, upp));
  sz[0] = upp[0] - low[0];
  sz[1] = upp[1] - low[1];
  sz[2] = upp[2] - low[2];

  /* Find the axis along which the atmosphere's AABB is greatest */
  sz_max = -DBL_MAX;
  FOR_EACH(i, 0, 3) {
    if(sz[i] > sz_max) { iaxis_max = i; sz_max = sz[i]; }
  }

  /* Define the other axes */
  iaxis_remain[0] = (iaxis_max + 1) % 3;
  iaxis_remain[1] = (iaxis_max + 2) % 3;

  /* Fix the definition along the maximum axis and calculate the size of a
   * voxel along this axis */
  def[iaxis_max] = round_pow2(args->grid_definition_hint);
  vxsz = sz[iaxis_max] / def[iaxis_max];

  /* Calculate the definitions along the 2 remaining axes. First calculate
   * them assuming the voxels are cubic, then round them to their nearest
   * power of two */
  def[iaxis_remain[0]] = (unsigned)lround(sz[iaxis_remain[0]]/vxsz);
  def[iaxis_remain[1]] = (unsigned)lround(sz[iaxis_remain[1]]/vxsz);
  def[iaxis_remain[0]] = round_pow2(def[iaxis_remain[0]]);
  def[iaxis_remain[1]] = round_pow2(def[iaxis_remain[1]]);

  /* Setup the grid definition */
  atm->grid_definition[0] = def[0];
  atm->grid_definition[1] = def[1];
  atm->grid_definition[2] = def[2];

  return RES_OK;
}
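/* A worked example of the grid definition computation above (numbers assumed
 * for illustration): for an AABB of 400x200x100 km and a definition hint of
 * 100, the largest axis gets round_pow2(100) = 128 voxels, i.e. a voxel size
 * of 3.125 km. Assuming cubic voxels, the remaining axes need 64 and 32
 * voxels, which are already powers of two, so the resulting grid definition
 * is {128, 64, 32}. */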
/* Compute the radiative coefficients range of the tetrahedron */
static INLINE void
setup_tetra_radcoefs
  (struct rnatm* atm,
   const struct suvm_primitive* tetra,
   const size_t cpnt,
   const size_t iband,
   const size_t iquad,
   struct radcoefs* radcoefs)
{
  float ka[4];
  float ks[4];
  ASSERT(atm && tetra && radcoefs);
  ASSERT(cpnt == RNATM_GAS || cpnt < darray_aerosol_size_get(&atm->aerosols));

  if(tetra_get_radcoef(atm, tetra, cpnt, iband, iquad, RNATM_RADCOEF_Ka, ka)
  && tetra_get_radcoef(atm, tetra, cpnt, iband, iquad, RNATM_RADCOEF_Ks, ks)) {
    /* Radiative coefficients are defined for this component at the considered
     * spectral band */
    radcoefs->ka_min = MMIN(MMIN(ka[0], ka[1]), MMIN(ka[2], ka[3]));
    radcoefs->ka_max = MMAX(MMAX(ka[0], ka[1]), MMAX(ka[2], ka[3]));
    radcoefs->ks_min = MMIN(MMIN(ks[0], ks[1]), MMIN(ks[2], ks[3]));
    radcoefs->ks_max = MMAX(MMAX(ks[0], ks[1]), MMAX(ks[2], ks[3]));
  } else {
    /* No valid radiative coefficients are set: ignore the tetrahedron */
    radcoefs->ka_min = FLT_MAX;
    radcoefs->ka_max = -FLT_MAX;
    radcoefs->ks_min = FLT_MAX;
    radcoefs->ks_max = -FLT_MAX;
  }
}

static INLINE void
update_voxel
  (struct rnatm* atm,
   const struct radcoefs* radcoefs,
   struct partition* part,
   const uint64_t vx_mcode,
   const uint64_t vx_iitem)
{
  float vx_ka_min, vx_ka_max;
  float vx_ks_min, vx_ks_max;

  float* vx = NULL;
  ASSERT(atm && radcoefs && part);
  (void)atm;

  vx = partition_get_voxel(part, vx_mcode, vx_iitem);

  /* Update the range of the radiative coefficients of the voxel */
  vx_ka_min = vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)];
  vx_ka_max = vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)];
  vx_ks_min = vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)];
  vx_ks_max = vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)];
  vx_ka_min = MMIN(vx_ka_min, radcoefs->ka_min);
  vx_ka_max = MMAX(vx_ka_max, radcoefs->ka_max);
  vx_ks_min = MMIN(vx_ks_min, radcoefs->ks_min);
  vx_ks_max = MMAX(vx_ks_max, radcoefs->ks_max);
  vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)] = vx_ka_min;
  vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)] = vx_ka_max;
  vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)] = vx_ks_min;
  vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)] = vx_ks_max;
}
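/* Note the min/max convention used by setup_tetra_radcoefs and update_voxel:
 * an "empty" range is encoded as min = FLT_MAX and max = -FLT_MAX, so any
 * later MMIN/MMAX update overwrites it with actual data, and a voxel whose
 * min remains greater than its max can be recognized as never having been
 * covered. This is the test used by vx_challenge_merge below. */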
static res_T
voxelize_tetra
  (struct rnatm* atm,
   const struct voxelize_args* args,
   struct partition* part, /* Partition to fill */
   const size_t cpnt)
{
  const struct suvm_volume* volume = NULL;
  size_t i;
  ASSERT(atm && check_voxelize_args(args) == RES_OK);
  ASSERT(cpnt == RNATM_GAS || cpnt < darray_aerosol_size_get(&atm->aerosols));

  /* Fetch the component volumetric mesh */
  if(cpnt == RNATM_GAS) {
    volume = atm->gas.volume;
  } else {
    volume = darray_aerosol_cdata_get(&atm->aerosols)[cpnt].volume;
  }

  FOR_EACH(i, 0, darray_size_t_size_get(args->tetra_ids)) {
    struct suvm_primitive tetra;
    struct suvm_polyhedron poly;
    double poly_low[3];
    double poly_upp[3];
    float vx_low[3];
    float vx_upp[3];
    uint32_t ivx_low[3];
    uint32_t ivx_upp[3];
    uint32_t ivx[3];
    uint64_t mcode[3]; /* Cache of 3D morton code */
    size_t iitem;
    const size_t itetra = darray_size_t_cdata_get(args->tetra_ids)[i];
    enum suvm_intersection_type intersect;

    /* Recover the tetrahedron and setup its polyhedron */
    SUVM(volume_get_primitive(volume, itetra, &tetra));
    SUVM(primitive_setup_polyhedron(&tetra, &poly));
    ASSERT(poly.lower[0] <= args->part_upp[0]);
    ASSERT(poly.lower[1] <= args->part_upp[1]);
    ASSERT(poly.lower[2] <= args->part_upp[2]);
    ASSERT(poly.upper[0] >= args->part_low[0]);
    ASSERT(poly.upper[1] >= args->part_low[1]);
    ASSERT(poly.upper[2] >= args->part_low[2]);

    /* Clamp the AABB of the polyhedron to the partition bounds */
    poly_low[0] = MMAX(poly.lower[0], args->part_low[0]);
    poly_low[1] = MMAX(poly.lower[1], args->part_low[1]);
    poly_low[2] = MMAX(poly.lower[2], args->part_low[2]);
    poly_upp[0] = MMIN(poly.upper[0], args->part_upp[0]);
    poly_upp[1] = MMIN(poly.upper[1], args->part_upp[1]);
    poly_upp[2] = MMIN(poly.upper[2], args->part_upp[2]);

    /* Transform the AABB of the polyhedron into the voxel space of the
     * partition */
    ivx_low[0] = (uint32_t)((poly_low[0] - args->part_low[0]) / args->vxsz[0]);
    ivx_low[1] = (uint32_t)((poly_low[1] - args->part_low[1]) / args->vxsz[1]);
    ivx_low[2] = (uint32_t)((poly_low[2] - args->part_low[2]) / args->vxsz[2]);
    ivx_upp[0] = (uint32_t)ceil((poly_upp[0] - args->part_low[0]) / args->vxsz[0]);
    ivx_upp[1] = (uint32_t)ceil((poly_upp[1] - args->part_low[1]) / args->vxsz[1]);
    ivx_upp[2] = (uint32_t)ceil((poly_upp[2] - args->part_low[2]) / args->vxsz[2]);
    ASSERT(ivx_upp[0] <= args->part_def);
    ASSERT(ivx_upp[1] <= args->part_def);
    ASSERT(ivx_upp[2] <= args->part_def);

    /* Compute the range of the tetrahedron radiative coefficients */
    FOR_EACH(iitem, 0, args->batch_size) {
      const size_t iband = args->accel_structs[iitem].iband;
      const size_t iquad_pt = args->accel_structs[iitem].iquad_pt;
      struct radcoefs* radcoefs = args->per_item_radcoefs + iitem;
      setup_tetra_radcoefs(atm, &tetra, cpnt, iband, iquad_pt, radcoefs);
    }

    /* Iterate over the voxels intersected by the AABB of the polyhedron */
    FOR_EACH(ivx[2], ivx_low[2], ivx_upp[2]) {
      vx_low[2] = (float)((double)ivx[2]*args->vxsz[2] + args->part_low[2]);
      vx_upp[2] = vx_low[2] + (float)args->vxsz[2];
      mcode[2] = morton3D_encode_u21(ivx[2]);

      FOR_EACH(ivx[1], ivx_low[1], ivx_upp[1]) {
        vx_low[1] = (float)((double)ivx[1]*args->vxsz[1] + args->part_low[1]);
        vx_upp[1] = vx_low[1] + (float)args->vxsz[1];
        mcode[1] = (morton3D_encode_u21(ivx[1]) << 1) | mcode[2];

        FOR_EACH(ivx[0], ivx_low[0], ivx_upp[0]) {
          vx_low[0] = (float)((double)ivx[0]*args->vxsz[0] + args->part_low[0]);
          vx_upp[0] = vx_low[0] + (float)args->vxsz[0];
          mcode[0] = (morton3D_encode_u21(ivx[0]) << 2) | mcode[1];

          intersect = suvm_polyhedron_intersect_aabb(&poly, vx_low, vx_upp);
          if(intersect == SUVM_INTERSECT_NONE) continue;

          /* Update the intersected voxel */
          FOR_EACH(iitem, 0, args->batch_size) {
            struct radcoefs* radcoefs = args->per_item_radcoefs + iitem;
            update_voxel(atm, radcoefs, part, mcode[0], iitem);
          }
        }
      }
    }
  }

  return RES_OK;
}
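/* A worked example of the incremental morton encoding used in voxelize_tetra:
 * morton3D_encode_u21 spreads the bits of a 21-bit coordinate every 3 bits,
 * so with x shifted by 2, y by 1 and z by 0 the final code interleaves the
 * coordinates as ... x1 y1 z1 x0 y0 z0. For the voxel (x,y,z) = (1,0,1) this
 * gives (1<<2) | (0<<1) | 1 = 0b101 = 5. Caching mcode[2] and mcode[1] in the
 * outer loops avoids re-encoding the z and y coordinates for every x. */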
static res_T
voxelize_partition(struct rnatm* atm, const struct voxelize_args* args)
{
  STATIC_ASSERT((RNATM_GAS+1) == 0, Unexpected_constant_value);

  size_t naerosols = 0;
  size_t cpnt = 0;
  int part_is_empty = 1;
  res_T res = RES_OK;
  ASSERT(atm && check_voxelize_args(args) == RES_OK);

  partition_clear_voxels(args->part);

  /* Voxelize atmospheric components */
  naerosols = darray_aerosol_size_get(&atm->aerosols);

  /* CAUTION: in the following loop, it is assumed that the first component is
   * necessarily the gas which is therefore voxelized directly into the
   * partition. Aerosols are voxelized in a dummy partition before their
   * accumulation in the partition */
  cpnt = RNATM_GAS;

  do {
    struct suvm_volume* volume = NULL;

    /* Get the volumetric mesh of the component */
    if(cpnt == RNATM_GAS) {
      volume = atm->gas.volume;
    } else {
      volume = darray_aerosol_cdata_get(&atm->aerosols)[cpnt].volume;
    }

    /* Find the list of tetrahedra that overlap the partition */
    darray_size_t_clear(args->tetra_ids);
    res = suvm_volume_intersect_aabb
      (volume, args->part_low, args->part_upp, register_tetra, args->tetra_ids);
    if(res != RES_OK) goto error;

    /* The partition is not covered by any tetrahedron of the component */
    if(darray_size_t_size_get(args->tetra_ids) == 0) continue;

    /* The partition is covered by at least one tetrahedron and is therefore no
     * longer empty */
    part_is_empty = 0;

    /* Voxelize the gas directly into the partition */
    if(cpnt == RNATM_GAS) {
      res = voxelize_tetra(atm, args, args->part, cpnt);
      if(res != RES_OK) goto error;

    /* Voxelize each aerosol into the dummy partition before its accumulation */
    } else {
      partition_clear_voxels(args->part_dummy);
      res = voxelize_tetra(atm, args, args->part_dummy, cpnt);
      if(res != RES_OK) goto error;
      partition_accum(args->part, args->part_dummy);
    }
  } while(++cpnt < naerosols);

  /* The partition is not covered by any tetrahedron */
  if(part_is_empty) partition_empty(args->part);

exit:
  return res;
error:
  goto exit;
}
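/* Note on the iteration order in voxelize_partition: the STATIC_ASSERT above
 * guarantees that RNATM_GAS is the all-ones value of its type, i.e. that
 * RNATM_GAS + 1 == 0. Starting the do-while loop at cpnt = RNATM_GAS thus
 * visits the gas first, then wraps to 0 and visits the aerosols 0, 1, ...,
 * naerosols-1 before the `++cpnt < naerosols' test terminates the loop; with
 * zero aerosols only the gas is visited. */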
static res_T
voxelize_batch(struct rnatm* atm, const struct voxelize_batch_args* args)
{
  int64_t i;
  int progress = 0;
  ATOMIC nparts_voxelized;
  ATOMIC res = RES_OK;
  ASSERT(atm && check_voxelize_batch_args(atm, args) == RES_OK);

  pool_reset(args->pool); /* Reset the next partition id to 0 */

  /* #partitions already voxelized */
  nparts_voxelized = (ATOMIC)args->nparts_voxelized;

  /* Iterate over the partitions of the grid according to their Morton order
   * and voxelize the tetrahedra that overlap them */
  omp_set_num_threads((int)atm->nthreads);
  #pragma omp parallel for schedule(static, 1/*chunk size*/)
  for(i = 0; i < (int64_t)args->nparts_adjusted; ++i) {
    struct voxelize_args voxelize_args = VOXELIZE_ARGS_NULL;
    struct partition* part_dummy = NULL;
    struct darray_size_t* tetra_ids = NULL;
    struct partition* part = NULL;
    struct radcoefs* per_item_radcoefs = NULL;
    double part_low[3];
    double part_upp[3];
    uint32_t part_ids[3];
    size_t n;
    const int ithread = omp_get_thread_num();
    int pcent;
    res_T res_local = RES_OK;

    if(ATOMIC_GET(&res) != RES_OK) continue;

    /* Recover the batch partitions */
    part = pool_next_partition(args->pool);
    if(!part) {
      ATOMIC_SET(&res, RES_UNKNOWN_ERR);
      continue;
    }

    /* Is the current partition out of bounds of the atmosphere grid */
    morton_xyz_decode_u21((uint64_t)partition_get_id(part), part_ids);
    if(part_ids[0] >= args->nparts[0]
    || part_ids[1] >= args->nparts[1]
    || part_ids[2] >= args->nparts[2]) {
      partition_free(part);
      continue;
    }

    /* Compute the AABB of the partition */
    part_low[0] = (double)(part_ids[0] * args->part_def) * args->vxsz[0];
    part_low[1] = (double)(part_ids[1] * args->part_def) * args->vxsz[1];
    part_low[2] = (double)(part_ids[2] * args->part_def) * args->vxsz[2];
    part_low[0] = part_low[0] + args->atm_low[0];
    part_low[1] = part_low[1] + args->atm_low[1];
    part_low[2] = part_low[2] + args->atm_low[2];
    part_upp[0] = part_low[0] + (double)args->part_def * args->vxsz[0];
    part_upp[1] = part_low[1] + (double)args->part_def * args->vxsz[1];
    part_upp[2] = part_low[2] + (double)args->part_def * args->vxsz[2];

    /* Retrieve the array where to store the indices of the tetrahedra that
     * overlap the partition */
    tetra_ids = darray_size_t_list_data_get(args->per_thread_tetra_list) + ithread;

    /* Retrieve the spectral item data structure used to store the radiative
     * coefficients */
    per_item_radcoefs = darray_radcoef_list_data_get
      (args->per_thread_radcoef_list)[ithread];

    /* Obtain the dummy partition needed to temporarily store the aerosol
     * voxelization prior to its accumulation in the current partition */
    part_dummy = darray_partition_data_get
      (args->per_thread_dummy_partition)[ithread];

    /* Voxelize the partition and, once done, commit it */
    voxelize_args.tetra_ids = tetra_ids;
    voxelize_args.part_def = args->part_def;
    d3_set(voxelize_args.part_low, part_low);
    d3_set(voxelize_args.part_upp, part_upp);
    d3_set(voxelize_args.vxsz, args->vxsz);
    voxelize_args.part = part;
    voxelize_args.part_dummy = part_dummy;
    voxelize_args.per_item_radcoefs = per_item_radcoefs;
    voxelize_args.accel_structs = args->accel_structs;
    voxelize_args.batch_size = args->batch_size;
    res_local = voxelize_partition(atm, &voxelize_args);
    if(res_local == RES_OK) {
      partition_commit(part, args->batch_size);
    } else {
      partition_free(part);
      ATOMIC_SET(&res, res_local);
      continue;
    }

    /* Update the progress bar */
    n = (size_t)ATOMIC_INCR(&nparts_voxelized);
    pcent = (int)((n * 100) / args->nparts_overall);
    #pragma omp critical
    if(pcent > progress) {
      progress = pcent;
      log_info(atm, VOXELIZE_MSG, pcent);
    }
  }
  if(res != RES_OK) goto error;

exit:
  return (res_T)res;
error:
  goto exit;
}
static res_T
voxelize_atmosphere
  (struct rnatm* atm,
   struct pool* pool,
   struct build_sync* sync)
{
  /* Batch of spectral items to process in a single voxelization */
  struct voxelize_batch_args batch_args = VOXELIZE_BATCH_ARGS_NULL;
  size_t batch_size = 0;
  size_t nbatches = 0;
  size_t ibatch = 0;

  /* Working data structures */
  struct darray_size_t_list per_thread_tetra_list;
  struct darray_radcoef_list per_thread_radcoef_list;
  struct darray_partition per_thread_dummy_partition;

  /* Voxelization parameters */
  double atm_low[3];
  double atm_upp[3];
  double vxsz[3];
  size_t nparts[3]; /* #partitions along the 3 axes */
  size_t nparts_adjusted; /* #partitions allowing their indexing by morton id */
  size_t nparts_overall; /* Total number of voxelized partitions */
  size_t part_def; /* Definition of a partition */

  /* Miscellaneous variables */
  struct accel_struct* accel_structs = NULL;
  size_t naccel_structs = 0;
  size_t i = 0;
  res_T res = RES_OK;

  ASSERT(atm && pool && sync);

  darray_size_t_list_init(atm->allocator, &per_thread_tetra_list);
  darray_radcoef_list_init(atm->allocator, &per_thread_radcoef_list);
  darray_partition_init(atm->allocator, &per_thread_dummy_partition);

  /* Allocate the per thread lists */
  res = darray_size_t_list_resize(&per_thread_tetra_list, atm->nthreads);
  if(res != RES_OK) goto error;
  res = darray_radcoef_list_resize(&per_thread_radcoef_list, atm->nthreads);
  if(res != RES_OK) goto error;
  res = darray_partition_resize(&per_thread_dummy_partition, atm->nthreads);
  if(res != RES_OK) goto error;

  /* Setup the per thread and per item working data structures */
  batch_size = pool_get_voxel_width(pool);
  FOR_EACH(i, 0, atm->nthreads) {
    struct radcoefs* per_item_k = NULL;
    per_item_k = MEM_CALLOC(atm->allocator, batch_size, sizeof(*per_item_k));
    if(!per_item_k) { res = RES_MEM_ERR; goto error; }
    darray_radcoef_list_data_get(&per_thread_radcoef_list)[i] = per_item_k;
  }

  /* Setup the per thread dummy partition */
  FOR_EACH(i, 0, atm->nthreads) {
    struct partition* part = pool_dummy_partition(pool);
    darray_partition_data_get(&per_thread_dummy_partition)[i] = part;
  }

  /* Recover the AABB of the atmosphere and compute the size of a voxel */
  SUVM(volume_get_aabb(atm->gas.volume, atm_low, atm_upp));
  vxsz[0] = (atm_upp[0] - atm_low[0]) / (double)atm->grid_definition[0];
  vxsz[1] = (atm_upp[1] - atm_low[1]) / (double)atm->grid_definition[1];
  vxsz[2] = (atm_upp[2] - atm_low[2]) / (double)atm->grid_definition[2];

  /* Number of partitions required to cover the entire atmosphere grid */
  part_def = pool_get_partition_definition(pool);
  nparts[0] = (atm->grid_definition[0] + (part_def-1)/*ceil*/) / part_def;
  nparts[1] = (atm->grid_definition[1] + (part_def-1)/*ceil*/) / part_def;
  nparts[2] = (atm->grid_definition[2] + (part_def-1)/*ceil*/) / part_def;

  /* Adjust the #partitions allowing their indexing by their morton code */
  nparts_adjusted = MMAX(nparts[0], MMAX(nparts[1], nparts[2]));
  nparts_adjusted = round_up_pow2(nparts_adjusted);
  nparts_adjusted = nparts_adjusted * nparts_adjusted * nparts_adjusted;

  /* Calculate the number of batches required to build the acceleration
   * structures for the submitted spectral bands; currently one octree is
   * built per quadrature point of each band, and each octree requires a
   * complete voxelization of the atmospheric meshes, which takes time. To
   * amortize this cost without a prohibitive increase in memory space, up to
   * N octrees are filled from a single voxelization of the mesh. The number
   * of batches corresponds to the total number of voxelizations required */
  accel_structs = darray_accel_struct_data_get(&atm->accel_structs);
  naccel_structs = darray_accel_struct_size_get(&atm->accel_structs);
  nbatches = (naccel_structs + (batch_size - 1)/*ceil*/) / batch_size;
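  /* A worked batching example (numbers assumed for illustration): with
   * naccel_structs = 10 octrees to build and a voxel width of batch_size = 4,
   * nbatches = ceil(10/4) = 3, so the grid is fully voxelized 3 times; with
   * nparts = {4,2,1} the progress bar then counts 4*2*1*3 = 24 partition
   * voxelizations overall. */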
  /* Calculate the total number of partitions to voxelize. Note that the same
   * partition can be voxelized several times, once per batch */
  nparts_overall = nparts[0] * nparts[1] * nparts[2] * nbatches;

  /* Print the size of a voxel */
  log_info(atm, "voxel size = {%g, %g, %g}\n", SPLIT3(vxsz));

  /* Setup the regular batch arguments */
  batch_args.per_thread_tetra_list = &per_thread_tetra_list;
  batch_args.per_thread_radcoef_list = &per_thread_radcoef_list;
  batch_args.per_thread_dummy_partition = &per_thread_dummy_partition;
  batch_args.pool = pool;
  batch_args.part_def = part_def;
  batch_args.nparts[0] = nparts[0];
  batch_args.nparts[1] = nparts[1];
  batch_args.nparts[2] = nparts[2];
  batch_args.nparts_adjusted = nparts_adjusted;
  batch_args.nparts_overall = nparts_overall;
  batch_args.nparts_voxelized = 0;
  d3_set(batch_args.atm_low, atm_low);
  d3_set(batch_args.atm_upp, atm_upp);
  d3_set(batch_args.vxsz, vxsz);

  /* Print the voxelization status */
  log_info(atm, VOXELIZE_MSG, 0);

  /* Voxelize the batches */
  FOR_EACH(ibatch, 0, nbatches) {
    size_t item_range[2];
    item_range[0] = ibatch * batch_size;
    item_range[1] = MMIN(item_range[0] + batch_size, naccel_structs);

    batch_args.accel_structs = accel_structs + item_range[0];
    batch_args.batch_size = item_range[1] - item_range[0];

    /* Wait for the building thread to finish consuming the previous batch */
    mutex_lock(sync->mutex);
    if(sync->ibatch != ibatch) {
      ASSERT(sync->ibatch == ibatch - 1);
      cond_wait(sync->cond, sync->mutex);
      /* An error occurred in the building thread */
      if(sync->ibatch != ibatch) res = RES_BAD_ARG;
    }
    mutex_unlock(sync->mutex);
    if(res != RES_OK) goto error;

    /* Generate the voxels of the current batch */
    res = voxelize_batch(atm, &batch_args);
    if(res != RES_OK) goto error;

    batch_args.nparts_voxelized += nparts[0]*nparts[1]*nparts[2];
  }

  /* Print the final status message */
  log_info(atm, VOXELIZE_MSG"\n", 100);

exit:
  FOR_EACH(i, 0, darray_radcoef_list_size_get(&per_thread_radcoef_list)) {
    struct radcoefs* per_item_k = NULL;
    per_item_k = darray_radcoef_list_data_get(&per_thread_radcoef_list)[i];
    MEM_RM(atm->allocator, per_item_k);
  }
  FOR_EACH(i, 0, darray_partition_size_get(&per_thread_dummy_partition)) {
    struct partition* part = NULL;
    part = darray_partition_data_get(&per_thread_dummy_partition)[i];
    partition_free(part);
  }
  darray_size_t_list_release(&per_thread_tetra_list);
  darray_radcoef_list_release(&per_thread_radcoef_list);
  darray_partition_release(&per_thread_dummy_partition);
  return res;
error:
  goto exit;
}
static void
vx_get(const size_t xyz[3], const uint64_t mcode, void* dst, void* context)
{
  struct build_octree_context* ctx = context;
  const float* vx = NULL;
  uint64_t ivx, ipart;
  int log2_part_def;
  ASSERT(xyz && dst && ctx);
  (void)xyz;

  /* Retrieve the partition's morton id and the voxel's morton id in the
   * partition from the morton code of the voxel in the whole octree. Note
   * that, like voxels, partitions are sorted in morton order. Therefore, the
   * partition's morton id is encoded in the most significant bits of the
   * voxel's morton code, while the voxel's morton id in the partition is
   * stored in its least significant bits */
  log2_part_def = log2i((int)pool_get_partition_definition(ctx->pool));
  ipart = (mcode >> (log2_part_def*3));
  ivx = (mcode & (BIT_U64(log2_part_def*3)-1));

  /* Recover the partition storing the voxel */
  if(ctx->part == NULL || partition_get_id(ctx->part) != ipart) {
    if(ctx->part) partition_free(ctx->part);
    ctx->part = pool_fetch_partition(ctx->pool, ipart);

    if(ctx->part == NULL) { /* An error occurred */
      memset(dst, 0, NFLOATS_PER_VOXEL * sizeof(float));
      return;
    }
  }

  vx = partition_cget_voxel(ctx->part, ivx, ctx->iitem);
  memcpy(dst, vx, NFLOATS_PER_VOXEL * sizeof(float));
}
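/* A worked example of the morton code split performed in vx_get: with the
 * default partition definition of 8 voxels per axis (see create_pool below),
 * log2_part_def = 3 and a partition holds 2^(3*3) = 512 voxels. For
 * mcode = 0x1234 = 4660, the partition id is 4660 >> 9 = 9 and the voxel id
 * within that partition is 4660 & 511 = 52. */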
static void
vx_merge(void* dst, const void* vxs[], const size_t nvxs, void* ctx)
{
  float ka_min = FLT_MAX, ka_max = -FLT_MAX;
  float ks_min = FLT_MAX, ks_max = -FLT_MAX;
  float* merged_vx = dst;
  size_t ivx;
  ASSERT(dst && vxs && nvxs);
  (void)ctx;

  FOR_EACH(ivx, 0, nvxs) {
    const float* vx = vxs[ivx];
    ka_min = MMIN(ka_min, vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)]);
    ka_max = MMAX(ka_max, vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)]);
    ks_min = MMIN(ks_min, vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)]);
    ks_max = MMAX(ks_max, vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)]);
  }

  merged_vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)] = ka_min;
  merged_vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)] = ka_max;
  merged_vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)] = ks_min;
  merged_vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)] = ks_max;
}

static int
vx_challenge_merge
  (const struct svx_voxel vxs[],
   const size_t nvxs,
   void* context)
{
  const struct build_octree_context* ctx = context;
  double low[3] = { DBL_MAX, DBL_MAX, DBL_MAX};
  double upp[3] = {-DBL_MAX,-DBL_MAX,-DBL_MAX};
  double tau;
  float kext_min = FLT_MAX, kext_max = -FLT_MAX;
  double sz_max;
  size_t ivx;
  ASSERT(vxs && nvxs && context);

  /* Compute the range of the extinction coefficients of the submitted voxels */
  FOR_EACH(ivx, 0, nvxs) {
    const float* vx = vxs[ivx].data;
    const float ka_min = vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)];
    const float ka_max = vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)];
    const float ks_min = vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)];
    const float ks_max = vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)];
    if(ka_min > ka_max) { ASSERT(ks_min > ks_max); continue; } /* Empty voxel */
    kext_min = MMIN(kext_min, ka_min + ks_min);
    kext_max = MMAX(kext_max, ka_max + ks_max);
  }

  /* Always merge empty voxels */
  if(kext_min > kext_max) return 1;

  /* Compute the AABB of the submitted voxels */
  FOR_EACH(ivx, 0, nvxs) {
    low[0] = MMIN(vxs[ivx].lower[0], low[0]);
    low[1] = MMIN(vxs[ivx].lower[1], low[1]);
    low[2] = MMIN(vxs[ivx].lower[2], low[2]);
    upp[0] = MMAX(vxs[ivx].upper[0], upp[0]);
    upp[1] = MMAX(vxs[ivx].upper[1], upp[1]);
    upp[2] = MMAX(vxs[ivx].upper[2], upp[2]);
  }

  /* Compute the size of the largest dimension of the AABB */
  sz_max = upp[0] - low[0];
  sz_max = MMAX(upp[1] - low[1], sz_max);
  sz_max = MMAX(upp[2] - low[2], sz_max);

  /* Check if the voxels can be merged by comparing the optical thickness along
   * the maximum size of the merged AABB against a user-defined threshold */
  tau = (kext_max - kext_min) * sz_max;
  return tau < ctx->tau_threshold;
}
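/* A worked example of the merge criterion above (numbers assumed for
 * illustration): for 8 sibling voxels whose extinction coefficient
 * kext = ka + ks spans [1e-4, 1e-3] m^-1 over a merged AABB whose largest
 * dimension is 500 m, the heterogeneity-based optical thickness is
 * tau = (1e-3 - 1e-4) * 500 = 0.45. With a tau_threshold of 1 the voxels are
 * merged into a single octree node, while a threshold of 0.1 would keep them
 * split. */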
static res_T
init_octrees_storage(struct rnatm* atm, const struct rnatm_create_args* args)
{
  struct octrees_storage_desc challenge = OCTREES_STORAGE_DESC_NULL;
  struct octrees_storage_desc reference = OCTREES_STORAGE_DESC_NULL;
  res_T res = RES_OK;
  ASSERT(atm && args);

  if(!args->octrees_storage) goto exit;

  /* Register the storage file */
  atm->octrees_storage = args->octrees_storage;

  res = octrees_storage_desc_init(atm, &challenge);
  if(res != RES_OK) goto error;

  /* Save the computed descriptor in the storage */
  if(!args->load_octrees_from_storage) {
    res = write_octrees_storage_desc(atm, &challenge, atm->octrees_storage);
    if(res != RES_OK) goto error;

  /* The octrees must be loaded from storage. Verify that the data and settings
   * used by the saved octrees are compatible with the current parameters */
  } else {

    res = octrees_storage_desc_init_from_stream
      (atm, &reference, args->octrees_storage);
    if(res != RES_OK) goto error;

    res = check_octrees_storage_compatibility(&reference, &challenge);
    if(res != RES_OK) {
      log_err(atm,
        "unable to load octrees from the supplied storage. The saved "
        "octrees are built from different data\n");
      res = RES_BAD_ARG;
      goto error;
    }
  }

exit:
  octrees_storage_desc_release(&challenge);
  octrees_storage_desc_release(&reference);
  return res;
error:
  goto exit;
}

static res_T
create_storage_toc(struct rnatm* atm, fpos_t* toc)
{
  int err = 0;
  res_T res = RES_OK;
  ASSERT(atm && toc);

  if(!atm->octrees_storage) goto exit;

  err = fgetpos(atm->octrees_storage, toc);
  if(err != 0) {
    log_err(atm, "error retrieving toc position of octrees storage -- %s\n",
      strerror(errno));
    res = RES_IO_ERR;
    goto error;
  }

  res = reserve_octrees_storage_toc(atm, atm->octrees_storage);
  if(res != RES_OK) {
    log_err(atm, "error reserving the octrees storage toc -- %s\n",
      res_to_cstr(res));
    goto error;
  }

exit:
  return res;
error:
  goto exit;
}

static res_T
offload_octrees
  (struct rnatm* atm,
   const size_t ioctrees[2]) /* Octrees to write. Limits are inclusive */
{
  size_t ioctree = 0;
  res_T res = RES_OK;
  ASSERT(atm && ioctrees && ioctrees[0] <= ioctrees[1]);
  ASSERT(ioctrees[1] < darray_accel_struct_size_get(&atm->accel_structs));

  /* No storage: nothing to do */
  if(!atm->octrees_storage) goto exit;

  res = store_octrees(atm, ioctrees, atm->octrees_storage);
  if(res != RES_OK) goto error;

  FOR_EACH(ioctree, ioctrees[0], ioctrees[1]+1) {
    unload_octree(atm, ioctree); /* Free the octree memory */
  }

exit:
  return res;
error:
  goto exit;
}
static res_T
finalize_storage(struct rnatm* atm, const fpos_t* toc)
{
  int err = 0;
  res_T res = RES_OK;
  ASSERT(atm && toc);

  /* No storage: nothing to do */
  if(!atm->octrees_storage) goto exit;

  err = fsetpos(atm->octrees_storage, toc);
  if(err != 0) {
    log_err(atm, "error positioning on the octrees storage toc -- %s\n",
      strerror(errno));
    res = RES_IO_ERR;
    goto error;
  }

  /* Update the table of contents */
  res = write_octrees_storage_toc(atm, atm->octrees_storage);
  if(res != RES_OK) goto error;

  CHK(fflush(atm->octrees_storage) == 0);

exit:
  return res;
error:
  goto exit;
}
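/* Overview of the producer/consumer pipeline (see also create_octrees below):
 * two OpenMP sections run concurrently. The voxelization section produces the
 * partitions of batch i and then blocks on sync->cond until the build section
 * has incremented sync->ibatch, i.e. until the octrees of batch i have been
 * fully built, before producing batch i+1. The build section below consumes
 * the committed partitions through pool_fetch_partition (see vx_get) and
 * signals sync->cond once per batch, or on error so that the producer does
 * not wait forever. */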
static res_T
build_octrees
  (struct rnatm* atm,
   const struct rnatm_create_args* args,
   struct pool* pool,
   struct build_sync* sync)
{
  fpos_t storage_toc;
  struct accel_struct* accel_structs = NULL;
  double low[3], upp[3];
  size_t def[3];
  size_t istruct;
  size_t naccel_structs;
  size_t voxel_width;
  ATOMIC res = RES_OK;
  ASSERT(atm && args && pool);

  /* Recover the AABB from the atmosphere. Note that we have already made sure
   * when setting up the meshes of the gas and aerosols that the aerosols are
   * included in the gas. Therefore, the AABB of the gas is the same as the
   * AABB of the atmosphere */
  SUVM(volume_get_aabb(atm->gas.volume, low, upp));

  /* Retrieve the grid definition */
  def[0] = (size_t)atm->grid_definition[0];
  def[1] = (size_t)atm->grid_definition[1];
  def[2] = (size_t)atm->grid_definition[2];

  accel_structs = darray_accel_struct_data_get(&atm->accel_structs);
  naccel_structs = darray_accel_struct_size_get(&atm->accel_structs);
  voxel_width = pool_get_voxel_width(pool);

  /* Setup the disk storage of the octrees */
  res = create_storage_toc(atm, &storage_toc);
  if(res != RES_OK) goto error;

  /* Build the octrees. Each thread consumes an element of the voxels generated
   * by the voxelization thread, each element corresponding to the voxel of an
   * octree to be constructed. By fixing the number of threads to the width of
   * the voxel, we therefore build `voxel_width' octrees in parallel from a
   * single voxelization of the atmospheric meshes */
  for(istruct = 0; istruct < naccel_structs; istruct += voxel_width) {
    const size_t batch_size = MMIN(voxel_width, naccel_structs - istruct);
    size_t ioctrees[2];
    omp_set_num_threads((int)batch_size);

    /* Note that we are using a parallel block rather than a parallel loop in
     * order to add an implicit barrier after a batch has been fully consumed.
     * This is necessary to prevent a thread from consuming voxels from the
     * previous batch */
    #pragma omp parallel
    {
      struct build_octree_context ctx = BUILD_OCTREE_CONTEXT_NULL;
      struct svx_voxel_desc vx_desc = SVX_VOXEL_DESC_NULL;
      struct svx_tree* octree = NULL;
      const int ithread = omp_get_thread_num();
      const size_t istruct_curr = (size_t)ithread + istruct;
      res_T res_local = RES_OK;

      /* Setup the build context */
      ctx.pool = pool;
      ctx.part = NULL;
      ctx.iitem = (size_t)ithread;
      ctx.tau_threshold = args->optical_thickness;

      /* Setup the voxel descriptor */
      vx_desc.get = vx_get;
      vx_desc.merge = vx_merge;
      vx_desc.challenge_merge = vx_challenge_merge;
      vx_desc.context = &ctx;
      vx_desc.size = NFLOATS_PER_VOXEL * sizeof(float);

      res_local = svx_octree_create(atm->svx, low, upp, def, &vx_desc, &octree);
      if(ctx.part) partition_free(ctx.part);
      if(res_local != RES_OK) {
        ATOMIC_SET(&res, res_local);
      } else { /* Register the built octree */
        accel_structs[istruct_curr].octree = octree;
      }
    }
    if(res != RES_OK) goto error;

    /* Signal the voxelization thread to generate the next batch */
    mutex_lock(sync->mutex);
    sync->ibatch += 1;
    mutex_unlock(sync->mutex);
    cond_signal(sync->cond);

    /* Offload the built octrees */
    ioctrees[0] = istruct;
    ioctrees[1] = istruct + batch_size - 1;
    res = offload_octrees(atm, ioctrees);
    if(res != RES_OK) goto error;
  }

  /* Finalize the file where the octrees are offloaded */
  res = finalize_storage(atm, &storage_toc);
  if(res != RES_OK) goto error;

exit:
  return (res_T)res;
error:
  /* Signal to the voxelization thread that there is no need to wait for the
   * build thread */
  cond_signal(sync->cond);
  darray_accel_struct_clear(&atm->accel_structs);
  goto exit;
}
static res_T
create_pool(struct rnatm* atm, struct pool** out_pool, const size_t nbands)
{
  /* Empirical constant that defines the number of non empty partitions to
   * pre-allocate per thread to avoid contention between the thread building
   * the octrees from the partitions and the threads that fill these
   * partitions */
  const size_t NPARTITIONS_PER_THREAD = 64;

  /* Number of dummy partitions per thread. These partitions are used to
   * temporarily store the voxelization of aerosols that are then accumulated
   * in the voxelized partition. Actually, one such partition per voxelization
   * thread is required */
  const size_t NPARTITIONS_PER_THREAD_DUMMY = 1;

  /* Empirical constant that defines the total number of partitions to
   * pre-allocate per thread. This constant is necessarily greater than or
   * equal to (NPARTITIONS_PER_THREAD + NPARTITIONS_PER_THREAD_DUMMY), the
   * difference representing the number of completely empty partitions. Such
   * partitions help reduce thread contention with a sparse grid without
   * significantly increasing memory usage */
  const size_t OVERALL_NPARTITIONS_PER_THREAD =
    NPARTITIONS_PER_THREAD * 64
    + NPARTITIONS_PER_THREAD_DUMMY;

  /* Number of items stored per voxel data. A width greater than 1 makes it
   * possible to store by partition the radiative coefficients of several
   * spectral data. The goal is to voxelize the volumetric mesh once for N
   * spectral data */
  const size_t VOXEL_WIDTH = MMIN(4, nbands);

  /* Definition of a partition on the 3 axes */
  const size_t PARTITION_DEFINITION = 8;

  struct pool_create_args pool_args = POOL_CREATE_ARGS_DEFAULT;
  struct pool* pool = NULL;
  res_T res = RES_OK;
  ASSERT(atm && out_pool);

  /* Create the partition pool */
  pool_args.npartitions = atm->nthreads * OVERALL_NPARTITIONS_PER_THREAD;
  pool_args.npreallocated_partitions = atm->nthreads * NPARTITIONS_PER_THREAD;
  pool_args.partition_definition = PARTITION_DEFINITION;
  pool_args.voxel_width = VOXEL_WIDTH;
  pool_args.allocator = atm->allocator;
  res = pool_create(&pool_args, &pool);
  if(res != RES_OK) goto error;

exit:
  *out_pool = pool;
  return res;
error:
  log_err(atm, "failed to create the partition pool -- %s\n",
    res_to_cstr(res));
  if(pool) { pool_ref_put(pool); pool = NULL; }
  goto exit;
}
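/* A worked sizing example for create_pool (numbers assumed for illustration):
 * with atm->nthreads = 8, the pool holds 8 * (64*64 + 1) = 32776 partition
 * slots, of which 8 * 64 = 512 are pre-allocated. Each partition stores
 * 8^3 = 512 voxels whose data is VOXEL_WIDTH = min(4, nbands) spectral items
 * wide, so up to 4 octrees are fed per voxelization pass. */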
static res_T
create_octrees(struct rnatm* atm, const struct rnatm_create_args* args)
{
  struct build_sync sync = BUILD_SYNC_NULL;
  struct pool* pool = NULL;
  size_t nbands = 0;

  ATOMIC res = RES_OK;
  ASSERT(atm);

  nbands = darray_band_size_get(&atm->bands);

  res = create_pool(atm, &pool, nbands);
  if(res != RES_OK) goto error;
  res = build_sync_init(&sync);
  if(res != RES_OK) goto error;

  log_info(atm,
    "partitioning of radiative properties "
    "(grid definition = %ux%ux%u; #octrees = %lu)\n",
    SPLIT3(atm->grid_definition),
    (unsigned long)darray_accel_struct_size_get(&atm->accel_structs));

  /* Enable nested threads to allow multi-threading when voxelizing tetrahedra
   * and building octrees */
  omp_set_nested(1);

  #pragma omp parallel sections num_threads(2)
  {
    #pragma omp section
    {
      const res_T res_local = voxelize_atmosphere(atm, pool, &sync);
      if(res_local != RES_OK) {
        log_err(atm, "error when voxelizing the atmosphere -- %s\n",
          res_to_cstr((res_T)res_local));
        pool_invalidate(pool);
        ATOMIC_SET(&res, res_local);
      }
    }

    #pragma omp section
    {
      const res_T res_local = build_octrees(atm, args, pool, &sync);
      if(res_local != RES_OK) {
        log_err(atm, "error building octrees -- %s\n",
          res_to_cstr((res_T)res_local));
        pool_invalidate(pool);
        ATOMIC_SET(&res, res_local);
      }
    }
  }
  if(res != RES_OK) goto error;

exit:
  build_sync_release(&sync);
  if(pool) pool_ref_put(pool);
  return (res_T)res;
error:
  goto exit;
}

static INLINE res_T
load_octrees(struct rnatm* atm)
{
  res_T res = RES_OK;
  ASSERT(atm && atm->octrees_storage);

  /* Only load the storage table of contents. The octrees will be loaded on
   * demand */
  res = read_octrees_storage_toc(atm, atm->octrees_storage);
  if(res != RES_OK) goto error;

exit:
  return res;
error:
  goto exit;
}

/*******************************************************************************
 * Exported functions
 ******************************************************************************/
res_T
rnatm_trace_ray
  (struct rnatm* atm,
   const struct rnatm_trace_ray_args* args,
   struct svx_hit* hit)
{
  const struct accel_struct* accel_struct = NULL;
  size_t i;
  size_t iaccel;
  res_T res = RES_OK;

  if(!atm || !hit) { res = RES_BAD_ARG; goto error; }
  res = check_trace_ray_args(atm, args);
  if(res != RES_OK) goto error;

  /* Start calculating the id of the acceleration structure to consider */
  i = args->iband - darray_band_cdata_get(&atm->bands)[0].index;
  ASSERT(i < darray_band_size_get(&atm->bands));
  ASSERT(args->iband == darray_band_cdata_get(&atm->bands)[i].index);

  /* Invalid quadrature point index */
  if(args->iquad >= darray_band_cdata_get(&atm->bands)[i].nquad_pts)
    return RES_BAD_ARG;

  /* Find the id of the acceleration structure */
  iaccel = darray_band_cdata_get(&atm->bands)[i].offset_accel_struct + args->iquad;

  /* Retrieve the acceleration structure */
  ASSERT(iaccel < darray_accel_struct_size_get(&atm->accel_structs));
  accel_struct = darray_accel_struct_cdata_get(&atm->accel_structs) + iaccel;
  ASSERT(args->iband == accel_struct->iband);
  ASSERT(args->iquad == accel_struct->iquad_pt);

  res = make_sure_octree_is_loaded(atm, iaccel);
  if(res != RES_OK) goto error;

  *hit = SVX_HIT_NULL;

  res = svx_tree_trace_ray
    (accel_struct->octree,
     args->ray_org,
     args->ray_dir,
     args->ray_range,
     args->challenge,
     args->filter,
     args->context,
     hit);
  if(res != RES_OK) {
    log_err(atm,
      "%s: error tracing ray "
      "(origin = %g, %g, %g; direction = %g, %g, %g; range = %g, %g) -- %s\n",
      FUNC_NAME, SPLIT3(args->ray_org), SPLIT3(args->ray_dir),
      SPLIT2(args->ray_range), res_to_cstr(res));
    goto error;
  }

exit:
  return res;
error:
  goto exit;
}
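/* A minimal usage sketch for rnatm_trace_ray (hypothetical caller code; the
 * RNATM_TRACE_RAY_ARGS_NULL initializer is an assumed counterpart of the
 * other *_NULL constants used in this file, and the field names follow the
 * accesses made above):
 *
 *   struct rnatm_trace_ray_args targs = RNATM_TRACE_RAY_ARGS_NULL;
 *   struct svx_hit hit = SVX_HIT_NULL;
 *   targs.iband = 0;
 *   targs.iquad = 0;
 *   targs.ray_org[0] = 0; targs.ray_org[1] = 0; targs.ray_org[2] = 1000;
 *   targs.ray_dir[0] = 0; targs.ray_dir[1] = 0; targs.ray_dir[2] = 1;
 *   targs.ray_range[0] = 0; targs.ray_range[1] = INF;
 *   res = rnatm_trace_ray(atm, &targs, &hit);
 *
 * On success, `hit' holds the Star-VoXel intersection record filled by
 * svx_tree_trace_ray. */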
/*******************************************************************************
 * Local functions
 ******************************************************************************/
res_T
setup_octrees(struct rnatm* atm, const struct rnatm_create_args* args)
{
  char buf[128];
  struct time t0, t1;
  size_t sz;
  res_T res = RES_OK;
  ASSERT(atm && args);

  /* Start time recording */
  time_current(&t0);

  /* Create the Star-VoXel device */
  res = svx_device_create
    (atm->logger, &atm->svx_allocator, atm->verbose, &atm->svx);
  if(res != RES_OK) goto error;

  /* Setup miscellaneous parameters */
  atm->optical_thickness = args->optical_thickness;

  res = compute_grid_definition(atm, args);
  if(res != RES_OK) goto error;

  /* Initialize the storage *after* calculating the grid definition since it is
   * saved in the storage file */
  res = init_octrees_storage(atm, args);
  if(res != RES_OK) goto error;

  res = setup_accel_structs(atm);
  if(res != RES_OK) goto error;

  if(args->load_octrees_from_storage) {
    res = load_octrees(atm);
    if(res != RES_OK) goto error;
  } else {
    res = create_octrees(atm, args);
    if(res != RES_OK) goto error;
  }

  /* Log elapsed time */
  time_sub(&t0, time_current(&t1), &t0);
  time_dump(&t0, TIME_ALL, NULL, buf, sizeof(buf));
  log_info(atm, "acceleration structures built in %s\n", buf);

  /* Log memory space used by Star-VX */
  sz = MEM_ALLOCATED_SIZE(&atm->svx_allocator);
  size_to_cstr(sz, SIZE_ALL, NULL, buf, sizeof(buf));
  log_info(atm, "Star-VoXel memory space: %s\n", buf);

exit:
  return res;
error:
  goto exit;
}