rnatm_octree.c
/* Copyright (C) 2022, 2023, 2025 Centre National de la Recherche Scientifique
 * Copyright (C) 2022, 2023, 2025 Institut Pierre-Simon Laplace
 * Copyright (C) 2022, 2023, 2025 Institut de Physique du Globe de Paris
 * Copyright (C) 2022, 2023, 2025 |Méso|Star> (contact@meso-star.com)
 * Copyright (C) 2022, 2023, 2025 Observatoire de Paris
 * Copyright (C) 2022, 2023, 2025 Université de Reims Champagne-Ardenne
 * Copyright (C) 2022, 2023, 2025 Université de Versaille Saint-Quentin
 * Copyright (C) 2022, 2023, 2025 Université Paul Sabatier
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>. */

#define _POSIX_C_SOURCE 200112L /* lround */

#include "rnatm_c.h"
#include "rnatm_log.h"
#include "rnatm_voxel_partition.h"
#include "rnatm_octrees_storage.h"

#include <star/sars.h>
#include <star/sck.h>
#include <star/suvm.h>
#include <star/svx.h>

#include <rsys/clock_time.h>
#include <rsys/condition.h>
#include <rsys/cstr.h>
#include <rsys/double3.h>
#include <rsys/float4.h>
#include <rsys/math.h>
#include <rsys/morton.h>
#include <rsys/mutex.h>
#include <rsys/rsys.h>

#include <math.h> /* lround */
#include <omp.h>

#define VOXELIZE_MSG "voxelize atmosphere: %3d%%\r"

/* Structure used to synchronise the voxelization and the build threads */
struct build_sync {
  struct mutex* mutex;
  struct cond* cond;
  size_t ibatch; /* Index of the batch currently consumed by the build thread */
};
#define BUILD_SYNC_NULL__ {NULL, NULL, 0}
static const struct build_sync BUILD_SYNC_NULL = BUILD_SYNC_NULL__;

struct build_octree_context {
  struct pool* pool;
  struct partition* part; /* Current partition */
  size_t iitem; /* Index of the item to handle in the partition */

  /* Optical thickness threshold criterion for merging voxels */
  double tau_threshold;
};
#define BUILD_OCTREE_CONTEXT_NULL__ {NULL, NULL, 0, 0}
static const struct build_octree_context BUILD_OCTREE_CONTEXT_NULL =
  BUILD_OCTREE_CONTEXT_NULL__;

struct radcoefs {
  float ka_min, ka_max;
  float ks_min, ks_max;
};
#define RADCOEFS_NULL__ { \
  FLT_MAX, -FLT_MAX, \
  FLT_MAX, -FLT_MAX \
}

/* Generate the dynamic array of radiative coefficient lists */
#define DARRAY_NAME radcoef_list
#define DARRAY_DATA struct radcoefs*
#include <rsys/dynamic_array.h>

/* Generate the dynamic array of partitions */
#define DARRAY_NAME partition
#define DARRAY_DATA struct partition*
#include <rsys/dynamic_array.h>

struct voxelize_batch_args {
  /* Working data structures */
  struct darray_size_t_list* per_thread_tetra_list;
  struct darray_radcoef_list* per_thread_radcoef_list;
  struct darray_partition* per_thread_dummy_partition;
  struct pool* pool;

  size_t part_def; /* Partition definition */
  size_t nparts[3]; /* #partitions required to cover the entire grid */
  size_t nparts_adjusted; /* #partitions allowing their indexing by morton id */
  size_t nparts_overall; /* Total number of partitions to voxelize */
  size_t nparts_voxelized; /* #partitions already voxelized */

  /* AABB of the atmosphere */
  double atm_low[3];
  double atm_upp[3];

  double vxsz[3]; /* Size of a voxel */

  struct accel_struct* accel_structs;
  size_t batch_size; /* #acceleration structures */
};
#define VOXELIZE_BATCH_ARGS_NULL__ { \
  NULL, /* Per thread tetra list */ \
  NULL, /* Per thread radiative coefs */ \
  NULL, /* Per thread dummy partition */ \
  NULL, /* Partition pool */ \
\
  0, /* Partition definition */ \
  {0,0,0}, /* #partitions to cover the entire grid */ \
  0, /* #partitions adjusted wrt morton indexing */ \
  0, /* Total number of partitions to voxelize */ \
  0, /* #partitions already voxelized */ \
\
  /* AABB of the atmosphere */ \
  { DBL_MAX, DBL_MAX, DBL_MAX}, \
  {-DBL_MAX,-DBL_MAX,-DBL_MAX}, \
\
  {0,0,0}, /* Size of a voxel */ \
\
  NULL, /* Spectral items */ \
  0 /* Batch size */ \
}
static const struct voxelize_batch_args VOXELIZE_BATCH_ARGS_NULL =
  VOXELIZE_BATCH_ARGS_NULL__;

struct voxelize_args {
  struct darray_size_t* tetra_ids;
  struct radcoefs* per_item_radcoefs;

  struct partition* part;
  struct partition* part_dummy;
  size_t part_def; /* Partition definition */

  /* AABB of the partition */
  double part_low[3];
  double part_upp[3];

  double vxsz[3]; /* Size of a voxel */

  const struct accel_struct* accel_structs;
  size_t batch_size; /* #spectral items */
};
#define VOXELIZE_ARGS_NULL__ { \
  NULL, /* Tetra ids */ \
  NULL, /* Radiative coefs */ \
\
  NULL, /* Partition */ \
  NULL, /* Dummy partition */ \
  0, /* Partition definition */ \
\
  /* AABB of the partition */ \
  { DBL_MAX, DBL_MAX, DBL_MAX}, \
  {-DBL_MAX,-DBL_MAX,-DBL_MAX}, \
\
  {0,0,0}, /* Size of a voxel */ \
\
  NULL, /* Spectral items */ \
  0 /* Batch size */ \
}
static const struct voxelize_args VOXELIZE_ARGS_NULL = VOXELIZE_ARGS_NULL__;

/*******************************************************************************
 * Helper functions
 ******************************************************************************/
static INLINE res_T
check_voxelize_batch_args
  (struct rnatm* atm,
   const struct voxelize_batch_args* args)
{
  size_t i;
  if(!args
  || !args->per_thread_tetra_list
  || !args->per_thread_radcoef_list
  || !args->pool
  || !IS_POW2(args->part_def)
  || !args->nparts[0]
  || !args->nparts[1]
  || !args->nparts[2]
  || !IS_POW2(args->nparts_adjusted)
  || args->nparts_adjusted < args->nparts[0]*args->nparts[1]*args->nparts[2]
  || args->atm_low[0] >= args->atm_upp[0]
  || args->atm_low[1] >= args->atm_upp[1]
  || args->atm_low[2] >= args->atm_upp[2]
  || !args->vxsz[0]
  || !args->vxsz[1]
  || !args->vxsz[2]
  || !args->accel_structs
  || !args->batch_size) {
    return RES_BAD_ARG;
  }
  if(atm->nthreads != darray_size_t_list_size_get(args->per_thread_tetra_list)
  || atm->nthreads != darray_radcoef_list_size_get(args->per_thread_radcoef_list)) {
    return RES_BAD_ARG;
  }
  FOR_EACH(i, 0, atm->nthreads) {
    if(!darray_radcoef_list_cdata_get(args->per_thread_radcoef_list)[i]) {
      return RES_BAD_ARG;
    }
  }
  return RES_OK;
}

static INLINE res_T
check_voxelize_args(const struct voxelize_args* args)
{
  if(!args->tetra_ids
  || !args->per_item_radcoefs
  || args->part_low[0] >= args->part_upp[0]
  || args->part_low[1] >= args->part_upp[1]
  || args->part_low[2] >= args->part_upp[2]
  || !IS_POW2(args->part_def)
  || !args->vxsz[0]
  || !args->vxsz[1]
  || !args->vxsz[2]
  || !args->accel_structs
  || !args->batch_size) {
    return RES_BAD_ARG;
  }
  return RES_OK;
}

static INLINE res_T
check_trace_ray_args
  (const struct rnatm* atm,
   const struct rnatm_trace_ray_args* args)
{
  size_t n;
  if(!args) return RES_BAD_ARG;

  /* Invalid band index */
  n = darray_band_size_get(&atm->bands) - 1;
  if(args->iband < darray_band_cdata_get(&atm->bands)[0].index
  || args->iband > darray_band_cdata_get(&atm->bands)[n].index)
    return RES_BAD_ARG;

  return RES_OK;
}

static void
build_sync_release(struct build_sync* sync)
{
  if(sync->mutex) mutex_destroy(sync->mutex);
  if(sync->cond) cond_destroy(sync->cond);
}

static res_T
build_sync_init(struct build_sync* sync)
{
  res_T res = RES_OK;
  ASSERT(sync);

  memset(sync, 0, sizeof(*sync));

  sync->mutex = mutex_create();
  if(!sync->mutex) { res = RES_UNKNOWN_ERR; goto error; }
  sync->cond = cond_create();
  if(!sync->cond) { res = RES_UNKNOWN_ERR; goto error; }
  sync->ibatch = 0;

exit:
  return res;
error:
  build_sync_release(sync);
  goto exit;
}

static FINLINE unsigned
round_pow2(const unsigned val)
{
  const unsigned next_pow2 = (unsigned)round_up_pow2(val);
  if(val == 0 || next_pow2 - val <= next_pow2/4) {
    return next_pow2;
  } else {
    return next_pow2/2;
  }
}
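/* A worked example of the rounding performed by round_pow2: a value is
 * rounded up to the next power of two when it is at least 3/4 of it, and
 * rounded down to the previous power of two otherwise. For instance,
 * round_pow2(6) = 8 since 8-6 <= 8/4, whereas round_pow2(5) = 4 since
 * 8-5 > 8/4; similarly round_pow2(100) = 128. */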
/* Return the number of quadrature points for the range of bands overlapped by
 * the input spectral range */
static INLINE size_t
compute_nquad_pts(const struct rnatm* atm)
{
  size_t nquad_pts = 0;
  size_t iband = 0;
  ASSERT(atm);

  FOR_EACH(iband, 0, darray_band_size_get(&atm->bands)) {
    nquad_pts += darray_band_cdata_get(&atm->bands)[iband].nquad_pts;
  }
  return nquad_pts;
}

static res_T
setup_accel_structs(struct rnatm* atm)
{
  struct accel_struct* accel_structs = NULL;
  size_t istruct;
  size_t naccel_structs;
  size_t i;
  res_T res = RES_OK;
  ASSERT(atm);

  naccel_structs = compute_nquad_pts(atm);

  res = darray_accel_struct_resize(&atm->accel_structs, naccel_structs);
  if(res != RES_OK) {
    log_err(atm,
      "%s: failed to allocate the list of acceleration structures -- %s",
      FUNC_NAME, res_to_cstr(res));
    goto error;
  }
  accel_structs = darray_accel_struct_data_get(&atm->accel_structs);

  istruct = 0;
  FOR_EACH(i, 0, darray_band_size_get(&atm->bands)) {
    const struct band* band = darray_band_cdata_get(&atm->bands)+i;
    size_t iquad_pt;

    FOR_EACH(iquad_pt, 0, band->nquad_pts) {
      ASSERT(istruct < naccel_structs);
      accel_structs[istruct].iband = band->index;
      accel_structs[istruct].iquad_pt = iquad_pt;
      ++istruct;
    }
  }

exit:
  return res;
error:
  darray_accel_struct_clear(&atm->accel_structs);
  goto exit;
}

static INLINE void
register_tetra
  (const struct suvm_primitive* prim,
   const double low[3],
   const double upp[3],
   void* context)
{
  struct darray_size_t* tetra_ids = context;
  ASSERT(prim && low && upp && context);
  ASSERT(low[0] < upp[0]);
  ASSERT(low[1] < upp[1]);
  ASSERT(low[2] < upp[2]);
  (void)low, (void)upp;
  CHK(darray_size_t_push_back(tetra_ids, &prim->iprim) == RES_OK);
}

static res_T
compute_grid_definition(struct rnatm* atm, const struct rnatm_create_args* args)
{
  double low[3];
  double upp[3];
  double sz[3];
  double sz_max;
  double vxsz;
  unsigned def[3];
  int iaxis_max = 0;
  int iaxis_remain[2];
  int i;
  ASSERT(atm && args);

  /* Recover the AABB from the atmosphere. Note that we have already made sure
   * when setting up the meshes of the gas and aerosols that the aerosols are
   * included in the gas. Therefore, the AABB of the gas is the same as the
   * AABB of the atmosphere */
  SUVM(volume_get_aabb(atm->gas.volume, low, upp));
  sz[0] = upp[0] - low[0];
  sz[1] = upp[1] - low[1];
  sz[2] = upp[2] - low[2];

  /* Find the axis along which the atmosphere's AABB is greatest */
  sz_max = -DBL_MAX;
  FOR_EACH(i, 0, 3) {
    if(sz[i] > sz_max) { iaxis_max = i; sz_max = sz[i]; }
  }

  /* Define the other axes */
  iaxis_remain[0] = (iaxis_max + 1) % 3;
  iaxis_remain[1] = (iaxis_max + 2) % 3;

  /* Fix the definition along the maximum axis and calculate the size of a
   * voxel along this axis */
  def[iaxis_max] = round_pow2(args->grid_definition_hint);
  vxsz = sz[iaxis_max] / def[iaxis_max];

  /* Calculate the definitions along the 2 remaining axes. First calculate
   * them assuming the voxels are cubic, then round them to their nearest
   * power of two */
  def[iaxis_remain[0]] = (unsigned)lround(sz[iaxis_remain[0]]/vxsz);
  def[iaxis_remain[1]] = (unsigned)lround(sz[iaxis_remain[1]]/vxsz);
  def[iaxis_remain[0]] = round_pow2(def[iaxis_remain[0]]);
  def[iaxis_remain[1]] = round_pow2(def[iaxis_remain[1]]);

  /* Setup the grid definition */
  atm->grid_definition[0] = def[0];
  atm->grid_definition[1] = def[1];
  atm->grid_definition[2] = def[2];

  return RES_OK;
}
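/* A worked example of the grid definition computation above (numbers assumed
 * for illustration): for an AABB of 400x200x100 km and a definition hint of
 * 100, the largest axis gets round_pow2(100) = 128 voxels, i.e. a voxel size
 * of 3.125 km. Assuming cubic voxels, the remaining axes need 64 and 32
 * voxels, which are already powers of two, so the resulting grid definition
 * is {128, 64, 32}. */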
/* Compute the radiative coefficients range of the tetrahedron */
static INLINE void
setup_tetra_radcoefs
  (struct rnatm* atm,
   const struct suvm_primitive* tetra,
   const size_t cpnt,
   const size_t iband,
   const size_t iquad,
   struct radcoefs* radcoefs)
{
  float ka[4];
  float ks[4];
  ASSERT(atm && tetra && radcoefs);
  ASSERT(cpnt == RNATM_GAS || cpnt < darray_aerosol_size_get(&atm->aerosols));

  if(tetra_get_radcoef(atm, tetra, cpnt, iband, iquad, RNATM_RADCOEF_Ka, ka)
  && tetra_get_radcoef(atm, tetra, cpnt, iband, iquad, RNATM_RADCOEF_Ks, ks)) {
    /* Radiative coefficients are defined for this component at the considered
     * spectral band */
    radcoefs->ka_min = MMIN(MMIN(ka[0], ka[1]), MMIN(ka[2], ka[3]));
    radcoefs->ka_max = MMAX(MMAX(ka[0], ka[1]), MMAX(ka[2], ka[3]));
    radcoefs->ks_min = MMIN(MMIN(ks[0], ks[1]), MMIN(ks[2], ks[3]));
    radcoefs->ks_max = MMAX(MMAX(ks[0], ks[1]), MMAX(ks[2], ks[3]));
  } else {
    /* No valid radiative coefficients are set: ignore the tetrahedron */
    radcoefs->ka_min = FLT_MAX;
    radcoefs->ka_max = -FLT_MAX;
    radcoefs->ks_min = FLT_MAX;
    radcoefs->ks_max = -FLT_MAX;
  }
}

static INLINE void
update_voxel
  (struct rnatm* atm,
   const struct radcoefs* radcoefs,
   struct partition* part,
   const uint64_t vx_mcode,
   const uint64_t vx_iitem)
{
  float vx_ka_min, vx_ka_max;
  float vx_ks_min, vx_ks_max;

  float* vx = NULL;
  ASSERT(atm && radcoefs && part);
  (void)atm;

  vx = partition_get_voxel(part, vx_mcode, vx_iitem);

  /* Update the range of the radiative coefficients of the voxel */
  vx_ka_min = vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)];
  vx_ka_max = vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)];
  vx_ks_min = vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)];
  vx_ks_max = vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)];
  vx_ka_min = MMIN(vx_ka_min, radcoefs->ka_min);
  vx_ka_max = MMAX(vx_ka_max, radcoefs->ka_max);
  vx_ks_min = MMIN(vx_ks_min, radcoefs->ks_min);
  vx_ks_max = MMAX(vx_ks_max, radcoefs->ks_max);
  vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)] = vx_ka_min;
  vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)] = vx_ka_max;
  vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)] = vx_ks_min;
  vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)] = vx_ks_max;
}
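/* Note the min/max convention used by setup_tetra_radcoefs and update_voxel:
 * an "empty" range is encoded as min = FLT_MAX and max = -FLT_MAX, so any
 * later MMIN/MMAX update overwrites it with actual data, and a voxel whose
 * min remains greater than its max can be recognized as never having been
 * covered. This is the test used by vx_challenge_merge below. */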
static res_T
voxelize_tetra
  (struct rnatm* atm,
   const struct voxelize_args* args,
   struct partition* part, /* Partition to fill */
   const size_t cpnt)
{
  const struct suvm_volume* volume = NULL;
  size_t i;
  ASSERT(atm && check_voxelize_args(args) == RES_OK);
  ASSERT(cpnt == RNATM_GAS || cpnt < darray_aerosol_size_get(&atm->aerosols));

  /* Fetch the component volumetric mesh */
  if(cpnt == RNATM_GAS) {
    volume = atm->gas.volume;
  } else {
    volume = darray_aerosol_cdata_get(&atm->aerosols)[cpnt].volume;
  }

  FOR_EACH(i, 0, darray_size_t_size_get(args->tetra_ids)) {
    struct suvm_primitive tetra;
    struct suvm_polyhedron poly;
    double poly_low[3];
    double poly_upp[3];
    float vx_low[3];
    float vx_upp[3];
    uint32_t ivx_low[3];
    uint32_t ivx_upp[3];
    uint32_t ivx[3];
    uint64_t mcode[3]; /* Cache of 3D morton code */
    size_t iitem;
    const size_t itetra = darray_size_t_cdata_get(args->tetra_ids)[i];
    enum suvm_intersection_type intersect;

    /* Recover the tetrahedron and setup its polyhedron */
    SUVM(volume_get_primitive(volume, itetra, &tetra));
    SUVM(primitive_setup_polyhedron(&tetra, &poly));
    ASSERT(poly.lower[0] <= args->part_upp[0]);
    ASSERT(poly.lower[1] <= args->part_upp[1]);
    ASSERT(poly.lower[2] <= args->part_upp[2]);
    ASSERT(poly.upper[0] >= args->part_low[0]);
    ASSERT(poly.upper[1] >= args->part_low[1]);
    ASSERT(poly.upper[2] >= args->part_low[2]);

    /* Clamp the AABB of the polyhedron to the partition bounds */
    poly_low[0] = MMAX(poly.lower[0], args->part_low[0]);
    poly_low[1] = MMAX(poly.lower[1], args->part_low[1]);
    poly_low[2] = MMAX(poly.lower[2], args->part_low[2]);
    poly_upp[0] = MMIN(poly.upper[0], args->part_upp[0]);
    poly_upp[1] = MMIN(poly.upper[1], args->part_upp[1]);
    poly_upp[2] = MMIN(poly.upper[2], args->part_upp[2]);

    /* Transform the AABB of the polyhedron into the voxel space of the
     * partition */
    ivx_low[0] = (uint32_t)((poly_low[0] - args->part_low[0]) / args->vxsz[0]);
    ivx_low[1] = (uint32_t)((poly_low[1] - args->part_low[1]) / args->vxsz[1]);
    ivx_low[2] = (uint32_t)((poly_low[2] - args->part_low[2]) / args->vxsz[2]);
    ivx_upp[0] = (uint32_t)ceil((poly_upp[0] - args->part_low[0]) / args->vxsz[0]);
    ivx_upp[1] = (uint32_t)ceil((poly_upp[1] - args->part_low[1]) / args->vxsz[1]);
    ivx_upp[2] = (uint32_t)ceil((poly_upp[2] - args->part_low[2]) / args->vxsz[2]);
    ASSERT(ivx_upp[0] <= args->part_def);
    ASSERT(ivx_upp[1] <= args->part_def);
    ASSERT(ivx_upp[2] <= args->part_def);

    /* Compute the range of the tetrahedron radiative coefficients */
    FOR_EACH(iitem, 0, args->batch_size) {
      const size_t iband = args->accel_structs[iitem].iband;
      const size_t iquad_pt = args->accel_structs[iitem].iquad_pt;
      struct radcoefs* radcoefs = args->per_item_radcoefs + iitem;
      setup_tetra_radcoefs(atm, &tetra, cpnt, iband, iquad_pt, radcoefs);
    }

    /* Iterate over the voxels intersected by the AABB of the polyhedron */
    FOR_EACH(ivx[2], ivx_low[2], ivx_upp[2]) {
      vx_low[2] = (float)((double)ivx[2]*args->vxsz[2] + args->part_low[2]);
      vx_upp[2] = vx_low[2] + (float)args->vxsz[2];
      mcode[2] = morton3D_encode_u21(ivx[2]);

      FOR_EACH(ivx[1], ivx_low[1], ivx_upp[1]) {
        vx_low[1] = (float)((double)ivx[1]*args->vxsz[1] + args->part_low[1]);
        vx_upp[1] = vx_low[1] + (float)args->vxsz[1];
        mcode[1] = (morton3D_encode_u21(ivx[1]) << 1) | mcode[2];

        FOR_EACH(ivx[0], ivx_low[0], ivx_upp[0]) {
          vx_low[0] = (float)((double)ivx[0]*args->vxsz[0] + args->part_low[0]);
          vx_upp[0] = vx_low[0] + (float)args->vxsz[0];
          mcode[0] = (morton3D_encode_u21(ivx[0]) << 2) | mcode[1];

          intersect = suvm_polyhedron_intersect_aabb(&poly, vx_low, vx_upp);
          if(intersect == SUVM_INTERSECT_NONE) continue;

          /* Update the intersected voxel */
          FOR_EACH(iitem, 0, args->batch_size) {
            struct radcoefs* radcoefs = args->per_item_radcoefs + iitem;
            update_voxel(atm, radcoefs, part, mcode[0], iitem);
          }
        }
      }
    }
  }

  return RES_OK;
}
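/* A worked example of the incremental morton encoding used in voxelize_tetra:
 * morton3D_encode_u21 spreads the bits of a 21-bit coordinate every 3 bits,
 * so with x shifted by 2, y by 1 and z by 0 the final code interleaves the
 * coordinates as ... x1 y1 z1 x0 y0 z0. For the voxel (x,y,z) = (1,0,1) this
 * gives (1<<2) | (0<<1) | 1 = 0b101 = 5. Caching mcode[2] and mcode[1] in the
 * outer loops avoids re-encoding the z and y coordinates for every x. */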
static res_T
voxelize_partition(struct rnatm* atm, const struct voxelize_args* args)
{
  STATIC_ASSERT((RNATM_GAS+1) == 0, Unexpected_constant_value);

  size_t naerosols = 0;
  size_t cpnt = 0;
  int part_is_empty = 1;
  res_T res = RES_OK;
  ASSERT(atm && check_voxelize_args(args) == RES_OK);

  partition_clear_voxels(args->part);

  /* Voxelize atmospheric components */
  naerosols = darray_aerosol_size_get(&atm->aerosols);

  /* CAUTION: in the following loop, it is assumed that the first component is
   * necessarily the gas which is therefore voxelized directly into the
   * partition. Aerosols are voxelized in a dummy partition before their
   * accumulation in the partition */
  cpnt = RNATM_GAS;

  do {
    struct suvm_volume* volume = NULL;

    /* Get the volumetric mesh of the component */
    if(cpnt == RNATM_GAS) {
      volume = atm->gas.volume;
    } else {
      volume = darray_aerosol_cdata_get(&atm->aerosols)[cpnt].volume;
    }

    /* Find the list of tetrahedra that overlap the partition */
    darray_size_t_clear(args->tetra_ids);
    res = suvm_volume_intersect_aabb
      (volume, args->part_low, args->part_upp, register_tetra, args->tetra_ids);
    if(res != RES_OK) goto error;

    /* The partition is not covered by any tetrahedron of the component */
    if(darray_size_t_size_get(args->tetra_ids) == 0) continue;

    /* The partition is covered by at least one tetrahedron and is therefore no
     * longer empty */
    part_is_empty = 0;

    /* Voxelize the gas directly into the partition */
    if(cpnt == RNATM_GAS) {
      res = voxelize_tetra(atm, args, args->part, cpnt);
      if(res != RES_OK) goto error;

    /* Voxelize each aerosol into the dummy partition before its accumulation */
    } else {
      partition_clear_voxels(args->part_dummy);
      res = voxelize_tetra(atm, args, args->part_dummy, cpnt);
      if(res != RES_OK) goto error;
      partition_accum(args->part, args->part_dummy);
    }
  } while(++cpnt < naerosols);

  /* The partition is not covered by any tetrahedron */
  if(part_is_empty) partition_empty(args->part);

exit:
  return res;
error:
  goto exit;
}
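/* Note on the iteration order in voxelize_partition: the STATIC_ASSERT above
 * guarantees that RNATM_GAS is the all-ones value of its type, i.e. that
 * RNATM_GAS + 1 == 0. Starting the do-while loop at cpnt = RNATM_GAS thus
 * visits the gas first, then wraps to 0 and visits the aerosols 0, 1, ...,
 * naerosols-1 before the `++cpnt < naerosols' test terminates the loop; with
 * zero aerosols only the gas is visited. */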
static res_T
voxelize_batch(struct rnatm* atm, const struct voxelize_batch_args* args)
{
  int64_t i;
  int progress = 0;
  ATOMIC nparts_voxelized;
  ATOMIC res = RES_OK;
  ASSERT(atm && check_voxelize_batch_args(atm, args) == RES_OK);

  pool_reset(args->pool); /* Reset the next partition id to 0 */

  /* #partitions already voxelized */
  nparts_voxelized = (ATOMIC)args->nparts_voxelized;

  /* Iterate over the partitions of the grid according to their Morton order
   * and voxelize the tetrahedra that overlap them */
  omp_set_num_threads((int)atm->nthreads);
  #pragma omp parallel for schedule(static, 1/*chunk size*/)
  for(i = 0; i < (int64_t)args->nparts_adjusted; ++i) {
    struct voxelize_args voxelize_args = VOXELIZE_ARGS_NULL;
    struct partition* part_dummy = NULL;
    struct darray_size_t* tetra_ids = NULL;
    struct partition* part = NULL;
    struct radcoefs* per_item_radcoefs = NULL;
    double part_low[3];
    double part_upp[3];
    uint32_t part_ids[3];
    size_t n;
    const int ithread = omp_get_thread_num();
    int pcent;
    res_T res_local = RES_OK;

    if(ATOMIC_GET(&res) != RES_OK) continue;

    /* Recover the batch partitions */
    part = pool_next_partition(args->pool);
    if(!part) {
      ATOMIC_SET(&res, RES_UNKNOWN_ERR);
      continue;
    }

    /* Is the current partition out of bounds of the atmosphere grid */
    morton_xyz_decode_u21((uint64_t)partition_get_id(part), part_ids);
    if(part_ids[0] >= args->nparts[0]
    || part_ids[1] >= args->nparts[1]
    || part_ids[2] >= args->nparts[2]) {
      partition_free(part);
      continue;
    }

    /* Compute the AABB of the partition */
    part_low[0] = (double)(part_ids[0] * args->part_def) * args->vxsz[0];
    part_low[1] = (double)(part_ids[1] * args->part_def) * args->vxsz[1];
    part_low[2] = (double)(part_ids[2] * args->part_def) * args->vxsz[2];
    part_low[0] = part_low[0] + args->atm_low[0];
    part_low[1] = part_low[1] + args->atm_low[1];
    part_low[2] = part_low[2] + args->atm_low[2];
    part_upp[0] = part_low[0] + (double)args->part_def * args->vxsz[0];
    part_upp[1] = part_low[1] + (double)args->part_def * args->vxsz[1];
    part_upp[2] = part_low[2] + (double)args->part_def * args->vxsz[2];

    /* Retrieve the array where to store the indices of the tetrahedra that
     * overlap the partition */
    tetra_ids = darray_size_t_list_data_get(args->per_thread_tetra_list) + ithread;

    /* Retrieve the spectral item data structure used to store the radiative
     * coefficients */
    per_item_radcoefs = darray_radcoef_list_data_get
      (args->per_thread_radcoef_list)[ithread];

    /* Obtain the dummy partition needed to temporarily store the aerosol
     * voxelization prior to its accumulation in the current partition */
    part_dummy = darray_partition_data_get
      (args->per_thread_dummy_partition)[ithread];

    /* Voxelize the partition and, once done, commit it */
    voxelize_args.tetra_ids = tetra_ids;
    voxelize_args.part_def = args->part_def;
    d3_set(voxelize_args.part_low, part_low);
    d3_set(voxelize_args.part_upp, part_upp);
    d3_set(voxelize_args.vxsz, args->vxsz);
    voxelize_args.part = part;
    voxelize_args.part_dummy = part_dummy;
    voxelize_args.per_item_radcoefs = per_item_radcoefs;
    voxelize_args.accel_structs = args->accel_structs;
    voxelize_args.batch_size = args->batch_size;
    res_local = voxelize_partition(atm, &voxelize_args);
    if(res_local == RES_OK) {
      partition_commit(part, args->batch_size);
    } else {
      partition_free(part);
      ATOMIC_SET(&res, res_local);
      continue;
    }

    /* Update the progress bar */
    n = (size_t)ATOMIC_INCR(&nparts_voxelized);
    pcent = (int)((n * 100) / args->nparts_overall);
    #pragma omp critical
    if(pcent > progress) {
      progress = pcent;
      log_info(atm, VOXELIZE_MSG, pcent);
    }
  }
  if(res != RES_OK) goto error;

exit:
  return (res_T)res;
error:
  goto exit;
}
static res_T
voxelize_atmosphere
  (struct rnatm* atm,
   struct pool* pool,
   struct build_sync* sync)
{
  /* Batch of spectral items to process in a single voxelization */
  struct voxelize_batch_args batch_args = VOXELIZE_BATCH_ARGS_NULL;
  size_t batch_size = 0;
  size_t nbatches = 0;
  size_t ibatch = 0;

  /* Working data structures */
  struct darray_size_t_list per_thread_tetra_list;
  struct darray_radcoef_list per_thread_radcoef_list;
  struct darray_partition per_thread_dummy_partition;

  /* Voxelization parameters */
  double atm_low[3];
  double atm_upp[3];
  double vxsz[3];
  size_t nparts[3]; /* #partitions along the 3 axes */
  size_t nparts_adjusted; /* #partitions allowing their indexing by morton id */
  size_t nparts_overall; /* Total number of voxelized partitions */
  size_t part_def; /* Definition of a partition */

  /* Miscellaneous variables */
  struct accel_struct* accel_structs = NULL;
  size_t naccel_structs = 0;
  size_t i = 0;
  res_T res = RES_OK;

  ASSERT(atm && pool && sync);

  darray_size_t_list_init(atm->allocator, &per_thread_tetra_list);
  darray_radcoef_list_init(atm->allocator, &per_thread_radcoef_list);
  darray_partition_init(atm->allocator, &per_thread_dummy_partition);

  /* Allocate the per thread lists */
  res = darray_size_t_list_resize(&per_thread_tetra_list, atm->nthreads);
  if(res != RES_OK) goto error;
  res = darray_radcoef_list_resize(&per_thread_radcoef_list, atm->nthreads);
  if(res != RES_OK) goto error;
  res = darray_partition_resize(&per_thread_dummy_partition, atm->nthreads);
  if(res != RES_OK) goto error;

  /* Setup the per thread and per item working data structures */
  batch_size = pool_get_voxel_width(pool);
  FOR_EACH(i, 0, atm->nthreads) {
    struct radcoefs* per_item_k = NULL;
    per_item_k = MEM_CALLOC(atm->allocator, batch_size, sizeof(*per_item_k));
    if(!per_item_k) { res = RES_MEM_ERR; goto error; }
    darray_radcoef_list_data_get(&per_thread_radcoef_list)[i] = per_item_k;
  }

  /* Setup the per thread dummy partition */
  FOR_EACH(i, 0, atm->nthreads) {
    struct partition* part = pool_dummy_partition(pool);
    darray_partition_data_get(&per_thread_dummy_partition)[i] = part;
  }

  /* Recover the AABB of the atmosphere and compute the size of a voxel */
  SUVM(volume_get_aabb(atm->gas.volume, atm_low, atm_upp));
  vxsz[0] = (atm_upp[0] - atm_low[0]) / (double)atm->grid_definition[0];
  vxsz[1] = (atm_upp[1] - atm_low[1]) / (double)atm->grid_definition[1];
  vxsz[2] = (atm_upp[2] - atm_low[2]) / (double)atm->grid_definition[2];

  /* Number of partitions required to cover the entire atmosphere grid */
  part_def = pool_get_partition_definition(pool);
  nparts[0] = (atm->grid_definition[0] + (part_def-1)/*ceil*/) / part_def;
  nparts[1] = (atm->grid_definition[1] + (part_def-1)/*ceil*/) / part_def;
  nparts[2] = (atm->grid_definition[2] + (part_def-1)/*ceil*/) / part_def;

  /* Adjust the #partitions allowing their indexing by their morton code */
  nparts_adjusted = MMAX(nparts[0], MMAX(nparts[1], nparts[2]));
  nparts_adjusted = round_up_pow2(nparts_adjusted);
  nparts_adjusted = nparts_adjusted * nparts_adjusted * nparts_adjusted;

  /* Calculate the number of batches required to build the acceleration
   * structures for the submitted spectral bands; currently one octree is
   * built per quadrature point of each band, and each octree requires a
   * complete voxelization of the atmospheric meshes, which takes time. To
   * amortize this cost without a prohibitive increase in memory space, up to
   * N octrees are filled from a single voxelization of the mesh. The number
   * of batches corresponds to the total number of voxelizations required */
  accel_structs = darray_accel_struct_data_get(&atm->accel_structs);
  naccel_structs = darray_accel_struct_size_get(&atm->accel_structs);
  nbatches = (naccel_structs + (batch_size - 1)/*ceil*/) / batch_size;
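  /* A worked batching example (numbers assumed for illustration): with
   * naccel_structs = 10 octrees to build and a voxel width of batch_size = 4,
   * nbatches = ceil(10/4) = 3, so the grid is fully voxelized 3 times; with
   * nparts = {4,2,1} the progress bar then counts 4*2*1*3 = 24 partition
   * voxelizations overall. */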
  /* Calculate the total number of partitions to voxelize. Note that the same
   * partition can be voxelized several times, once per batch */
  nparts_overall = nparts[0] * nparts[1] * nparts[2] * nbatches;

  /* Print the size of a voxel */
  log_info(atm, "voxel size = {%g, %g, %g}\n", SPLIT3(vxsz));

  /* Setup the regular batch arguments */
  batch_args.per_thread_tetra_list = &per_thread_tetra_list;
  batch_args.per_thread_radcoef_list = &per_thread_radcoef_list;
  batch_args.per_thread_dummy_partition = &per_thread_dummy_partition;
  batch_args.pool = pool;
  batch_args.part_def = part_def;
  batch_args.nparts[0] = nparts[0];
  batch_args.nparts[1] = nparts[1];
  batch_args.nparts[2] = nparts[2];
  batch_args.nparts_adjusted = nparts_adjusted;
  batch_args.nparts_overall = nparts_overall;
  batch_args.nparts_voxelized = 0;
  d3_set(batch_args.atm_low, atm_low);
  d3_set(batch_args.atm_upp, atm_upp);
  d3_set(batch_args.vxsz, vxsz);

  /* Print the voxelization status */
  log_info(atm, VOXELIZE_MSG, 0);

  /* Voxelize the batches */
  FOR_EACH(ibatch, 0, nbatches) {
    size_t item_range[2];
    item_range[0] = ibatch * batch_size;
    item_range[1] = MMIN(item_range[0] + batch_size, naccel_structs);

    batch_args.accel_structs = accel_structs + item_range[0];
    batch_args.batch_size = item_range[1] - item_range[0];

    /* Wait for the building thread to finish consuming the previous batch */
    mutex_lock(sync->mutex);
    if(sync->ibatch != ibatch) {
      ASSERT(sync->ibatch == ibatch - 1);
      cond_wait(sync->cond, sync->mutex);
      /* An error occurred in the building thread */
      if(sync->ibatch != ibatch) res = RES_BAD_ARG;
    }
    mutex_unlock(sync->mutex);
    if(res != RES_OK) goto error;

    /* Generate the voxels of the current batch */
    res = voxelize_batch(atm, &batch_args);
    if(res != RES_OK) goto error;

    batch_args.nparts_voxelized += nparts[0]*nparts[1]*nparts[2];
  }

  /* Print the final status message */
  log_info(atm, VOXELIZE_MSG"\n", 100);

exit:
  FOR_EACH(i, 0, darray_radcoef_list_size_get(&per_thread_radcoef_list)) {
    struct radcoefs* per_item_k = NULL;
    per_item_k = darray_radcoef_list_data_get(&per_thread_radcoef_list)[i];
    MEM_RM(atm->allocator, per_item_k);
  }
  FOR_EACH(i, 0, darray_partition_size_get(&per_thread_dummy_partition)) {
    struct partition* part = NULL;
    part = darray_partition_data_get(&per_thread_dummy_partition)[i];
    partition_free(part);
  }
  darray_size_t_list_release(&per_thread_tetra_list);
  darray_radcoef_list_release(&per_thread_radcoef_list);
  darray_partition_release(&per_thread_dummy_partition);
  return res;
error:
  goto exit;
}
static void
vx_get(const size_t xyz[3], const uint64_t mcode, void* dst, void* context)
{
  struct build_octree_context* ctx = context;
  const float* vx = NULL;
  uint64_t ivx, ipart;
  int log2_part_def;
  ASSERT(xyz && dst && ctx);
  (void)xyz;

  /* Retrieve the partition's morton id and the voxel's morton id in the
   * partition from the morton code of the voxel in the whole octree. Note
   * that, like voxels, partitions are sorted in morton order. Therefore, the
   * partition's morton id is encoded in the most significant bits of the
   * voxel's morton code, while the voxel's morton id in the partition is
   * stored in its least significant bits */
  log2_part_def = log2i((int)pool_get_partition_definition(ctx->pool));
  ipart = (mcode >> (log2_part_def*3));
  ivx = (mcode & (BIT_U64(log2_part_def*3)-1));

  /* Recover the partition storing the voxel */
  if(ctx->part == NULL || partition_get_id(ctx->part) != ipart) {
    if(ctx->part) partition_free(ctx->part);
    ctx->part = pool_fetch_partition(ctx->pool, ipart);

    if(ctx->part == NULL) { /* An error occurred */
      memset(dst, 0, NFLOATS_PER_VOXEL * sizeof(float));
      return;
    }
  }

  vx = partition_cget_voxel(ctx->part, ivx, ctx->iitem);
  memcpy(dst, vx, NFLOATS_PER_VOXEL * sizeof(float));
}
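/* A worked example of the morton code split performed in vx_get: with the
 * default partition definition of 8 voxels per axis (see create_pool below),
 * log2_part_def = 3 and a partition holds 2^(3*3) = 512 voxels. For
 * mcode = 0x1234 = 4660, the partition id is 4660 >> 9 = 9 and the voxel id
 * within that partition is 4660 & 511 = 52. */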
static void
vx_merge(void* dst, const void* vxs[], const size_t nvxs, void* ctx)
{
  float ka_min = FLT_MAX, ka_max = -FLT_MAX;
  float ks_min = FLT_MAX, ks_max = -FLT_MAX;
  float* merged_vx = dst;
  size_t ivx;
  ASSERT(dst && vxs && nvxs);
  (void)ctx;

  FOR_EACH(ivx, 0, nvxs) {
    const float* vx = vxs[ivx];
    ka_min = MMIN(ka_min, vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)]);
    ka_max = MMAX(ka_max, vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)]);
    ks_min = MMIN(ks_min, vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)]);
    ks_max = MMAX(ks_max, vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)]);
  }

  merged_vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)] = ka_min;
  merged_vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)] = ka_max;
  merged_vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)] = ks_min;
  merged_vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)] = ks_max;
}

static int
vx_challenge_merge
  (const struct svx_voxel vxs[],
   const size_t nvxs,
   void* context)
{
  const struct build_octree_context* ctx = context;
  double low[3] = { DBL_MAX, DBL_MAX, DBL_MAX};
  double upp[3] = {-DBL_MAX,-DBL_MAX,-DBL_MAX};
  double tau;
  float kext_min = FLT_MAX, kext_max = -FLT_MAX;
  double sz_max;
  size_t ivx;
  ASSERT(vxs && nvxs && context);

  /* Compute the range of the extinction coefficients of the submitted voxels */
  FOR_EACH(ivx, 0, nvxs) {
    const float* vx = vxs[ivx].data;
    const float ka_min = vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MIN)];
    const float ka_max = vx[voxel_idata(RNATM_RADCOEF_Ka, RNATM_SVX_OP_MAX)];
    const float ks_min = vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MIN)];
    const float ks_max = vx[voxel_idata(RNATM_RADCOEF_Ks, RNATM_SVX_OP_MAX)];
    if(ka_min > ka_max) { ASSERT(ks_min > ks_max); continue; } /* Empty voxel */
    kext_min = MMIN(kext_min, ka_min + ks_min);
    kext_max = MMAX(kext_max, ka_max + ks_max);
  }

  /* Always merge empty voxels */
  if(kext_min > kext_max) return 1;

  /* Compute the AABB of the submitted voxels */
  FOR_EACH(ivx, 0, nvxs) {
    low[0] = MMIN(vxs[ivx].lower[0], low[0]);
    low[1] = MMIN(vxs[ivx].lower[1], low[1]);
    low[2] = MMIN(vxs[ivx].lower[2], low[2]);
    upp[0] = MMAX(vxs[ivx].upper[0], upp[0]);
    upp[1] = MMAX(vxs[ivx].upper[1], upp[1]);
    upp[2] = MMAX(vxs[ivx].upper[2], upp[2]);
  }

  /* Compute the size of the largest dimension of the AABB */
  sz_max = upp[0] - low[0];
  sz_max = MMAX(upp[1] - low[1], sz_max);
  sz_max = MMAX(upp[2] - low[2], sz_max);

  /* Check if the voxels can be merged by comparing the optical thickness along
   * the maximum size of the merged AABB against a user-defined threshold */
  tau = (kext_max - kext_min) * sz_max;
  return tau < ctx->tau_threshold;
}
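/* A worked example of the merge criterion above (numbers assumed for
 * illustration): for 8 sibling voxels whose extinction coefficient
 * kext = ka + ks spans [1e-4, 1e-3] m^-1 over a merged AABB whose largest
 * dimension is 500 m, the heterogeneity-based optical thickness is
 * tau = (1e-3 - 1e-4) * 500 = 0.45. With a tau_threshold of 1 the voxels are
 * merged into a single octree node, while a threshold of 0.1 would keep them
 * split. */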
static res_T
init_octrees_storage(struct rnatm* atm, const struct rnatm_create_args* args)
{
  struct octrees_storage_desc challenge = OCTREES_STORAGE_DESC_NULL;
  struct octrees_storage_desc reference = OCTREES_STORAGE_DESC_NULL;
  res_T res = RES_OK;
  ASSERT(atm && args);

  if(!args->octrees_storage) goto exit;

  /* Register the storage file */
  atm->octrees_storage = args->octrees_storage;

  res = octrees_storage_desc_init(atm, &challenge);
  if(res != RES_OK) goto error;

  /* Save the computed descriptor in the storage */
  if(!args->load_octrees_from_storage) {
    res = write_octrees_storage_desc(atm, &challenge, atm->octrees_storage);
    if(res != RES_OK) goto error;

  /* The octrees must be loaded from storage. Verify that the data and settings
   * used by the saved octrees are compatible with the current parameters */
  } else {

    res = octrees_storage_desc_init_from_stream
      (atm, &reference, args->octrees_storage);
    if(res != RES_OK) goto error;

    res = check_octrees_storage_compatibility(&reference, &challenge);
    if(res != RES_OK) {
      log_err(atm,
        "unable to load octrees from the supplied storage. The saved "
        "octrees are built from different data\n");
      res = RES_BAD_ARG;
      goto error;
    }
  }

exit:
  octrees_storage_desc_release(&challenge);
  octrees_storage_desc_release(&reference);
  return res;
error:
  goto exit;
}

static res_T
create_storage_toc(struct rnatm* atm, fpos_t* toc)
{
  int err = 0;
  res_T res = RES_OK;
  ASSERT(atm && toc);

  if(!atm->octrees_storage) goto exit;

  err = fgetpos(atm->octrees_storage, toc);
  if(err != 0) {
    log_err(atm, "error retrieving toc position of octrees storage -- %s\n",
      strerror(errno));
    res = RES_IO_ERR;
    goto error;
  }

  res = reserve_octrees_storage_toc(atm, atm->octrees_storage);
  if(res != RES_OK) {
    log_err(atm, "error reserving the octrees storage toc -- %s\n",
      res_to_cstr(res));
    goto error;
  }

exit:
  return res;
error:
  goto exit;
}

static res_T
offload_octrees
  (struct rnatm* atm,
   const size_t ioctrees[2]) /* Octrees to write. Limits are inclusive */
{
  size_t ioctree = 0;
  res_T res = RES_OK;
  ASSERT(atm && ioctrees && ioctrees[0] <= ioctrees[1]);
  ASSERT(ioctrees[1] < darray_accel_struct_size_get(&atm->accel_structs));

  /* No storage: nothing to do */
  if(!atm->octrees_storage) goto exit;

  res = store_octrees(atm, ioctrees, atm->octrees_storage);
  if(res != RES_OK) goto error;

  FOR_EACH(ioctree, ioctrees[0], ioctrees[1]+1) {
    unload_octree(atm, ioctree); /* Free the octree memory */
  }

exit:
  return res;
error:
  goto exit;
}
static res_T
finalize_storage(struct rnatm* atm, const fpos_t* toc)
{
  int err = 0;
  res_T res = RES_OK;
  ASSERT(atm && toc);

  /* No storage: nothing to do */
  if(!atm->octrees_storage) goto exit;

  err = fsetpos(atm->octrees_storage, toc);
  if(err != 0) {
    log_err(atm, "error positioning on the octrees storage toc -- %s\n",
      strerror(errno));
    res = RES_IO_ERR;
    goto error;
  }

  /* Update the table of contents */
  res = write_octrees_storage_toc(atm, atm->octrees_storage);
  if(res != RES_OK) goto error;

  CHK(fflush(atm->octrees_storage) == 0);

exit:
  return res;
error:
  goto exit;
}
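/* Overview of the producer/consumer pipeline (see also create_octrees below):
 * two OpenMP sections run concurrently. The voxelization section produces the
 * partitions of batch i and then blocks on sync->cond until the build section
 * has incremented sync->ibatch, i.e. until the octrees of batch i have been
 * fully built, before producing batch i+1. The build section below consumes
 * the committed partitions through pool_fetch_partition (see vx_get) and
 * signals sync->cond once per batch, or on error so that the producer does
 * not wait forever. */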
static res_T
build_octrees
  (struct rnatm* atm,
   const struct rnatm_create_args* args,
   struct pool* pool,
   struct build_sync* sync)
{
  fpos_t storage_toc;
  struct accel_struct* accel_structs = NULL;
  double low[3], upp[3];
  size_t def[3];
  size_t istruct;
  size_t naccel_structs;
  size_t voxel_width;
  ATOMIC res = RES_OK;
  ASSERT(atm && args && pool);

  /* Recover the AABB from the atmosphere. Note that we have already made sure
   * when setting up the meshes of the gas and aerosols that the aerosols are
   * included in the gas. Therefore, the AABB of the gas is the same as the
   * AABB of the atmosphere */
  SUVM(volume_get_aabb(atm->gas.volume, low, upp));

  /* Retrieve the grid definition */
  def[0] = (size_t)atm->grid_definition[0];
  def[1] = (size_t)atm->grid_definition[1];
  def[2] = (size_t)atm->grid_definition[2];

  accel_structs = darray_accel_struct_data_get(&atm->accel_structs);
  naccel_structs = darray_accel_struct_size_get(&atm->accel_structs);
  voxel_width = pool_get_voxel_width(pool);

  /* Setup the disk storage of the octrees */
  res = create_storage_toc(atm, &storage_toc);
  if(res != RES_OK) goto error;

  /* Build the octrees. Each thread consumes an element of the voxels generated
   * by the voxelization thread, each element corresponding to the voxel of an
   * octree to be constructed. By fixing the number of threads to the width of
   * the voxel, we therefore build `voxel_width' octrees in parallel from a
   * single voxelization of the atmospheric meshes */
  for(istruct = 0; istruct < naccel_structs; istruct += voxel_width) {
    const size_t batch_size = MMIN(voxel_width, naccel_structs - istruct);
    size_t ioctrees[2];
    omp_set_num_threads((int)batch_size);

    /* Note that we are using a parallel block rather than a parallel loop in
     * order to add an implicit barrier after a batch has been fully consumed.
     * This is necessary to prevent a thread from consuming voxels from the
     * previous batch */
    #pragma omp parallel
    {
      struct build_octree_context ctx = BUILD_OCTREE_CONTEXT_NULL;
      struct svx_voxel_desc vx_desc = SVX_VOXEL_DESC_NULL;
      struct svx_tree* octree = NULL;
      const int ithread = omp_get_thread_num();
      const size_t istruct_curr = (size_t)ithread + istruct;
      res_T res_local = RES_OK;

      /* Setup the build context */
      ctx.pool = pool;
      ctx.part = NULL;
      ctx.iitem = (size_t)ithread;
      ctx.tau_threshold = args->optical_thickness;

      /* Setup the voxel descriptor */
      vx_desc.get = vx_get;
      vx_desc.merge = vx_merge;
      vx_desc.challenge_merge = vx_challenge_merge;
      vx_desc.context = &ctx;
      vx_desc.size = NFLOATS_PER_VOXEL * sizeof(float);

      res_local = svx_octree_create(atm->svx, low, upp, def, &vx_desc, &octree);
      if(ctx.part) partition_free(ctx.part);
      if(res_local != RES_OK) {
        ATOMIC_SET(&res, res_local);
      } else { /* Register the built octree */
        accel_structs[istruct_curr].octree = octree;
      }
    }
    if(res != RES_OK) goto error;

    /* Signal the voxelization thread to generate the next batch */
    mutex_lock(sync->mutex);
    sync->ibatch += 1;
    mutex_unlock(sync->mutex);
    cond_signal(sync->cond);

    /* Offload the built octrees */
    ioctrees[0] = istruct;
    ioctrees[1] = istruct + batch_size - 1;
    res = offload_octrees(atm, ioctrees);
    if(res != RES_OK) goto error;
  }

  /* Finalize the file where the octrees are offloaded */
  res = finalize_storage(atm, &storage_toc);
  if(res != RES_OK) goto error;

exit:
  return (res_T)res;
error:
  /* Signal to the voxelization thread that there is no need to wait for the
   * build thread */
  cond_signal(sync->cond);
  darray_accel_struct_clear(&atm->accel_structs);
  goto exit;
}
static res_T
create_pool(struct rnatm* atm, struct pool** out_pool, const size_t nbands)
{
  /* Empirical constant that defines the number of non empty partitions to
   * pre-allocate per thread to avoid contention between the thread building
   * the octrees from the partitions and the threads that fill these
   * partitions */
  const size_t NPARTITIONS_PER_THREAD = 64;

  /* Number of dummy partitions per thread. These partitions are used to
   * temporarily store the voxelization of aerosols that are then accumulated
   * in the voxelized partition. Actually, one such partition per voxelization
   * thread is required */
  const size_t NPARTITIONS_PER_THREAD_DUMMY = 1;

  /* Empirical constant that defines the total number of partitions to
   * pre-allocate per thread. This constant is necessarily greater than or
   * equal to (NPARTITIONS_PER_THREAD + NPARTITIONS_PER_THREAD_DUMMY), the
   * difference representing the number of completely empty partitions. Such
   * partitions help reduce thread contention with a sparse grid without
   * significantly increasing memory usage */
  const size_t OVERALL_NPARTITIONS_PER_THREAD =
    NPARTITIONS_PER_THREAD * 64
    + NPARTITIONS_PER_THREAD_DUMMY;

  /* Number of items stored per voxel data. A width greater than 1 makes it
   * possible to store by partition the radiative coefficients of several
   * spectral data. The goal is to voxelize the volumetric mesh once for N
   * spectral data */
  const size_t VOXEL_WIDTH = MMIN(4, nbands);

  /* Definition of a partition on the 3 axes */
  const size_t PARTITION_DEFINITION = 8;

  struct pool_create_args pool_args = POOL_CREATE_ARGS_DEFAULT;
  struct pool* pool = NULL;
  res_T res = RES_OK;
  ASSERT(atm && out_pool);

  /* Create the partition pool */
  pool_args.npartitions = atm->nthreads * OVERALL_NPARTITIONS_PER_THREAD;
  pool_args.npreallocated_partitions = atm->nthreads * NPARTITIONS_PER_THREAD;
  pool_args.partition_definition = PARTITION_DEFINITION;
  pool_args.voxel_width = VOXEL_WIDTH;
  pool_args.allocator = atm->allocator;
  res = pool_create(&pool_args, &pool);
  if(res != RES_OK) goto error;

exit:
  *out_pool = pool;
  return res;
error:
  log_err(atm, "failed to create the partition pool -- %s\n",
    res_to_cstr(res));
  if(pool) { pool_ref_put(pool); pool = NULL; }
  goto exit;
}
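/* A worked sizing example for create_pool (numbers assumed for illustration):
 * with atm->nthreads = 8, the pool holds 8 * (64*64 + 1) = 32776 partition
 * slots, of which 8 * 64 = 512 are pre-allocated. Each partition stores
 * 8^3 = 512 voxels whose data is VOXEL_WIDTH = min(4, nbands) spectral items
 * wide, so up to 4 octrees are fed per voxelization pass. */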
static res_T
create_octrees(struct rnatm* atm, const struct rnatm_create_args* args)
{
  struct build_sync sync = BUILD_SYNC_NULL;
  struct pool* pool = NULL;
  size_t nbands = 0;

  ATOMIC res = RES_OK;
  ASSERT(atm);

  nbands = darray_band_size_get(&atm->bands);

  res = create_pool(atm, &pool, nbands);
  if(res != RES_OK) goto error;
  res = build_sync_init(&sync);
  if(res != RES_OK) goto error;

  log_info(atm,
    "partitioning of radiative properties "
    "(grid definition = %ux%ux%u; #octrees = %lu)\n",
    SPLIT3(atm->grid_definition),
    (unsigned long)darray_accel_struct_size_get(&atm->accel_structs));

  /* Enable nested threads to allow multi-threading when voxelizing tetrahedra
   * and building octrees */
  omp_set_nested(1);

  #pragma omp parallel sections num_threads(2)
  {
    #pragma omp section
    {
      const res_T res_local = voxelize_atmosphere(atm, pool, &sync);
      if(res_local != RES_OK) {
        log_err(atm, "error when voxelizing the atmosphere -- %s\n",
          res_to_cstr((res_T)res_local));
        pool_invalidate(pool);
        ATOMIC_SET(&res, res_local);
      }
    }

    #pragma omp section
    {
      const res_T res_local = build_octrees(atm, args, pool, &sync);
      if(res_local != RES_OK) {
        log_err(atm, "error building octrees -- %s\n",
          res_to_cstr((res_T)res_local));
        pool_invalidate(pool);
        ATOMIC_SET(&res, res_local);
      }
    }
  }
  if(res != RES_OK) goto error;

exit:
  build_sync_release(&sync);
  if(pool) pool_ref_put(pool);
  return (res_T)res;
error:
  goto exit;
}

static INLINE res_T
load_octrees(struct rnatm* atm)
{
  res_T res = RES_OK;
  ASSERT(atm && atm->octrees_storage);

  /* Only load the storage table of contents. The octrees will be loaded on
   * demand */
  res = read_octrees_storage_toc(atm, atm->octrees_storage);
  if(res != RES_OK) goto error;

exit:
  return res;
error:
  goto exit;
}

/*******************************************************************************
 * Exported functions
 ******************************************************************************/
res_T
rnatm_trace_ray
  (struct rnatm* atm,
   const struct rnatm_trace_ray_args* args,
   struct svx_hit* hit)
{
  const struct accel_struct* accel_struct = NULL;
  size_t i;
  size_t iaccel;
  res_T res = RES_OK;

  if(!atm || !hit) { res = RES_BAD_ARG; goto error; }
  res = check_trace_ray_args(atm, args);
  if(res != RES_OK) goto error;

  /* Start calculating the id of the acceleration structure to consider */
  i = args->iband - darray_band_cdata_get(&atm->bands)[0].index;
  ASSERT(i < darray_band_size_get(&atm->bands));
  ASSERT(args->iband == darray_band_cdata_get(&atm->bands)[i].index);

  /* Invalid quadrature point index */
  if(args->iquad >= darray_band_cdata_get(&atm->bands)[i].nquad_pts)
    return RES_BAD_ARG;

  /* Find the id of the acceleration structure */
  iaccel = darray_band_cdata_get(&atm->bands)[i].offset_accel_struct + args->iquad;

  /* Retrieve the acceleration structure */
  ASSERT(iaccel < darray_accel_struct_size_get(&atm->accel_structs));
  accel_struct = darray_accel_struct_cdata_get(&atm->accel_structs) + iaccel;
  ASSERT(args->iband == accel_struct->iband);
  ASSERT(args->iquad == accel_struct->iquad_pt);

  res = make_sure_octree_is_loaded(atm, iaccel);
  if(res != RES_OK) goto error;

  *hit = SVX_HIT_NULL;

  res = svx_tree_trace_ray
    (accel_struct->octree,
     args->ray_org,
     args->ray_dir,
     args->ray_range,
     args->challenge,
     args->filter,
     args->context,
     hit);
  if(res != RES_OK) {
    log_err(atm,
      "%s: error tracing ray "
      "(origin = %g, %g, %g; direction = %g, %g, %g; range = %g, %g) -- %s\n",
      FUNC_NAME, SPLIT3(args->ray_org), SPLIT3(args->ray_dir),
      SPLIT2(args->ray_range), res_to_cstr(res));
    goto error;
  }

exit:
  return res;
error:
  goto exit;
}
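/* A minimal usage sketch for rnatm_trace_ray (hypothetical caller code; the
 * RNATM_TRACE_RAY_ARGS_NULL initializer is an assumed counterpart of the
 * other *_NULL constants used in this file, and the field names follow the
 * accesses made above):
 *
 *   struct rnatm_trace_ray_args targs = RNATM_TRACE_RAY_ARGS_NULL;
 *   struct svx_hit hit = SVX_HIT_NULL;
 *   targs.iband = 0;
 *   targs.iquad = 0;
 *   targs.ray_org[0] = 0; targs.ray_org[1] = 0; targs.ray_org[2] = 1000;
 *   targs.ray_dir[0] = 0; targs.ray_dir[1] = 0; targs.ray_dir[2] = 1;
 *   targs.ray_range[0] = 0; targs.ray_range[1] = INF;
 *   res = rnatm_trace_ray(atm, &targs, &hit);
 *
 * On success, `hit' holds the Star-VoXel intersection record filled by
 * svx_tree_trace_ray. */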
/*******************************************************************************
 * Local functions
 ******************************************************************************/
res_T
setup_octrees(struct rnatm* atm, const struct rnatm_create_args* args)
{
  char buf[128];
  struct time t0, t1;
  size_t sz;
  res_T res = RES_OK;
  ASSERT(atm && args);

  /* Start time recording */
  time_current(&t0);

  /* Create the Star-VoXel device */
  res = svx_device_create
    (atm->logger, &atm->svx_allocator, atm->verbose, &atm->svx);
  if(res != RES_OK) goto error;

  /* Setup miscellaneous parameters */
  atm->optical_thickness = args->optical_thickness;

  res = compute_grid_definition(atm, args);
  if(res != RES_OK) goto error;

  /* Initialize the storage *after* calculating the grid definition since it is
   * saved in the storage file */
  res = init_octrees_storage(atm, args);
  if(res != RES_OK) goto error;

  res = setup_accel_structs(atm);
  if(res != RES_OK) goto error;

  if(args->load_octrees_from_storage) {
    res = load_octrees(atm);
    if(res != RES_OK) goto error;
  } else {
    res = create_octrees(atm, args);
    if(res != RES_OK) goto error;
  }

  /* Log elapsed time */
  time_sub(&t0, time_current(&t1), &t0);
  time_dump(&t0, TIME_ALL, NULL, buf, sizeof(buf));
  log_info(atm, "acceleration structures built in %s\n", buf);

  /* Log memory space used by Star-VX */
  sz = MEM_ALLOCATED_SIZE(&atm->svx_allocator);
  size_to_cstr(sz, SIZE_ALL, NULL, buf, sizeof(buf));
  log_info(atm, "Star-VoXel memory space: %s\n", buf);

exit:
  return res;
error:
  goto exit;
}