star-line

Structure for accelerating line importance sampling
git clone git://git.meso-star.fr/star-line.git
Log | Files | Refs | README | LICENSE

sln_tree_build.c (52963B)


      1 /* Copyright (C) 2022, 2026 |Méso|Star> (contact@meso-star.com)
      2  * Copyright (C) 2026 Université de Lorraine
      3  * Copyright (C) 2022 Centre National de la Recherche Scientifique
      4  * Copyright (C) 2022 Université Paul Sabatier
      5  *
      6  * This program is free software: you can redistribute it and/or modify
      7  * it under the terms of the GNU General Public License as published by
      8  * the Free Software Foundation, either version 3 of the License, or
      9  * (at your option) any later version.
     10  *
     11  * This program is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     14  * GNU General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU General Public License
     17  * along with this program. If not, see <http://www.gnu.org/licenses/>. */
     18 
     19 #include "sln.h"
     20 #include "sln_device_c.h"
     21 #include "sln_polyline.h"
     22 #include "sln_tree_c.h"
     23 
     24 #include <star/shtr.h>
     25 
     26 #include <rsys/cstr.h>
     27 #include <rsys/math.h>
     28 
     29 #include <omp.h>
     30 
     31 /* Structure used to track the progress of a construction phase */
     32 struct progress {
     33   /* Absolute progression */
     34   size_t total; /* Total number of items to process */
     35   size_t processed; /* Number of items already processed */
     36 
     37   /* Relative progression */
     38   int pcent; /* Currently displayed percentage */
     39   int pcent_step; /* Increment between displayed percentages */
     40 
     41   const char* msg; /* Message to add before the displayed percentage */
     42 };
     43 #define PROGRESS_DEFAULT__ {0,0,0,10,NULL}
     44 static const struct progress PROGRESS_DEFAULT = PROGRESS_DEFAULT__;
     45 
     46 /* Structure containing temporary variables used to create polyline on a leaf
     47  * These variables are not declared locally within the function responsible for
     48  * constructing this polyline in order to avoid having to allocate and
     49  * deallocate them with each call to the function. Instead, they are passed as
     50  * function inputs so as to share, as much as possible, the memory space
     51  * allocated during previous calls to the function, thereby reducing the cost of
     52  * allocation and deallocation. */
     53 struct build_leaf_polyline_scratch {
     54   struct darray_vertex vertices; /* Polyline vertices */
     55   struct darray_node nodes; /* Dummy leaf nodes */
     56 
     57   /* Temp vertex buffer used when merging polylines into a single one */
     58   struct darray_vertex vertices_tmp;
     59 };
     60 
     61 static res_T
     62 merge_child_polylines
     63   (struct sln_tree* tree,
     64    const size_t inode, /* Node at which child polylines are merged */
     65    const unsigned nchildren, /* #children to merge */
     66    struct darray_node* nodes, /* List of nodes */
     67    struct darray_vertex* vertices); /* List of polyline vertices */
     68 
     69 static res_T
     70 collapse_child_polylines
     71   (struct sln_tree* tree,
     72    const size_t inode, /* Node at which child polylines are merged */
     73    const unsigned nchildren, /* #children to collapse */
     74    struct darray_node* nodes, /* List of nodes */
     75    struct darray_vertex* scratch, /* Temporary buffer of vertices */
     76    struct darray_vertex* vertices); /* List of polyline vertices */
     77 
     78 /* A multiplier to be applied to the calculated ka values in order to mitigate
     79  * numerical issues related to ka interpolation in the case of a tree with an
     80  * upper bound. This interpolation must ensure that the interpolated ka value is
     81  * well above the actual ka value, which may not be the case due to numerical
     82  * inaccuracies. Hence this constant, which defines a percentage of ka to be
     83  * added to the calculated ka values to ensure that any interpolated value is
     84  * well above the actual ka value. */
     85 #define KA_ADJUSTEMENT 1.01
     86 
     87 /* Maximum queue size used for the breadth-first tree traversal. For a perfectly
     88  * balanced tree, this corresponds to the maximum number of leaves a tree can
     89  * have. Note that this value does not need to be too high, since the
     90  * breadth-first traversal is only used to partition the first few levels of the
     91  * tree until a sufficient number of subtrees have been identified so that they
     92  * can then be constructed in parallel using a depth-first algorithm */
     93 #define QUEUE_SIZE_MAX 256
     94 
     95 /*******************************************************************************
     96  * Helper functions
     97  ******************************************************************************/
     98 static FINLINE uint32_t
     99 ui64_to_ui32(const uint64_t ui64)
    100 {
    101   if(ui64 > UINT32_MAX)
    102     VFATAL("%s: overflow %lu.\n", ARG2(FUNC_NAME, ((unsigned long)ui64)));
    103   return (uint32_t)ui64;
    104 }
    105 
    106 static FINLINE unsigned
    107 size_t_to_unsigned(const size_t sz)
    108 {
    109   if(sz > UINT_MAX)
    110     VFATAL("%s: overflow %lu.\n", ARG2(FUNC_NAME, ((unsigned long)sz)));
    111   return (unsigned)sz;
    112 }
    113 
    114 static void
    115 progress_print(struct sln_device* dev, const struct progress* progress)
    116 {
    117   ASSERT(dev && progress);
    118   if(progress->msg) {
    119     INFO(dev, "%s%3d%%\n", progress->msg, progress->pcent);
    120   } else {
    121     INFO(dev, "%3d%%\n", progress->pcent);
    122   }
    123 }
    124 
    125 static void
    126 progress_update
    127   (struct sln_device* dev,
    128    struct progress* progress,
    129    const size_t count)
    130 {
    131   int pcent = 0;
    132   ASSERT(dev && progress);
    133 
    134   #pragma omp critical
    135   {
    136     progress->processed += count;
    137     ASSERT(progress->processed <= progress->total);
    138 
    139     pcent = (int)
    140       ( (double)progress->processed*100.0
    141       / (double)progress->total
    142       + 0.5 /* round */);
    143 
    144     if(pcent/progress->pcent_step > progress->pcent/progress->pcent_step) {
    145       progress->pcent = pcent;
    146       progress_print(dev, progress);
    147     }
    148   }
    149 }
    150 
    151 static INLINE void
    152 build_leaf_polyline_scratch_init
    153   (struct mem_allocator* allocator,
    154    struct build_leaf_polyline_scratch* scratch)
    155 {
    156   ASSERT(scratch);
    157   darray_vertex_init(allocator, &scratch->vertices);
    158   darray_vertex_init(allocator, &scratch->vertices_tmp);
    159   darray_node_init(allocator, &scratch->nodes);
    160 }
    161 
    162 static INLINE void
    163 build_leaf_polyline_scratch_release
    164   (struct build_leaf_polyline_scratch* scratch)
    165 {
    166   ASSERT(scratch);
    167   darray_vertex_release(&scratch->vertices);
    168   darray_vertex_release(&scratch->vertices_tmp);
    169   darray_node_release(&scratch->nodes);
    170 }
    171 
    172 static INLINE res_T
    173 build_leaf_polyline_from_1line
    174   (struct sln_tree* tree,
    175    struct sln_node* leaf,
    176    struct darray_vertex* vertices)
    177 {
    178   size_t vertices_range[2];
    179   res_T res = RES_OK;
    180 
    181   ASSERT(tree && leaf && vertices);
    182 
    183   /* Assume that there is only one line per leaf */
    184   ASSERT(leaf->range[0] == leaf->range[1]);
    185 
    186   /* Line meshing */
    187   res = line_mesh
    188     (/* in */  tree, leaf->range[0], tree->args.nvertices_hint,
    189      /* out */ vertices, vertices_range);
    190   if(res != RES_OK) goto error;
    191 
    192   /* Decimate the line mesh */
    193   res = polyline_decimate(tree->sln, darray_vertex_data_get(vertices),
    194      vertices_range, (float)tree->args.mesh_decimation_err, tree->args.mesh_type);
    195   if(res != RES_OK) goto error;
    196 
    197   /* Shrink the size of the vertices */
    198   darray_vertex_resize(vertices, vertices_range[1] + 1);
    199 
    200   /* Setup the leaf polyline  */
    201   leaf->ivertex = vertices_range[0];
    202   leaf->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1);
    203 
    204 exit:
    205   return res;
    206 error:
    207   goto exit;
    208 }
    209 
    210 /* Build the polyline of a leaf node containing more than one line.
    211  *
    212  * Each line is drawn as if it were the only one on the leaf. This allows the
    213  * line meshing routine to be used, and consequently the algorithm that relies
    214  * on the properties of the lines (symmetry, monotonicity on one half). Next,
    215  * this set of polylines is merged/collapsed into a single polyline, which is
    216  * the leaf polyline.
    217  *
    218  * To reuse the merging/collapsing designed for internal nodes, the function
    219  * treats a leaf as if it were an internal node. A list of temporary nodes is
    220  * thus created, containing not only the leaf node but also its “children”
    221  * i.e., its lines.
    222  *
    223  * Once created, the leaf’s polyline is finally copied into the tree’s vertex
    224  * buffer */
    225 static INLINE res_T
    226 build_leaf_polyline_from_Nlines
    227   (struct sln_tree* tree,
    228    struct sln_node* leaf,
    229    struct build_leaf_polyline_scratch* scratch,
    230    struct darray_vertex* vertices)
    231 {
    232   const struct sln_vertex* src = NULL;
    233   struct sln_vertex* dst = NULL;
    234   size_t memsz = 0;
    235 
    236   size_t nnodes = 0;
    237   unsigned nlines = 0; /* Number of lines in the leaf */
    238   unsigned i = 0;
    239   res_T res = RES_OK;
    240 
    241   ASSERT(tree && leaf && scratch && vertices);
    242 
    243   /* Compute the number of lines of the leaf */
    244   nlines = size_t_to_unsigned(leaf->range[1] - leaf->range[0] + 1/*inclusive*/);
    245   ASSERT(nlines > 1); /* Assume there is more than one line per leaf */
    246 
    247   nnodes = nlines + 1/*leaf*/;
    248 
    249   /* Helper macro */
    250   #define NODE(Id) (darray_node_data_get(&scratch->nodes) + (Id))
    251 
    252   /* Allocate the temporary list of nodes, one node per leaf to which on
    253    * additionnal node is added for the leaf */
    254   res = darray_node_resize(&scratch->nodes, nnodes);
    255   if(res != RES_OK) goto error;
    256   memset(NODE(0), 0, sizeof(struct sln_node)*nnodes);
    257 
    258   /* Clean up the temporary list of vertices */
    259   darray_vertex_clear(&scratch->vertices);
    260 
    261   /* The leaf node will be the first one. Its "child" representing the node of
    262    * each lines, will be store after it in the node list */
    263   NODE(0)->offset = 1;
    264   NODE(0)->range[0] = leaf->range[0];
    265   NODE(0)->range[1] = leaf->range[1];
    266 
    267   FOR_EACH(i, 0, nlines) {
    268     size_t vertices_range[2] = {0, 0}; /* Range in the vertex buffer */
    269     const size_t ichild = NODE(0)->offset + i; /* Index of the child */
    270     const size_t iline = leaf->range[0] + i;
    271 
    272     /* Mesh the line in the temporary vertex buffer */
    273     res = line_mesh(tree, iline, tree->args.nvertices_hint, &scratch->vertices,
    274       vertices_range);
    275     if(res != RES_OK) goto error;
    276 
    277     /* Decimate the line mesh */
    278     res = polyline_decimate(tree->sln,
    279       darray_vertex_data_get(&scratch->vertices), vertices_range,
    280       (float)tree->args.mesh_decimation_err, tree->args.mesh_type);
    281     if(res != RES_OK) goto error;
    282 
    283     NODE(ichild)->ivertex = vertices_range[0];
    284     NODE(ichild)->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1);
    285     NODE(ichild)->range[0] = iline;
    286     NODE(ichild)->range[1] = iline;
    287   }
    288 
    289   /* Merge/collapse the polylines of each lines associated to nodes.
    290    * These nodes are the "children" of the leaf */
    291   if(tree->args.collapse_polylines) {
    292     res = collapse_child_polylines(tree, 0, nlines,
    293       &scratch->nodes, &scratch->vertices_tmp, &scratch->vertices);
    294   } else {
    295     res = merge_child_polylines(tree, 0, nlines,
    296       &scratch->nodes, &scratch->vertices);
    297   }
    298   if(res != RES_OK) goto error;
    299 
    300   /* Setup the leaf */
    301   leaf->ivertex = darray_vertex_size_get(vertices);
    302   leaf->nvertices = NODE(0/*leaf*/)->nvertices;
    303 
    304   /* Copy the leaf vertices from temporary buffer to the vertex buffer of the
    305    * tree */
    306   res = darray_vertex_resize(vertices, leaf->ivertex + leaf->nvertices);
    307   if(res != RES_OK) goto error;
    308   src = darray_vertex_cdata_get(&scratch->vertices) + NODE(0/*leaf*/)->ivertex;
    309   dst = darray_vertex_data_get(vertices) + leaf->ivertex;
    310   memsz = leaf->nvertices * sizeof(*src);
    311   memcpy(dst, src, memsz);
    312 
    313   #undef NODE
    314 
    315 exit:
    316   return res;
    317 error:
    318   goto exit;
    319 }
    320 
    321 static INLINE res_T
    322 build_leaf_polyline
    323   (struct sln_tree* tree,
    324    struct sln_node* leaf,
    325    struct build_leaf_polyline_scratch* scratch,
    326    struct darray_vertex* vertices)
    327 {
    328   size_t nlines = 0; /* Number of lines in the leaf */
    329   ASSERT(tree && leaf);
    330 
    331   nlines = leaf->range[1] - leaf->range[0] + 1;
    332 
    333   if(nlines == 1) {
    334     return build_leaf_polyline_from_1line(tree, leaf, vertices);
    335   } else {
    336     return build_leaf_polyline_from_Nlines(tree, leaf, scratch, vertices);
    337   }
    338 }
    339 
    340 static INLINE double
    341 eval_ka
    342   (const struct sln_node* node,
    343    const struct sln_vertex* vertices,
    344    const double wavenumber)
    345 {
    346   struct sln_mesh mesh = SLN_MESH_NULL;
    347   double ka = 0;
    348   ASSERT(node && vertices);
    349 
    350   /* Whether the mesh to be constructed corresponds to the spectrum or its upper
    351    * limit, use the node mesh to calculate the value of ka at a given wave
    352    * number. Calculating the value from the node lines would take far too long*/
    353   mesh.vertices = vertices + node->ivertex;
    354   mesh.nvertices = node->nvertices;
    355   ka = sln_mesh_eval(&mesh, wavenumber);
    356   return ka;
    357 }
    358 
    359 /* Merge all child polylines in a single step. The polylines are merged before
    360  * being decimated */
    361 res_T
    362 merge_child_polylines
    363   (struct sln_tree* tree,
    364    const size_t inode,
    365    const unsigned nchildren,
    366    struct darray_node* nodes,
    367    struct darray_vertex* vertex_list)
    368 {
    369   /* Helper constant */
    370   static const size_t NO_MORE_VERTEX = SIZE_MAX;
    371 
    372   /* Polyline vertices */
    373   #define NCHILDREN_MAX \
    374     ( SLN_TREE_ARITY_MAX > SLN_LEAF_NLINES_MAX \
    375     ? SLN_TREE_ARITY_MAX : SLN_LEAF_NLINES_MAX)
    376   size_t children_ivtx[NCHILDREN_MAX] = {0};
    377 
    378   size_t vertices_range[2] = {0,0};
    379   struct sln_vertex* vertices = NULL;
    380   size_t ivtx = 0;
    381   size_t nvertices = 0;
    382 
    383   /* Miscellaneous */
    384   unsigned i = 0;
    385   res_T res = RES_OK;
    386 
    387   /* Pre-conditions */
    388   ASSERT(tree && inode < darray_node_size_get(nodes));
    389   ASSERT(nchildren >= 2 && nchildren <= NCHILDREN_MAX);
    390 
    391   #define NODE(Id) (darray_node_data_get(nodes) + (Id))
    392 
    393   /* Compute the number of vertices to be merged,
    394    * i.e., the sum of vertices of the children */
    395   nvertices = 0;
    396   FOR_EACH(i, 0, nchildren) {
    397     const size_t ichild = inode + NODE(inode)->offset + i;
    398     nvertices += NODE(ichild)->nvertices;
    399   }
    400 
    401   /* Define the vertices range of the merged polyline */
    402   vertices_range[0] = darray_vertex_size_get(vertex_list);
    403   vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/;
    404 
    405   /* Allocate the memory space to store the new polyline */
    406   res = darray_vertex_resize(vertex_list, vertices_range[1]+1);
    407   if(res != RES_OK) {
    408     ERROR(tree->sln, "Error in merging polylines -- %s.\n", res_to_cstr(res));
    409     goto error;
    410   }
    411   vertices = darray_vertex_data_get(vertex_list);
    412 
    413   /* Initialize the vertex index list. For each child, the initial value
    414    * corresponds to the index of its first vertex. This index will be
    415    * incremented as vertices are merged into the parent polyline. */
    416   FOR_EACH(i, 0, nchildren) {
    417     const size_t ichild = inode + NODE(inode)->offset + i;
    418     children_ivtx[i] = NODE(ichild)->ivertex;
    419   }
    420 
    421   FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) {
    422     double ka = 0;
    423     double nu = INF;
    424 
    425     /* The number of vertices corresponding to the current wave number for which
    426      * the parent ka is calculated. It is at least equal to one, since this nu
    427      * is defined by the child vertices, but may be greater if multiple children
    428      * share the same vertex, i.e., a ka value calculated for the same nu */
    429     unsigned nvertices_merged = 0;
    430 
    431     /* Find the minimum wave number among the vertices of the child vertices
    432      * that are candidates for merging */
    433     FOR_EACH(i, 0, nchildren) {
    434       const size_t child_ivtx = children_ivtx[i];
    435       if(child_ivtx != NO_MORE_VERTEX) {
    436         nu = MMIN(nu, vertices[child_ivtx].wavenumber);
    437       }
    438     }
    439     ASSERT(nu != INF); /* At least one vertex must have been found */
    440 
    441     /* Compute the value of ka at the wave number determined above */
    442     FOR_EACH(i, 0, nchildren) {
    443       const size_t child_ivtx = children_ivtx[i];
    444       const struct sln_node* child = NODE(inode + NODE(inode)->offset + i);
    445 
    446       if(child_ivtx == NO_MORE_VERTEX
    447       || nu != vertices[child_ivtx].wavenumber) {
    448         /* The wave number does not correspond to a vertex in the current
    449          * child's mesh. Therefore, its contribution to the parent node's ka is
    450          * computed  */
    451         ka += eval_ka(child, vertices, nu);
    452 
    453       } else {
    454         /* The wave number is the one for which the child node stores a ka
    455          * value. Add it to the parent node's ka value and designate the child's
    456          * next vertex as a candidate for merging into the parent. The exception
    457          * is when all vertices of the child have already been merged. In this
    458          * case, report that the child no longer has any candidate vertices */
    459         ka += vertices[child_ivtx].ka;
    460         ++children_ivtx[i];
    461         if(children_ivtx[i] >= child->ivertex + child->nvertices) {
    462           children_ivtx[i] = NO_MORE_VERTEX;
    463         }
    464         ++nvertices_merged; /* Record that a vertex has been merged */
    465       }
    466     }
    467 
    468     /* Setup the parent vertex */
    469     vertices[ivtx].wavenumber = (float)nu;
    470     vertices[ivtx].ka = (float)ka;
    471 
    472     /* If multiple child vertices have been merged, then a single wave number
    473      * corresponds to a vertex with multiple children. The number of parent
    474      * vertices is therefore no longer the sum of the number of its children's
    475      * vertices, since some vertices are duplicated. Hence the following
    476      * adjustment, which removes the duplicate vertices. */
    477     vertices_range[1] -= (nvertices_merged-1);
    478   }
    479 
    480   /* Decimate the resulting polyline */
    481   res = polyline_decimate(tree->sln, darray_vertex_data_get(vertex_list),
    482      vertices_range, (float)tree->args.mesh_decimation_err, tree->args.mesh_type);
    483   if(res != RES_OK) goto error;
    484 
    485   /* Setup the node polyline */
    486   NODE(inode)->ivertex = vertices_range[0];
    487   NODE(inode)->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1);
    488 
    489   /* It is necessary to ensure that the recorded vertices define a polyline
    490    * along which any value (calculated by linear interpolation) is well above
    491    * the sum of the corresponding values of the polylines to be merged. However,
    492    * although this is guaranteed by definition for the vertices of the polyline,
    493    * numerical uncertainty may nevertheless introduce errors that violate this
    494    * criterion. Hence the following adjustment, which slightly increases the ka
    495    * of the mesh so as to guarantee this constraint between the mesh of a node
    496    * and that of its children */
    497   if(tree->args.mesh_type == SLN_MESH_UPPER) {
    498     FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) {
    499       const double ka = vertices[ivtx].ka;
    500       vertices[ivtx].ka = (float)(ka*KA_ADJUSTEMENT);
    501     }
    502   }
    503 
    504   /* Shrink the size of the vertices */
    505   darray_vertex_resize(vertex_list, vertices_range[1] + 1);
    506 
    507   #undef NODE
    508   #undef NCHILDREN_MAX
    509 
    510 exit:
    511   return res;
    512 error:
    513   goto exit;
    514 }
    515 
    516 /* Merge child polylines by combining them in pairs (the resulting polyline is
    517  * then simplified), and repeat this process until only a single polyline
    518  * remains. This polyline becomes the node's polyline */
    519 static res_T
    520 collapse_child_polylines
    521   (struct sln_tree* tree,
    522    const size_t inode,
    523    const unsigned nchildren,
    524    struct darray_node* nodes,
    525    struct darray_vertex* scratch,
    526    struct darray_vertex* vertex_list)
    527 {
    528   /* Polylines to be collapsed, i.e., the ids of their first and last vertices */
    529   #define NCHILDREN_MAX \
    530     ( SLN_TREE_ARITY_MAX > SLN_LEAF_NLINES_MAX \
    531     ? SLN_TREE_ARITY_MAX : SLN_LEAF_NLINES_MAX)
    532   size_t poly_parts[NCHILDREN_MAX][2] = {0};
    533   size_t nparts;
    534 
    535   /* Indices of the first and last vertices of the resulting polyline.
    536    * These indices are absolute to the tree's vertex list */
    537   size_t vertices_range[2] = {0,0};
    538 
    539   /* Redux double buffering */
    540   struct sln_vertex* buf[2] = {NULL, NULL};
    541   int r, w; /* Index of the buffer in read/write */
    542 
    543   /* Miscellaneous */
    544   struct sln_vertex* vertices = NULL; /* Pointer to the tree's vertex buffer */
    545   size_t range_merge[2] = {0,0}; /* vertex range of a merged polyline */
    546   size_t nvertices = 0; /* #vertices of the resulting polyline */
    547   size_t ivtx = 0;
    548   unsigned i = 0;
    549   int ncollapses = 0; /* Number of collapse steps */
    550   res_T res = RES_OK;
    551 
    552   /* Pre-conditions */
    553   ASSERT(tree && inode < darray_node_size_get(nodes));
    554   ASSERT(nchildren >= 2 && nchildren <= NCHILDREN_MAX);
    555 
    556   #define NODE(Id) (darray_node_data_get(nodes) + (Id))
    557 
    558   /* Compute the number of vertices to be merged,
    559    * i.e., the sum of vertices of the children */
    560   nvertices = 0;
    561   FOR_EACH(i, 0, nchildren) {
    562     const size_t ichild = inode + NODE(inode)->offset + i;
    563     nvertices += NODE(ichild)->nvertices;
    564   }
    565 
    566   /* Define the vertices range of the merged polyline */
    567   vertices_range[0] = darray_vertex_size_get(vertex_list);
    568   vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/;
    569 
    570   /* Allocate the memory space required to store the new polyline and the
    571    * temporary buffer. This is the memory space in which the collapse
    572    * procedure's double buffering will take place. Each buffer will be used
    573    * alternately as a read/write buffer when reducing the polylines of the
    574    * child nodes */
    575   if((res = darray_vertex_resize(vertex_list, vertices_range[1]+1)) != RES_OK
    576   || (res = darray_vertex_resize(scratch, nvertices)) != RES_OK) {
    577     ERROR(tree->sln, "Error in merging polylines -- %s.\n", res_to_cstr(res));
    578     goto error;
    579   }
    580 
    581   /* Recover the memory space of the tree's vertices */
    582   vertices = darray_vertex_data_get(vertex_list);
    583 
    584   /* Recover the memory space to be used in the Redux process.
    585    *
    586    * In the vertex buffer, this refers to the newly allocated memory space used
    587    * during the collapse. The beginning of the buffer contains the vertices of
    588    * the registered nodes and must therefore not be modified. At the end of the
    589    * collapse, this space will contain the vertices of the polylines resulting
    590    * from the collapse of the child nodes' polylines.
    591    *
    592    * The scratch buffer is used as-is, since its sole purpose is to temporarily
    593    * store the vertices of the polylines to be merged. */
    594   buf[0] = vertices + vertices_range[0];
    595   buf[1] = darray_vertex_data_get(scratch);
    596 
    597   /* Initially, the number of partitions to be reduced is equal to the number of
    598    * children */
    599   nparts = nchildren;
    600 
    601   /* Calculate the number of reduction steps, i.e., the logarithm of the power
    602    * of 2 that is greater than or equal to the number of polylines to be
    603    * reduced, to which one is added to obtain the number of steps, not the index
    604    * of the last step */
    605   ncollapses = log2i((int)round_up_pow2(nparts)) + 1;
    606 
    607   /* Set the index of the write buffer so that, once the collapse process is
    608    * complete, the last write buffer is the one whose memory space is allocated
    609    * in the tree's vertex buffer. This way, no additional copies will be needed
    610    * to store the result of the reduction */
    611   w = ncollapses % 2 ? 0 : 1;
    612 
    613   /* Initialize the polylines to be reduced, i.e., copy the child vertices into
    614    * a collapse buffer and record their index ranges */
    615   ivtx = 0;
    616   for(i = 0; i < nchildren; ++i) {
    617     const size_t ichild = inode + NODE(inode)->offset + i;
    618     const struct sln_node* child = NODE(ichild);
    619     const size_t memsz = child->nvertices * sizeof(struct sln_vertex);
    620     const struct sln_vertex* src = vertices + child->ivertex;
    621     struct sln_vertex* dst = buf[w] + ivtx;
    622 
    623     memcpy(dst, src, memsz);
    624     poly_parts[i][0] = ivtx;
    625     poly_parts[i][1] = ivtx + child->nvertices - 1/*inclusive bound*/;
    626 
    627     ivtx += child->nvertices;
    628   }
    629 
    630   r = w; /* Index of the buffer from which to read the data */
    631   w =!r; /* Index of the buffer into which to write the data  */
    632 
    633 
    634   /* As long as the number of segments is not equal to one, there are still
    635    * polylines to be merged in pairs */
    636   while(nparts > 1) {
    637     size_t ipart = 0;
    638 
    639     for(ipart=0; ipart < nparts-1; ipart+=2) {
    640       struct sln_mesh mesh0 = SLN_MESH_NULL;
    641       struct sln_mesh mesh1 = SLN_MESH_NULL;
    642 
    643       /* Retrieve the two polylines to be merged */
    644       const size_t* part0 = poly_parts[ipart+0];
    645       const size_t* part1 = poly_parts[ipart+1];
    646 
    647       /* Calculate the partition index of the merged polyline, that is, the index
    648        * that will contain the information for the polyline resulting from the
    649        * merge: the first and last indices of its in the vertex array currently
    650        * being written */
    651       const size_t ipart_merge = ipart/2;
    652 
    653       /* Compute the number of vertices of the merged polyline */
    654       const size_t nvtx =
    655         ((part0[1] - part0[0]) + 1/*inclusive bound*/)
    656       + ((part1[1] - part1[0]) + 1/*inclusive bound*/);
    657 
    658       /* For each child, initialized its vertex index to its first vertex. This
    659        * index will be incremented as vertices are merged into the merged
    660        * polyline  */
    661       size_t ivtx0 = part0[0];
    662       size_t ivtx1 = part1[0];
    663 
    664       /* Set the vertex index range for the merged polyline in the write buffer */
    665       range_merge[0] = part0[0];
    666       range_merge[1] = range_merge[0] + nvtx-1/*inclusive bound*/;
    667 
    668       /* Setup the mesh of the two polylines to be merged */
    669       mesh0.vertices = buf[r] + part0[0]; mesh0.nvertices = part0[1]-part0[0]+1;
    670       mesh1.vertices = buf[r] + part1[0]; mesh1.nvertices = part1[1]-part1[0]+1;
    671 
    672       /* Merge the polylines */
    673       FOR_EACH(ivtx, range_merge[0], range_merge[1]+1/*inclusive bound*/) {
    674         const double nu0 = ivtx0 <= part0[1] ? buf[r][ivtx0].wavenumber : INF;
    675         const double nu1 = ivtx1 <= part1[1] ? buf[r][ivtx1].wavenumber : INF;
    676         double ka = 0;
    677         double nu = 0;
    678 
    679         /* Find the minimum wave number among the vertices of the child vertices
    680          * that are candidates for merging */
    681         if(nu0 < nu1) { /* The vertex comes from the child0 */
    682           nu = buf[r][ivtx0].wavenumber;
    683           ka = buf[r][ivtx0].ka + sln_mesh_eval(&mesh1, nu);
    684           ++ivtx0;
    685 
    686         } else if(nu0 > nu1) { /* The vertex comes from the child1 */
    687           nu = buf[r][ivtx1].wavenumber;
    688           ka = buf[r][ivtx1].ka + sln_mesh_eval(&mesh0, nu);
    689           ++ivtx1;
    690 
    691         } else { /* The vertex is shared by node0 and node1 */
    692           nu = buf[r][ivtx0].wavenumber;
    693           ka = buf[r][ivtx0].ka + buf[r][ivtx1].ka;
    694           --range_merge[1]; /* Remove duplicate */
    695           ++ivtx0;
    696           ++ivtx1;
    697         }
    698         buf[w][ivtx].wavenumber = (float)nu;
    699         buf[w][ivtx].ka = (float)ka;
    700       }
    701 
    702       /* Decimate the resulting polyline */
    703       res = polyline_decimate(tree->sln, buf[w], range_merge,
    704         (float)tree->args.mesh_decimation_err, tree->args.mesh_type);
    705       if(res != RES_OK) goto error;
    706 
    707       /* Setup the partition of the merge polyline */
    708       poly_parts[ipart_merge][0] = range_merge[0];
    709       poly_parts[ipart_merge][1] = range_merge[1];
    710     }
    711 
    712     /* If there is a polyline that has not been merged, copy its vertices to the
    713      * write buffer so that it can be processed in the next reduction step */
    714     if(nparts % 2) {
    715       const size_t* remain_part = poly_parts[nparts-1];
    716       const size_t nvtx = remain_part[1] - remain_part[0] + 1/*inclusive*/;
    717       const size_t memsz = nvtx * sizeof(struct sln_vertex);
    718       const struct sln_vertex* src = buf[r] + remain_part[0];
    719       struct sln_vertex* dst = buf[w] + remain_part[0];
    720 
    721       memcpy(dst, src, memsz);
    722       poly_parts[nparts/2][0] = remain_part[0];
    723       poly_parts[nparts/2][1] = remain_part[1];
    724     }
    725 
    726 #ifndef NDEBUG
    727     FOR_EACH(i, 1, (nparts+1/*ceil*/)/2) {
    728       ASSERT(poly_parts[i][0] > poly_parts[i-1][1]);
    729     }
    730 #endif
    731 
    732     /* Update the number of partitions to be reduced */
    733     nparts = (nparts + 1/*ceil*/)/2;
    734 
    735     /* Swap read/write buffers */
    736     r = !r;
    737     w = !w;
    738   }
    739 
    740   nvertices = (range_merge[1] - range_merge[0]) + 1/*inclusive bound*/;
    741   vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/;
    742 
    743   /* Assumed that double buffering was configured to ensure that the resulting
    744    * polyline is stored in the tree vertex buffer */
    745   ASSERT(buf[r] != darray_vertex_cdata_get(scratch));
    746 
    747   /* Setup the node */
    748   NODE(inode)->ivertex = vertices_range[0];
    749   NODE(inode)->nvertices = ui64_to_ui32(nvertices);
    750 
    751   /* It is necessary to ensure that the recorded vertices define a polyline
    752    * along which any value (calculated by linear interpolation) is well above
    753    * the sum of the corresponding values of the polylines to be merged. However,
    754    * although this is guaranteed by definition for the vertices of the polyline,
    755    * numerical uncertainty may nevertheless introduce errors that violate this
    756    * criterion. Hence the following adjustment, which slightly increases the ka
    757    * of the mesh so as to guarantee this constraint between the mesh of a node
    758    * and that of its children */
    759   if(tree->args.mesh_type == SLN_MESH_UPPER) {
    760     FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) {
    761       const double ka = vertices[ivtx].ka;
    762       vertices[ivtx].ka = (float)(ka*KA_ADJUSTEMENT);
    763     }
    764   }
    765 
    766   /* Shrink the size of the vertices */
    767   darray_vertex_resize(vertex_list, vertices_range[1] + 1);
    768 
    769   #undef NCHILDREN
    770   #undef NODE
    771 
    772 exit:
    773   return res;
    774 error:
    775   goto exit;
    776 }
    777 static res_T
    778 build_polylines
    779   (struct sln_tree* tree,
    780    const size_t root_index,
    781    const size_t nodes_count, /* Total number of nodes in the tree (for debug) */
    782    struct darray_vertex* vertices,
    783    struct progress* progress)
    784 {
    785   /* Stack */
    786   #define STACK_SIZE (SLN_TREE_DEPTH_MAX*SLN_TREE_ARITY_MAX)
    787   size_t stack[STACK_SIZE];
    788   size_t istack = 0;
    789 
    790   /* Progress */
    791   size_t nnodes_processed = 0;
    792 
    793   /* Miscellaneous */
    794   struct build_leaf_polyline_scratch scratch;
    795   size_t inode = 0;
    796   res_T res = RES_OK;
    797 
    798   ASSERT(tree && nodes_count != 0);
    799   ASSERT(root_index < darray_node_size_get(&tree->nodes));
    800   (void)nodes_count; /* Avoid "Unused variable" warning */
    801 
    802   build_leaf_polyline_scratch_init(tree->sln->allocator, &scratch);
    803 
    804   #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id))
    805   #define IS_LEAF(Id) (NODE(Id)->offset == 0)
    806 
    807   /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */
    808   stack[istack++] = SIZE_MAX;
    809 
    810   inode = root_index; /* Root node */
    811   while(inode != SIZE_MAX) {
    812     const size_t istack_saved = istack;
    813 
    814     if(IS_LEAF(inode)) {
    815       res = build_leaf_polyline(tree, NODE(inode), &scratch, vertices);
    816       if(res != RES_OK) goto error;
    817 
    818       inode = stack[--istack]; /* Pop the next node */
    819 
    820     } else {
    821       const size_t ichild0 = inode + NODE(inode)->offset + 0;
    822       const struct sln_node* node = darray_node_cdata_get(&tree->nodes)+inode;
    823       const unsigned nchildren = node_child_count(node, tree->args.arity);
    824       int child_polylines_are_missing = 1;
    825       size_t i = 0;
    826 
    827       FOR_EACH(i, 0, nchildren) {
    828         child_polylines_are_missing = NODE(ichild0 + i)->nvertices == 0;
    829         if(child_polylines_are_missing) break;
    830       }
    831 
    832       /* Child nodes have their polyline created */
    833       if(!child_polylines_are_missing) {
    834 #ifndef NDEBUG
    835         /* Check that all children have their polylines created */
    836         FOR_EACH(i, 1, nchildren) {
    837           const size_t ichild = ichild0 + i;
    838           ASSERT(NODE(ichild)->nvertices != 0);
    839         }
    840 #endif
    841         if(tree->args.collapse_polylines) {
    842           res = collapse_child_polylines(tree, inode, nchildren, &tree->nodes,
    843             &scratch.vertices_tmp, vertices);
    844         } else {
    845           res = merge_child_polylines
    846             (tree, inode, nchildren, &tree->nodes, vertices);
    847         }
    848         if(res != RES_OK) goto error;
    849 
    850         inode = stack[--istack]; /* Pop the next node */
    851 
    852       /* Child nodes have NOT their polyline created */
    853       } else {
    854         ASSERT(istack + (nchildren - 1/*ichild0*/ + 1/*inode*/) <= STACK_SIZE);
    855         stack[istack++] = inode; /* Push the current node */
    856 
    857          /* Push the child nodes, except for those whose polyline has already
    858           * been constructed, as is the case when the child node was the root
    859           * of a separately constructed subtree */
    860         FOR_EACH_REVERSE(i, nchildren, 0) {
    861           const size_t ichild = ichild0 + i-1;
    862           if(NODE(ichild)->nvertices == 0) stack[istack++] = ichild;
    863         }
    864 
    865         /* Ensure that at least one child was pushed */
    866         ASSERT(stack[istack-1] != inode);
    867 
    868         inode = stack[--istack]; /* Recursively build the polyline of the 1st child */
    869       }
    870     }
    871 
    872     /* Handle progression bar */
    873     if(istack < istack_saved) {
    874       size_t nnodes = istack_saved - istack;
    875 
    876       nnodes_processed += nnodes;
    877       progress_update(tree->sln, progress, nnodes);
    878     }
    879   }
    880   ASSERT(nnodes_processed == nodes_count);
    881 
    882   #undef NODE
    883   #undef IS_LEAF
    884   #undef LOG_MSG
    885   #undef STACK_SIZE
    886 
    887 exit:
    888   build_leaf_polyline_scratch_release(&scratch);
    889   return res;
    890 error:
    891   goto exit;
    892 }
    893 
    894 static res_T
    895 partition_lines_depth_first
    896   (struct sln_tree* tree,
    897    const size_t root_index,
    898    struct darray_node* nodes,
    899    struct progress* progress)
    900 {
    901   /* Stack */
    902   #define STACK_SIZE (SLN_TREE_DEPTH_MAX*(SLN_TREE_ARITY_MAX-1))
    903   size_t stack[STACK_SIZE];
    904   size_t istack = 0;
    905 
    906   /* Progress */
    907   size_t nlines_total = 0;
    908   size_t nlines_processed = 0;
    909 
    910   /* Miscellaneous */
    911   size_t inode = 0;
    912   res_T res = RES_OK;
    913 
    914   /* Pre-condition */
    915   ASSERT(tree && nodes && progress);
    916   ASSERT(root_index < darray_node_size_get(nodes));
    917   (void)nlines_total; /* Avoid "Unused variable" warning */
    918 
    919   #define NODE(Id) (darray_node_data_get(nodes) + (Id))
    920   #define CREATE_NODE {                                                        \
    921     res = darray_node_push_back(nodes, &SLN_NODE_NULL);                        \
    922     if(res != RES_OK) goto error;                                              \
    923   } (void)0
    924 
    925   nlines_total = NODE(root_index)->range[1] - NODE(root_index)->range[0] + 1;
    926   nlines_processed = 0;
    927 
    928   /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */
    929   stack[istack++] = SIZE_MAX;
    930 
    931   inode = root_index; /* Root node */
    932   while(inode != SIZE_MAX) {
    933     /* #lines into the node */
    934     size_t nlines_node = NODE(inode)->range[1] - NODE(inode)->range[0] + 1;
    935 
    936     /* Make a leaf */
    937     if(nlines_node <= tree->args.leaf_nlines) {
    938 
    939       NODE(inode)->offset = 0;
    940       inode = stack[--istack]; /* Pop the next node */
    941 
    942       nlines_processed += nlines_node; /* For debug */
    943 
    944       progress_update(tree->sln, progress, nlines_node);
    945 
    946     /* Split the node  */
    947     } else {
    948       size_t node_range[2] = {0,0};
    949       size_t nlines_child_min = 0; /* Min #lines per child */
    950       size_t nlines_remain = 0;
    951       size_t nchildren = 0;
    952       size_t iline = 0;
    953       size_t i = 0;
    954 
    955       /* Calculate the index of the first child */
    956       size_t ichildren = darray_node_size_get(nodes);
    957 
    958       /* Compute how the number of children that node has */
    959       nchildren = node_child_count(NODE(inode), tree->args.arity);
    960 
    961       ASSERT(nchildren <= tree->args.arity);
    962       ASSERT(ichildren > inode);
    963 
    964       node_range[0] = NODE(inode)->range[0];
    965       node_range[1] = NODE(inode)->range[1];
    966 
    967       /* Define the offset from the current node to its children */
    968       NODE(inode)->offset = ui64_to_ui32((uint64_t)(ichildren - inode));
    969 
    970       nlines_child_min = nlines_node / nchildren;
    971       nlines_remain = nlines_node % nchildren;
    972 
    973       iline = node_range[0];
    974       FOR_EACH(i, 0, nchildren) {
    975         /* Compute the number of lines per child. Start by assigning the minimum
    976          * number of lines to each child, then distribute the remaining lines
    977          * among the first children */
    978         size_t nlines_child = nlines_child_min + (i < nlines_remain);
    979 
    980         CREATE_NODE;
    981 
    982         /* Set the range of lines line for the newly created child. Note that
    983          * the boundaries of the range are inclusive, which is why 1 is
    984          * subtracted to the upper bound */
    985         NODE(ichildren+i)->range[0] = iline;
    986         NODE(ichildren+i)->range[1] = iline + nlines_child - 1/*inclusive bound*/;
    987         iline += nlines_child;
    988 
    989         /* Check that the child's lines are a subset of the parent's lines */
    990         ASSERT(NODE(ichildren+i)->range[0] >= node_range[0]);
    991         ASSERT(NODE(ichildren+i)->range[1] <= node_range[1]);
    992       }
    993 
    994       inode = ichildren; /* Make the first child the current node */
    995 
    996       /* Push the other children */
    997       ASSERT(istack + (nchildren-1/*1st child*/) <= STACK_SIZE);
    998       FOR_EACH_REVERSE(i, nchildren-1, 0) stack[istack++] = ichildren + i;
    999     }
   1000   }
   1001   ASSERT(nlines_processed == nlines_total);
   1002 
   1003   #undef NODE
   1004   #undef CREATE_NODE
   1005   #undef STACK_SIZE
   1006 
   1007 exit:
   1008   return res;
   1009 error:
   1010   goto exit;
   1011 }
   1012 
   1013 static res_T
   1014 partition_lines_breadth_first
   1015   (struct sln_tree* tree,
   1016    const size_t root_index,
   1017    const unsigned nleaves_max_hint) /* Advice on the maxium of leafs to create */
   1018 {
   1019   /* Static memory space of the queue */
   1020   struct item {
   1021     struct list_node link;
   1022     size_t inode;
   1023   } items[QUEUE_SIZE_MAX];
   1024 
   1025   /* Linked lists for managing queue data */
   1026   struct list_node free_items;
   1027   struct list_node queue;
   1028   size_t nnodes = 0; /* Number of enqueud nodes */
   1029 
   1030   /* Miscellaneous */
   1031   size_t nlines_total = 0;
   1032   size_t nleaves = 0;
   1033   size_t i = 0;
   1034   res_T res = RES_OK;
   1035 
   1036   /* Pre-conditions */
   1037   ASSERT(tree);
   1038   ASSERT(root_index < darray_node_size_get(&tree->nodes));
   1039 
   1040   /* Macros to help in managing nodes */
   1041   #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id))
   1042   #define CREATE_NODE {                                                        \
   1043     res = darray_node_push_back(&tree->nodes, &SLN_NODE_NULL);                 \
   1044     if(res != RES_OK) goto error;                                              \
   1045   } (void)0
   1046 
   1047   /* Helper macro for the queue data structure */
   1048   #define ENQUEUE(INode) {                                                     \
   1049     struct item* item__ = NULL;                                                \
   1050     ASSERT(!is_list_empty(&free_items));                                       \
   1051     item__ = CONTAINER_OF(list_head(&free_items), struct item, link);          \
   1052     item__->inode = INode;                                                     \
   1053     list_move_tail(&item__->link, &queue);                                     \
   1054     ++nnodes;                                                                  \
   1055   }
   1056   #define DEQUEUE                                                              \
   1057     (list_move_tail(list_head(&queue), &free_items),                           \
   1058     --nnodes,                                                                  \
   1059     CONTAINER_OF(list_tail(&free_items), struct item, link)->inode)
   1060 
   1061   /* Setup the linked lists */
   1062   list_init(&free_items);
   1063   list_init(&queue);
   1064   FOR_EACH(i, 0, sizeof(items)/sizeof(items[0])) {
   1065     items[i].inode = SIZE_MAX;
   1066     list_init(&items[i].link);
   1067     list_add_tail(&free_items, &items[i].link);
   1068   }
   1069 
   1070   SHTR(line_list_get_size(tree->args.lines, &nlines_total));
   1071 
   1072   /* Setup the root node */
   1073   NODE(root_index)->range[0] = 0;
   1074   NODE(root_index)->range[1] = nlines_total - 1;
   1075 
   1076   /* Register the root node in the queue */
   1077   ENQUEUE(root_index);
   1078 
   1079   /* Breadth-first partitioning */
   1080   while(!is_list_empty(&queue)) {
   1081     size_t node_range[2] = {0,0};
   1082     size_t nlines_node = 0; /* #lines in the node */
   1083     size_t nlines_child_min = 0; /* Min #lines per child */
   1084     size_t nlines_remain = 0; /* Lines to be distributed among the children */
   1085     size_t nchildren = 0; /* #children of the node */
   1086     size_t ichildren = 0; /* Index of the node's first child */
   1087     size_t iline = 0;
   1088     size_t inode = 0;
   1089 
   1090 
   1091     /* Check whether the number of registered leaves and currently processed
   1092      * nodes is greater than or equal to the recommended maximum number of
   1093      * leaves. If so, stop breadth-first partitioning and treat all registered
   1094      * nodes as leaves. The policy is therefore to accept more leaves than
   1095      * expected */
   1096     if(nnodes + nleaves >= nleaves_max_hint) {
   1097       nleaves += nnodes;
   1098       break;
   1099     }
   1100 
   1101     inode = DEQUEUE; /* Get the current node */
   1102 
   1103     /* Retrieve the range of lines contained within the node */
   1104     node_range[0] = NODE(inode)->range[0];
   1105     node_range[1] = NODE(inode)->range[1];
   1106     nlines_node = node_range[1] - node_range[0] + 1/*inclusive bound*/;
   1107     ASSERT(nlines_node >= tree->args.leaf_nlines);
   1108 
   1109     if(nlines_node <= tree->args.leaf_nlines) {
   1110       /* Make a leaf */
   1111       ++nleaves;
   1112       continue;
   1113     }
   1114 
   1115     /* Compute the number of children that node has */
   1116     nchildren = node_child_count(NODE(inode), tree->args.arity);
   1117     ASSERT(nchildren <= tree->args.arity);
   1118 
   1119     /* Check whether, after adding the child nodes to the queue, the total
   1120      * number of nodes in the queue, plus the number of leaves already created,
   1121      * does not exceed the max size of the queue. If so, the breadth-first
   1122      * partitioning is complete and the nodes currently in the queue are all
   1123      * leaves */
   1124     if(nnodes + nchildren + nleaves > QUEUE_SIZE_MAX) {
   1125       nleaves += nnodes + 1/*Do not forget the the current node*/;
   1126       break;
   1127     }
   1128 
   1129     /* Calculate the index of the first child */
   1130     ichildren = darray_node_size_get(&tree->nodes);
   1131     ASSERT(ichildren > inode);
   1132 
   1133     /* Define the offset from the current node to its children */
   1134     NODE(inode)->offset = ui64_to_ui32((uint64_t)(ichildren - inode));
   1135 
   1136     /* Calculate the minimum number of lines per child and the number of
   1137      * remaining lines to be distributed equally among them */
   1138     nlines_child_min = nlines_node / nchildren;
   1139     nlines_remain = nlines_node % nchildren;
   1140 
   1141     iline = node_range[0];
   1142     FOR_EACH(i, 0, nchildren) {
   1143       /* Compute the number of lines per child. Start by assigning the minimum
   1144        * number of lines to each child, then distribute the remaining lines
   1145        * among the first children */
   1146       size_t nlines_child = nlines_child_min + (i < nlines_remain);
   1147 
   1148       CREATE_NODE;
   1149 
   1150       /* Set the range of lines line for the newly created child. Note that
   1151        * the boundaries of the range are inclusive, which is why 1 is
   1152        * subtracted to the upper bound */
   1153       NODE(ichildren+i)->range[0] = iline;
   1154       NODE(ichildren+i)->range[1] = iline + nlines_child - 1/*inclusive bound*/;
   1155       iline += nlines_child;
   1156 
   1157       /* Check that the child's lines are a subset of the parent's lines */
   1158       ASSERT(NODE(ichildren+i)->range[0] >= node_range[0]);
   1159       ASSERT(NODE(ichildren+i)->range[1] <= node_range[1]);
   1160 
   1161       ENQUEUE(ichildren+i); /* Register the child node */
   1162     }
   1163   }
   1164 
   1165   #undef NODE
   1166   #undef CREATE_NODE
   1167   #undef ENQUEUE
   1168   #undef DEQUEUE
   1169 
   1170 exit:
   1171   return res;
   1172 error:
   1173   goto exit;
   1174 }
   1175 
   1176 static res_T
   1177 build_subtrees
   1178   (struct sln_tree* tree,
   1179    const size_t subtrees[],
   1180    unsigned nsubtrees,
   1181    struct progress* meshing)
   1182 {
   1183   /* Subtree temporary data */
   1184   struct scratch {
   1185     struct darray_node nodes;
   1186     struct darray_vertex vertices;
   1187     size_t nnodes;
   1188   }* scratches;
   1189 
   1190   /* Miscellaneous */
   1191   struct progress partitioning = PROGRESS_DEFAULT;
   1192   struct mem_allocator* allocator = NULL;
   1193   size_t nlines = 0;
   1194   unsigned nthreads = 0;
   1195   int i = 0;
   1196   ATOMIC res = RES_OK;
   1197 
   1198   /* Pre-conditions */
   1199   ASSERT(tree && subtrees && nsubtrees >= 1);
   1200   ASSERT(nsubtrees < INT_MAX);
   1201 
   1202   #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id))
   1203   #define IS_LEAF(Buf, Id) (darray_node_cdata_get(Buf)[Id].offset == 0)
   1204 
   1205   allocator = tree->sln->allocator;
   1206 
   1207   /* Allocate the per subtree scratch */
   1208   scratches = MEM_CALLOC(allocator, nsubtrees, sizeof(*scratches));
   1209   if(!scratches) { res = RES_MEM_ERR; goto error; }
   1210   FOR_EACH(i, 0, (int)nsubtrees) {
   1211     darray_node_init(allocator, &scratches[i].nodes);
   1212     darray_vertex_init(allocator, &scratches[i].vertices);
   1213   }
   1214 
   1215   /* Setup the progress bar and print that although nothing has been done yet,
   1216    * the calculation is nevertheless in progress */
   1217   SHTR(line_list_get_size(tree->args.lines, &nlines));
   1218   partitioning.total = nlines;
   1219   partitioning.msg = "Partitioning: ";
   1220   progress_print(tree->sln, &partitioning);
   1221 
   1222   /* Set the number of threads to use. The advice on the number of threads must
   1223    * not exceed the number of threads available on the machine (see the
   1224    * sln_tree_create function); the caller can only specify a lower number of
   1225    * threads. urthermore, the number of subtrees is calculated so that each subtree
   1226    * corresponds to at least one thread, ensuring that there are no more
   1227    * subtrees than available threads.
   1228    *
   1229    * The actual number of threads to be used for constructing the subtrees in
   1230    * parallel should therefore always be set to the number of subtrees. However,
   1231    * to provide greater flexibility in the distribution of threads or the number
   1232    * of subtrees, the number of threads is calculated below as the minimum
   1233    * between the number of available threads and the number of subtrees */
   1234   nthreads = MMIN(tree->args.nthreads_hint, nsubtrees);
   1235   omp_set_num_threads((int)nthreads);
   1236 
   1237   #pragma omp parallel for
   1238   for(i=0; i < (int)nsubtrees; ++i) {
   1239     struct sln_node root = SLN_NODE_NULL;
   1240     struct scratch* scratch = scratches + i;
   1241     size_t nnodes_s = 0;
   1242     size_t nnodes_t = 0;
   1243     res_T res2 = RES_OK;
   1244 
   1245     /* Skip the remaining subtrees in case of an error */
   1246     if(ATOMIC_GET(&res) != RES_OK) continue;
   1247 
   1248     /* Copy the subtree root in the temporary node buffer */
   1249     #pragma omp critical
   1250     {
   1251       root = *NODE(subtrees[i]);
   1252     }
   1253     res2 = darray_node_push_back(&scratch->nodes, &root);
   1254     if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
   1255 
   1256     /* Partition the line */
   1257     res2 = partition_lines_depth_first
   1258       (tree, 0/*root*/, &scratch->nodes, &partitioning);
   1259     if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
   1260 
   1261     #pragma omp critical
   1262     {
   1263       struct sln_node* dst = NULL;
   1264       const struct sln_node* src = NULL;
   1265 
   1266       /* Increase the node buffer to store the subtree nodes */
   1267       nnodes_s = darray_node_size_get(&scratch->nodes);
   1268       nnodes_t = darray_node_size_get(&tree->nodes);
   1269       res2 = darray_node_resize(&tree->nodes, nnodes_t + nnodes_s - 1/*root*/);
   1270       if(res2 == RES_OK) {
   1271 
   1272         if(!IS_LEAF(&scratch->nodes, 0)) {
   1273           /* Set the offset to the first child of the root node of the subtree */
   1274           NODE(subtrees[i])->offset = ui64_to_ui32(nnodes_t - subtrees[i]);
   1275         }
   1276 
   1277         /* Copy the subtree nodes in the main node buffer */
   1278         src = darray_node_cdata_get(&scratch->nodes) + 1/*discard root*/;
   1279         dst = darray_node_data_get(&tree->nodes) + nnodes_t;
   1280         memcpy(dst, src, (nnodes_s-1/*discard root*/)*sizeof(*src));
   1281       }
   1282     }
   1283     if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
   1284 
   1285     /* Free the temporary buffer but retain the number of nodes in the subtree.
   1286      * This number is used later when constructing polylines (see below) */
   1287     darray_node_purge(&scratch->nodes);
   1288     scratch->nnodes = nnodes_s;
   1289   }
   1290 
   1291   /* Setup the progress bar and print that although nothing has been done yet,
   1292    * the calculation is nevertheless in progress */
   1293   *meshing = PROGRESS_DEFAULT;
   1294   meshing->total = darray_node_size_get(&tree->nodes);
   1295   meshing->msg = "Meshing: ";
   1296   progress_print(tree->sln, meshing);
   1297 
   1298   #pragma omp parallel for
   1299   for(i=0; i < (int)nsubtrees; ++i) {
   1300     struct scratch* scratch = scratches + i;
   1301     size_t inode = subtrees[i];
   1302     size_t nverts_s = 0;
   1303     size_t nverts_t = 0;
   1304     size_t j = 0;
   1305     res_T res2 = RES_OK;
   1306 
   1307     /* Skip the remaining subtrees in case of an error */
   1308     if(ATOMIC_GET(&res) != RES_OK) continue;
   1309 
   1310     res2 = build_polylines
   1311       (tree, inode, scratch->nnodes, &scratch->vertices, meshing);
   1312     if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
   1313 
   1314     #pragma omp critical
   1315     {
   1316       struct sln_vertex* dst = NULL;
   1317       const struct sln_vertex* src = NULL;
   1318 
   1319       /* Increase the vertex buffer to store the subtree polylines */
   1320       nverts_s = darray_vertex_size_get(&scratch->vertices);
   1321       nverts_t = darray_vertex_size_get(&tree->vertices);
   1322       res2 = darray_vertex_resize(&tree->vertices, nverts_t + nverts_s);
   1323       if(res2 == RES_OK) {
   1324         /* Copy the subtree nodes in the main vertex buffer */
   1325         src = darray_vertex_cdata_get(&scratch->vertices);
   1326         dst = darray_vertex_data_get(&tree->vertices) + nverts_t;
   1327         memcpy(dst, src, nverts_s*sizeof(*src));
   1328       }
   1329     }
   1330     if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
   1331 
   1332     /* Update the index of the first vertex of the polyline for the nodes in the
   1333      * subtree based on their new locations, i.e., the main vertex buffer */
   1334     NODE(subtrees[i])->ivertex += nverts_t;
   1335     FOR_EACH(j, 0, scratch->nnodes-1/*The root has been just treated*/) {
   1336       inode = subtrees[i] + NODE(subtrees[i])->offset + j;
   1337       NODE(inode)->ivertex += nverts_t;
   1338     }
   1339 
   1340     /* Free the temporary vertex buffer */
   1341     darray_node_purge(&scratch->nodes);
   1342   }
   1343 
   1344   #undef NODE
   1345   #undef IS_LEAF
   1346 
   1347 exit:
   1348   /* Clean up temporary data */
   1349   FOR_EACH(i, 0, (int)nsubtrees) {
   1350     darray_node_release(&scratches[i].nodes);
   1351     darray_vertex_release(&scratches[i].vertices);
   1352   }
   1353   MEM_RM(allocator, scratches);
   1354   return (res_T)res;
   1355 error:
   1356   goto exit;
   1357 }
   1358 
   1359 static res_T
   1360 tree_build_sequential(struct sln_tree* tree)
   1361 {
   1362   /* Progress statuses */
   1363   struct progress partitioning = PROGRESS_DEFAULT;
   1364   struct progress meshing = PROGRESS_DEFAULT;
   1365 
   1366   struct sln_node node = SLN_NODE_NULL;
   1367   size_t nlines = 0;
   1368   size_t nnodes = 0;
   1369   size_t iroot = 0;
   1370   res_T res = RES_OK;
   1371 
   1372   SHTR(line_list_get_size(tree->args.lines, &nlines));
   1373 
   1374   iroot = darray_node_size_get(&tree->nodes);
   1375   ASSERT(iroot == 0); /* assume that the tree contains no data */
   1376 
   1377   /* Setup the root node */
   1378   node.range[0] = 0;
   1379   node.range[1] = nlines - 1/* inclusive bound */;
   1380   res = darray_node_push_back(&tree->nodes, &node);
   1381   if(res != RES_OK) goto error;
   1382 
   1383   /* Partition lines */
   1384   partitioning.total = nlines;
   1385   partitioning.msg = "Partitioning: ";
   1386   progress_print(tree->sln, &partitioning);
   1387   res = partition_lines_depth_first(tree, iroot, &tree->nodes, &partitioning);
   1388   if(res != RES_OK) goto error;
   1389 
   1390   /* Create polylines */
   1391   nnodes = darray_node_size_get(&tree->nodes);
   1392   meshing.total = nnodes;
   1393   meshing.msg = "Meshing: ";
   1394   progress_print(tree->sln, &meshing);
   1395   res = build_polylines(tree, iroot, nnodes, &tree->vertices, &meshing);
   1396   if(res != RES_OK) goto error;
   1397 
   1398 exit:
   1399   return res;
   1400 error:
   1401   goto exit;
   1402 }
   1403 
   1404 static res_T
   1405 tree_build_parallel(struct sln_tree* tree)
   1406 {
   1407   /* Stack data structure */
   1408   #define STACK_SIZE (SLN_TREE_DEPTH_MAX*(SLN_TREE_ARITY_MAX-1))
   1409   size_t stack[STACK_SIZE];
   1410   size_t istack = 0;
   1411 
   1412   /* Subtrees */
   1413   size_t subtrees[QUEUE_SIZE_MAX]; /* List of sub-tree root indices */
   1414   unsigned nsubtrees = 0;
   1415 
   1416   /* Progress message of the meshing step */
   1417   struct progress meshing = PROGRESS_DEFAULT;
   1418 
   1419   /* Miscellaneous */
   1420   struct sln_node root = SLN_NODE_NULL;
   1421   size_t nlines = 0;
   1422   size_t nnodes_upper_tree = 0; /* #nodes from the root to the subtrees */
   1423   size_t iroot = 0;
   1424   size_t inode = 0;
   1425   size_t i = 0;
   1426   res_T res = RES_OK;
   1427 
   1428   ASSERT(tree);
   1429 
   1430   iroot = darray_node_size_get(&tree->nodes);
   1431   ASSERT(iroot == 0); /* assume that the tree contains no data */
   1432 
   1433   SHTR(line_list_get_size(tree->args.lines, &nlines));
   1434 
   1435   /* Setup the root node */
   1436   root.range[0] = 0;
   1437   root.range[1] = nlines;
   1438   res = darray_node_push_back(&tree->nodes, &root);
   1439   if(res != RES_OK) goto error;
   1440 
   1441   /* Partition the lines in bread-first fixing the maximum number of leafs to
   1442    * the available number of threads */
   1443   res = partition_lines_breadth_first(tree, iroot, tree->args.nthreads_hint);
   1444   if(res != RES_OK) goto error;
   1445 
   1446   /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */
   1447   stack[istack++] = SIZE_MAX;
   1448 
   1449   /* Retrieve the leaf indices generated by bread-first partitioning. These
   1450    * correspond to the roots of the subtree to be built */
   1451   inode = iroot; /* Root node */
   1452   while(inode != SIZE_MAX) {
   1453     const struct sln_node* node = darray_node_cdata_get(&tree->nodes) + inode;
   1454     ASSERT(inode < darray_node_size_get(&tree->nodes));
   1455 
   1456     if(sln_node_is_leaf(node)) {
   1457       ASSERT(nsubtrees < QUEUE_SIZE_MAX);
   1458       subtrees[nsubtrees++] = inode;
   1459       inode = stack[--istack]; /* Pop the next node */
   1460 
   1461     } else {
   1462       const size_t ichild0 = inode + node->offset;
   1463       const unsigned nchildren = node_child_count(node, tree->args.arity);
   1464 
   1465       /* Push the children except the 1st one */
   1466       ASSERT(istack + (nchildren-1/*ichild0*/) <= STACK_SIZE);
   1467       FOR_EACH_REVERSE(i, nchildren-1, 0) stack[istack++] = ichild0 + i;
   1468 
   1469       /* Recursively traverse the 1st child */
   1470       inode = ichild0;
   1471     }
   1472   }
   1473 
   1474   /* Retrieves the number of registered nodes. This is the number of nodes from
   1475    * the root to the subtrees, excluding the roots of those subtrees */
   1476   nnodes_upper_tree = darray_node_size_get(&tree->nodes);
   1477   nnodes_upper_tree -= nsubtrees; /* Exclude the root node of the subtrees */
   1478 
   1479   res = build_subtrees(tree, subtrees, nsubtrees, &meshing);
   1480   if(res != RES_OK) goto error;
   1481 
   1482   /* Build the polylines of the upper level if necessary */
   1483   if(nnodes_upper_tree != 0) {
   1484     res = build_polylines
   1485       (tree, 0/*root*/, nnodes_upper_tree, &tree->vertices, &meshing);
   1486     if(res != RES_OK) goto error;
   1487   }
   1488 
   1489   #undef STACK_SIZE
   1490 
   1491 exit:
   1492   return res;
   1493 error:
   1494   goto exit;
   1495 }
   1496 
   1497 /*******************************************************************************
   1498  * Local functions
   1499  ******************************************************************************/
   1500 res_T
   1501 tree_build(struct sln_tree* tree)
   1502 {
   1503   res_T res = RES_OK;
   1504   ASSERT(tree);
   1505 
   1506   res = tree->args.nthreads_hint == 1
   1507     ? tree_build_sequential(tree)
   1508     : tree_build_parallel(tree);
   1509   if(res != RES_OK) goto error;
   1510 
   1511 exit:
   1512   return res;
   1513 error:
   1514   darray_node_purge(&tree->nodes);
   1515   darray_vertex_purge(&tree->vertices);
   1516   goto exit;
   1517 }