sln_tree_build.c (52963B)
1 /* Copyright (C) 2022, 2026 |Méso|Star> (contact@meso-star.com) 2 * Copyright (C) 2026 Université de Lorraine 3 * Copyright (C) 2022 Centre National de la Recherche Scientifique 4 * Copyright (C) 2022 Université Paul Sabatier 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19 #include "sln.h" 20 #include "sln_device_c.h" 21 #include "sln_polyline.h" 22 #include "sln_tree_c.h" 23 24 #include <star/shtr.h> 25 26 #include <rsys/cstr.h> 27 #include <rsys/math.h> 28 29 #include <omp.h> 30 31 /* Structure used to track the progress of a construction phase */ 32 struct progress { 33 /* Absolute progression */ 34 size_t total; /* Total number of items to process */ 35 size_t processed; /* Number of items already processed */ 36 37 /* Relative progression */ 38 int pcent; /* Currently displayed percentage */ 39 int pcent_step; /* Increment between displayed percentages */ 40 41 const char* msg; /* Message to add before the displayed percentage */ 42 }; 43 #define PROGRESS_DEFAULT__ {0,0,0,10,NULL} 44 static const struct progress PROGRESS_DEFAULT = PROGRESS_DEFAULT__; 45 46 /* Structure containing temporary variables used to create polyline on a leaf 47 * These variables are not declared locally within the function responsible for 48 * constructing this polyline in order to avoid having to allocate and 49 * deallocate them with each call to the function. Instead, they are passed as 50 * function inputs so as to share, as much as possible, the memory space 51 * allocated during previous calls to the function, thereby reducing the cost of 52 * allocation and deallocation. */ 53 struct build_leaf_polyline_scratch { 54 struct darray_vertex vertices; /* Polyline vertices */ 55 struct darray_node nodes; /* Dummy leaf nodes */ 56 57 /* Temp vertex buffer used when merging polylines into a single one */ 58 struct darray_vertex vertices_tmp; 59 }; 60 61 static res_T 62 merge_child_polylines 63 (struct sln_tree* tree, 64 const size_t inode, /* Node at which child polylines are merged */ 65 const unsigned nchildren, /* #children to merge */ 66 struct darray_node* nodes, /* List of nodes */ 67 struct darray_vertex* vertices); /* List of polyline vertices */ 68 69 static res_T 70 collapse_child_polylines 71 (struct sln_tree* tree, 72 const size_t inode, /* Node at which child polylines are merged */ 73 const unsigned nchildren, /* #children to collapse */ 74 struct darray_node* nodes, /* List of nodes */ 75 struct darray_vertex* scratch, /* Temporary buffer of vertices */ 76 struct darray_vertex* vertices); /* List of polyline vertices */ 77 78 /* A multiplier to be applied to the calculated ka values in order to mitigate 79 * numerical issues related to ka interpolation in the case of a tree with an 80 * upper bound. This interpolation must ensure that the interpolated ka value is 81 * well above the actual ka value, which may not be the case due to numerical 82 * inaccuracies. Hence this constant, which defines a percentage of ka to be 83 * added to the calculated ka values to ensure that any interpolated value is 84 * well above the actual ka value. */ 85 #define KA_ADJUSTEMENT 1.01 86 87 /* Maximum queue size used for the breadth-first tree traversal. For a perfectly 88 * balanced tree, this corresponds to the maximum number of leaves a tree can 89 * have. Note that this value does not need to be too high, since the 90 * breadth-first traversal is only used to partition the first few levels of the 91 * tree until a sufficient number of subtrees have been identified so that they 92 * can then be constructed in parallel using a depth-first algorithm */ 93 #define QUEUE_SIZE_MAX 256 94 95 /******************************************************************************* 96 * Helper functions 97 ******************************************************************************/ 98 static FINLINE uint32_t 99 ui64_to_ui32(const uint64_t ui64) 100 { 101 if(ui64 > UINT32_MAX) 102 VFATAL("%s: overflow %lu.\n", ARG2(FUNC_NAME, ((unsigned long)ui64))); 103 return (uint32_t)ui64; 104 } 105 106 static FINLINE unsigned 107 size_t_to_unsigned(const size_t sz) 108 { 109 if(sz > UINT_MAX) 110 VFATAL("%s: overflow %lu.\n", ARG2(FUNC_NAME, ((unsigned long)sz))); 111 return (unsigned)sz; 112 } 113 114 static void 115 progress_print(struct sln_device* dev, const struct progress* progress) 116 { 117 ASSERT(dev && progress); 118 if(progress->msg) { 119 INFO(dev, "%s%3d%%\n", progress->msg, progress->pcent); 120 } else { 121 INFO(dev, "%3d%%\n", progress->pcent); 122 } 123 } 124 125 static void 126 progress_update 127 (struct sln_device* dev, 128 struct progress* progress, 129 const size_t count) 130 { 131 int pcent = 0; 132 ASSERT(dev && progress); 133 134 #pragma omp critical 135 { 136 progress->processed += count; 137 ASSERT(progress->processed <= progress->total); 138 139 pcent = (int) 140 ( (double)progress->processed*100.0 141 / (double)progress->total 142 + 0.5 /* round */); 143 144 if(pcent/progress->pcent_step > progress->pcent/progress->pcent_step) { 145 progress->pcent = pcent; 146 progress_print(dev, progress); 147 } 148 } 149 } 150 151 static INLINE void 152 build_leaf_polyline_scratch_init 153 (struct mem_allocator* allocator, 154 struct build_leaf_polyline_scratch* scratch) 155 { 156 ASSERT(scratch); 157 darray_vertex_init(allocator, &scratch->vertices); 158 darray_vertex_init(allocator, &scratch->vertices_tmp); 159 darray_node_init(allocator, &scratch->nodes); 160 } 161 162 static INLINE void 163 build_leaf_polyline_scratch_release 164 (struct build_leaf_polyline_scratch* scratch) 165 { 166 ASSERT(scratch); 167 darray_vertex_release(&scratch->vertices); 168 darray_vertex_release(&scratch->vertices_tmp); 169 darray_node_release(&scratch->nodes); 170 } 171 172 static INLINE res_T 173 build_leaf_polyline_from_1line 174 (struct sln_tree* tree, 175 struct sln_node* leaf, 176 struct darray_vertex* vertices) 177 { 178 size_t vertices_range[2]; 179 res_T res = RES_OK; 180 181 ASSERT(tree && leaf && vertices); 182 183 /* Assume that there is only one line per leaf */ 184 ASSERT(leaf->range[0] == leaf->range[1]); 185 186 /* Line meshing */ 187 res = line_mesh 188 (/* in */ tree, leaf->range[0], tree->args.nvertices_hint, 189 /* out */ vertices, vertices_range); 190 if(res != RES_OK) goto error; 191 192 /* Decimate the line mesh */ 193 res = polyline_decimate(tree->sln, darray_vertex_data_get(vertices), 194 vertices_range, (float)tree->args.mesh_decimation_err, tree->args.mesh_type); 195 if(res != RES_OK) goto error; 196 197 /* Shrink the size of the vertices */ 198 darray_vertex_resize(vertices, vertices_range[1] + 1); 199 200 /* Setup the leaf polyline */ 201 leaf->ivertex = vertices_range[0]; 202 leaf->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1); 203 204 exit: 205 return res; 206 error: 207 goto exit; 208 } 209 210 /* Build the polyline of a leaf node containing more than one line. 211 * 212 * Each line is drawn as if it were the only one on the leaf. This allows the 213 * line meshing routine to be used, and consequently the algorithm that relies 214 * on the properties of the lines (symmetry, monotonicity on one half). Next, 215 * this set of polylines is merged/collapsed into a single polyline, which is 216 * the leaf polyline. 217 * 218 * To reuse the merging/collapsing designed for internal nodes, the function 219 * treats a leaf as if it were an internal node. A list of temporary nodes is 220 * thus created, containing not only the leaf node but also its “children” 221 * i.e., its lines. 222 * 223 * Once created, the leaf’s polyline is finally copied into the tree’s vertex 224 * buffer */ 225 static INLINE res_T 226 build_leaf_polyline_from_Nlines 227 (struct sln_tree* tree, 228 struct sln_node* leaf, 229 struct build_leaf_polyline_scratch* scratch, 230 struct darray_vertex* vertices) 231 { 232 const struct sln_vertex* src = NULL; 233 struct sln_vertex* dst = NULL; 234 size_t memsz = 0; 235 236 size_t nnodes = 0; 237 unsigned nlines = 0; /* Number of lines in the leaf */ 238 unsigned i = 0; 239 res_T res = RES_OK; 240 241 ASSERT(tree && leaf && scratch && vertices); 242 243 /* Compute the number of lines of the leaf */ 244 nlines = size_t_to_unsigned(leaf->range[1] - leaf->range[0] + 1/*inclusive*/); 245 ASSERT(nlines > 1); /* Assume there is more than one line per leaf */ 246 247 nnodes = nlines + 1/*leaf*/; 248 249 /* Helper macro */ 250 #define NODE(Id) (darray_node_data_get(&scratch->nodes) + (Id)) 251 252 /* Allocate the temporary list of nodes, one node per leaf to which on 253 * additionnal node is added for the leaf */ 254 res = darray_node_resize(&scratch->nodes, nnodes); 255 if(res != RES_OK) goto error; 256 memset(NODE(0), 0, sizeof(struct sln_node)*nnodes); 257 258 /* Clean up the temporary list of vertices */ 259 darray_vertex_clear(&scratch->vertices); 260 261 /* The leaf node will be the first one. Its "child" representing the node of 262 * each lines, will be store after it in the node list */ 263 NODE(0)->offset = 1; 264 NODE(0)->range[0] = leaf->range[0]; 265 NODE(0)->range[1] = leaf->range[1]; 266 267 FOR_EACH(i, 0, nlines) { 268 size_t vertices_range[2] = {0, 0}; /* Range in the vertex buffer */ 269 const size_t ichild = NODE(0)->offset + i; /* Index of the child */ 270 const size_t iline = leaf->range[0] + i; 271 272 /* Mesh the line in the temporary vertex buffer */ 273 res = line_mesh(tree, iline, tree->args.nvertices_hint, &scratch->vertices, 274 vertices_range); 275 if(res != RES_OK) goto error; 276 277 /* Decimate the line mesh */ 278 res = polyline_decimate(tree->sln, 279 darray_vertex_data_get(&scratch->vertices), vertices_range, 280 (float)tree->args.mesh_decimation_err, tree->args.mesh_type); 281 if(res != RES_OK) goto error; 282 283 NODE(ichild)->ivertex = vertices_range[0]; 284 NODE(ichild)->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1); 285 NODE(ichild)->range[0] = iline; 286 NODE(ichild)->range[1] = iline; 287 } 288 289 /* Merge/collapse the polylines of each lines associated to nodes. 290 * These nodes are the "children" of the leaf */ 291 if(tree->args.collapse_polylines) { 292 res = collapse_child_polylines(tree, 0, nlines, 293 &scratch->nodes, &scratch->vertices_tmp, &scratch->vertices); 294 } else { 295 res = merge_child_polylines(tree, 0, nlines, 296 &scratch->nodes, &scratch->vertices); 297 } 298 if(res != RES_OK) goto error; 299 300 /* Setup the leaf */ 301 leaf->ivertex = darray_vertex_size_get(vertices); 302 leaf->nvertices = NODE(0/*leaf*/)->nvertices; 303 304 /* Copy the leaf vertices from temporary buffer to the vertex buffer of the 305 * tree */ 306 res = darray_vertex_resize(vertices, leaf->ivertex + leaf->nvertices); 307 if(res != RES_OK) goto error; 308 src = darray_vertex_cdata_get(&scratch->vertices) + NODE(0/*leaf*/)->ivertex; 309 dst = darray_vertex_data_get(vertices) + leaf->ivertex; 310 memsz = leaf->nvertices * sizeof(*src); 311 memcpy(dst, src, memsz); 312 313 #undef NODE 314 315 exit: 316 return res; 317 error: 318 goto exit; 319 } 320 321 static INLINE res_T 322 build_leaf_polyline 323 (struct sln_tree* tree, 324 struct sln_node* leaf, 325 struct build_leaf_polyline_scratch* scratch, 326 struct darray_vertex* vertices) 327 { 328 size_t nlines = 0; /* Number of lines in the leaf */ 329 ASSERT(tree && leaf); 330 331 nlines = leaf->range[1] - leaf->range[0] + 1; 332 333 if(nlines == 1) { 334 return build_leaf_polyline_from_1line(tree, leaf, vertices); 335 } else { 336 return build_leaf_polyline_from_Nlines(tree, leaf, scratch, vertices); 337 } 338 } 339 340 static INLINE double 341 eval_ka 342 (const struct sln_node* node, 343 const struct sln_vertex* vertices, 344 const double wavenumber) 345 { 346 struct sln_mesh mesh = SLN_MESH_NULL; 347 double ka = 0; 348 ASSERT(node && vertices); 349 350 /* Whether the mesh to be constructed corresponds to the spectrum or its upper 351 * limit, use the node mesh to calculate the value of ka at a given wave 352 * number. Calculating the value from the node lines would take far too long*/ 353 mesh.vertices = vertices + node->ivertex; 354 mesh.nvertices = node->nvertices; 355 ka = sln_mesh_eval(&mesh, wavenumber); 356 return ka; 357 } 358 359 /* Merge all child polylines in a single step. The polylines are merged before 360 * being decimated */ 361 res_T 362 merge_child_polylines 363 (struct sln_tree* tree, 364 const size_t inode, 365 const unsigned nchildren, 366 struct darray_node* nodes, 367 struct darray_vertex* vertex_list) 368 { 369 /* Helper constant */ 370 static const size_t NO_MORE_VERTEX = SIZE_MAX; 371 372 /* Polyline vertices */ 373 #define NCHILDREN_MAX \ 374 ( SLN_TREE_ARITY_MAX > SLN_LEAF_NLINES_MAX \ 375 ? SLN_TREE_ARITY_MAX : SLN_LEAF_NLINES_MAX) 376 size_t children_ivtx[NCHILDREN_MAX] = {0}; 377 378 size_t vertices_range[2] = {0,0}; 379 struct sln_vertex* vertices = NULL; 380 size_t ivtx = 0; 381 size_t nvertices = 0; 382 383 /* Miscellaneous */ 384 unsigned i = 0; 385 res_T res = RES_OK; 386 387 /* Pre-conditions */ 388 ASSERT(tree && inode < darray_node_size_get(nodes)); 389 ASSERT(nchildren >= 2 && nchildren <= NCHILDREN_MAX); 390 391 #define NODE(Id) (darray_node_data_get(nodes) + (Id)) 392 393 /* Compute the number of vertices to be merged, 394 * i.e., the sum of vertices of the children */ 395 nvertices = 0; 396 FOR_EACH(i, 0, nchildren) { 397 const size_t ichild = inode + NODE(inode)->offset + i; 398 nvertices += NODE(ichild)->nvertices; 399 } 400 401 /* Define the vertices range of the merged polyline */ 402 vertices_range[0] = darray_vertex_size_get(vertex_list); 403 vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/; 404 405 /* Allocate the memory space to store the new polyline */ 406 res = darray_vertex_resize(vertex_list, vertices_range[1]+1); 407 if(res != RES_OK) { 408 ERROR(tree->sln, "Error in merging polylines -- %s.\n", res_to_cstr(res)); 409 goto error; 410 } 411 vertices = darray_vertex_data_get(vertex_list); 412 413 /* Initialize the vertex index list. For each child, the initial value 414 * corresponds to the index of its first vertex. This index will be 415 * incremented as vertices are merged into the parent polyline. */ 416 FOR_EACH(i, 0, nchildren) { 417 const size_t ichild = inode + NODE(inode)->offset + i; 418 children_ivtx[i] = NODE(ichild)->ivertex; 419 } 420 421 FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) { 422 double ka = 0; 423 double nu = INF; 424 425 /* The number of vertices corresponding to the current wave number for which 426 * the parent ka is calculated. It is at least equal to one, since this nu 427 * is defined by the child vertices, but may be greater if multiple children 428 * share the same vertex, i.e., a ka value calculated for the same nu */ 429 unsigned nvertices_merged = 0; 430 431 /* Find the minimum wave number among the vertices of the child vertices 432 * that are candidates for merging */ 433 FOR_EACH(i, 0, nchildren) { 434 const size_t child_ivtx = children_ivtx[i]; 435 if(child_ivtx != NO_MORE_VERTEX) { 436 nu = MMIN(nu, vertices[child_ivtx].wavenumber); 437 } 438 } 439 ASSERT(nu != INF); /* At least one vertex must have been found */ 440 441 /* Compute the value of ka at the wave number determined above */ 442 FOR_EACH(i, 0, nchildren) { 443 const size_t child_ivtx = children_ivtx[i]; 444 const struct sln_node* child = NODE(inode + NODE(inode)->offset + i); 445 446 if(child_ivtx == NO_MORE_VERTEX 447 || nu != vertices[child_ivtx].wavenumber) { 448 /* The wave number does not correspond to a vertex in the current 449 * child's mesh. Therefore, its contribution to the parent node's ka is 450 * computed */ 451 ka += eval_ka(child, vertices, nu); 452 453 } else { 454 /* The wave number is the one for which the child node stores a ka 455 * value. Add it to the parent node's ka value and designate the child's 456 * next vertex as a candidate for merging into the parent. The exception 457 * is when all vertices of the child have already been merged. In this 458 * case, report that the child no longer has any candidate vertices */ 459 ka += vertices[child_ivtx].ka; 460 ++children_ivtx[i]; 461 if(children_ivtx[i] >= child->ivertex + child->nvertices) { 462 children_ivtx[i] = NO_MORE_VERTEX; 463 } 464 ++nvertices_merged; /* Record that a vertex has been merged */ 465 } 466 } 467 468 /* Setup the parent vertex */ 469 vertices[ivtx].wavenumber = (float)nu; 470 vertices[ivtx].ka = (float)ka; 471 472 /* If multiple child vertices have been merged, then a single wave number 473 * corresponds to a vertex with multiple children. The number of parent 474 * vertices is therefore no longer the sum of the number of its children's 475 * vertices, since some vertices are duplicated. Hence the following 476 * adjustment, which removes the duplicate vertices. */ 477 vertices_range[1] -= (nvertices_merged-1); 478 } 479 480 /* Decimate the resulting polyline */ 481 res = polyline_decimate(tree->sln, darray_vertex_data_get(vertex_list), 482 vertices_range, (float)tree->args.mesh_decimation_err, tree->args.mesh_type); 483 if(res != RES_OK) goto error; 484 485 /* Setup the node polyline */ 486 NODE(inode)->ivertex = vertices_range[0]; 487 NODE(inode)->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1); 488 489 /* It is necessary to ensure that the recorded vertices define a polyline 490 * along which any value (calculated by linear interpolation) is well above 491 * the sum of the corresponding values of the polylines to be merged. However, 492 * although this is guaranteed by definition for the vertices of the polyline, 493 * numerical uncertainty may nevertheless introduce errors that violate this 494 * criterion. Hence the following adjustment, which slightly increases the ka 495 * of the mesh so as to guarantee this constraint between the mesh of a node 496 * and that of its children */ 497 if(tree->args.mesh_type == SLN_MESH_UPPER) { 498 FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) { 499 const double ka = vertices[ivtx].ka; 500 vertices[ivtx].ka = (float)(ka*KA_ADJUSTEMENT); 501 } 502 } 503 504 /* Shrink the size of the vertices */ 505 darray_vertex_resize(vertex_list, vertices_range[1] + 1); 506 507 #undef NODE 508 #undef NCHILDREN_MAX 509 510 exit: 511 return res; 512 error: 513 goto exit; 514 } 515 516 /* Merge child polylines by combining them in pairs (the resulting polyline is 517 * then simplified), and repeat this process until only a single polyline 518 * remains. This polyline becomes the node's polyline */ 519 static res_T 520 collapse_child_polylines 521 (struct sln_tree* tree, 522 const size_t inode, 523 const unsigned nchildren, 524 struct darray_node* nodes, 525 struct darray_vertex* scratch, 526 struct darray_vertex* vertex_list) 527 { 528 /* Polylines to be collapsed, i.e., the ids of their first and last vertices */ 529 #define NCHILDREN_MAX \ 530 ( SLN_TREE_ARITY_MAX > SLN_LEAF_NLINES_MAX \ 531 ? SLN_TREE_ARITY_MAX : SLN_LEAF_NLINES_MAX) 532 size_t poly_parts[NCHILDREN_MAX][2] = {0}; 533 size_t nparts; 534 535 /* Indices of the first and last vertices of the resulting polyline. 536 * These indices are absolute to the tree's vertex list */ 537 size_t vertices_range[2] = {0,0}; 538 539 /* Redux double buffering */ 540 struct sln_vertex* buf[2] = {NULL, NULL}; 541 int r, w; /* Index of the buffer in read/write */ 542 543 /* Miscellaneous */ 544 struct sln_vertex* vertices = NULL; /* Pointer to the tree's vertex buffer */ 545 size_t range_merge[2] = {0,0}; /* vertex range of a merged polyline */ 546 size_t nvertices = 0; /* #vertices of the resulting polyline */ 547 size_t ivtx = 0; 548 unsigned i = 0; 549 int ncollapses = 0; /* Number of collapse steps */ 550 res_T res = RES_OK; 551 552 /* Pre-conditions */ 553 ASSERT(tree && inode < darray_node_size_get(nodes)); 554 ASSERT(nchildren >= 2 && nchildren <= NCHILDREN_MAX); 555 556 #define NODE(Id) (darray_node_data_get(nodes) + (Id)) 557 558 /* Compute the number of vertices to be merged, 559 * i.e., the sum of vertices of the children */ 560 nvertices = 0; 561 FOR_EACH(i, 0, nchildren) { 562 const size_t ichild = inode + NODE(inode)->offset + i; 563 nvertices += NODE(ichild)->nvertices; 564 } 565 566 /* Define the vertices range of the merged polyline */ 567 vertices_range[0] = darray_vertex_size_get(vertex_list); 568 vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/; 569 570 /* Allocate the memory space required to store the new polyline and the 571 * temporary buffer. This is the memory space in which the collapse 572 * procedure's double buffering will take place. Each buffer will be used 573 * alternately as a read/write buffer when reducing the polylines of the 574 * child nodes */ 575 if((res = darray_vertex_resize(vertex_list, vertices_range[1]+1)) != RES_OK 576 || (res = darray_vertex_resize(scratch, nvertices)) != RES_OK) { 577 ERROR(tree->sln, "Error in merging polylines -- %s.\n", res_to_cstr(res)); 578 goto error; 579 } 580 581 /* Recover the memory space of the tree's vertices */ 582 vertices = darray_vertex_data_get(vertex_list); 583 584 /* Recover the memory space to be used in the Redux process. 585 * 586 * In the vertex buffer, this refers to the newly allocated memory space used 587 * during the collapse. The beginning of the buffer contains the vertices of 588 * the registered nodes and must therefore not be modified. At the end of the 589 * collapse, this space will contain the vertices of the polylines resulting 590 * from the collapse of the child nodes' polylines. 591 * 592 * The scratch buffer is used as-is, since its sole purpose is to temporarily 593 * store the vertices of the polylines to be merged. */ 594 buf[0] = vertices + vertices_range[0]; 595 buf[1] = darray_vertex_data_get(scratch); 596 597 /* Initially, the number of partitions to be reduced is equal to the number of 598 * children */ 599 nparts = nchildren; 600 601 /* Calculate the number of reduction steps, i.e., the logarithm of the power 602 * of 2 that is greater than or equal to the number of polylines to be 603 * reduced, to which one is added to obtain the number of steps, not the index 604 * of the last step */ 605 ncollapses = log2i((int)round_up_pow2(nparts)) + 1; 606 607 /* Set the index of the write buffer so that, once the collapse process is 608 * complete, the last write buffer is the one whose memory space is allocated 609 * in the tree's vertex buffer. This way, no additional copies will be needed 610 * to store the result of the reduction */ 611 w = ncollapses % 2 ? 0 : 1; 612 613 /* Initialize the polylines to be reduced, i.e., copy the child vertices into 614 * a collapse buffer and record their index ranges */ 615 ivtx = 0; 616 for(i = 0; i < nchildren; ++i) { 617 const size_t ichild = inode + NODE(inode)->offset + i; 618 const struct sln_node* child = NODE(ichild); 619 const size_t memsz = child->nvertices * sizeof(struct sln_vertex); 620 const struct sln_vertex* src = vertices + child->ivertex; 621 struct sln_vertex* dst = buf[w] + ivtx; 622 623 memcpy(dst, src, memsz); 624 poly_parts[i][0] = ivtx; 625 poly_parts[i][1] = ivtx + child->nvertices - 1/*inclusive bound*/; 626 627 ivtx += child->nvertices; 628 } 629 630 r = w; /* Index of the buffer from which to read the data */ 631 w =!r; /* Index of the buffer into which to write the data */ 632 633 634 /* As long as the number of segments is not equal to one, there are still 635 * polylines to be merged in pairs */ 636 while(nparts > 1) { 637 size_t ipart = 0; 638 639 for(ipart=0; ipart < nparts-1; ipart+=2) { 640 struct sln_mesh mesh0 = SLN_MESH_NULL; 641 struct sln_mesh mesh1 = SLN_MESH_NULL; 642 643 /* Retrieve the two polylines to be merged */ 644 const size_t* part0 = poly_parts[ipart+0]; 645 const size_t* part1 = poly_parts[ipart+1]; 646 647 /* Calculate the partition index of the merged polyline, that is, the index 648 * that will contain the information for the polyline resulting from the 649 * merge: the first and last indices of its in the vertex array currently 650 * being written */ 651 const size_t ipart_merge = ipart/2; 652 653 /* Compute the number of vertices of the merged polyline */ 654 const size_t nvtx = 655 ((part0[1] - part0[0]) + 1/*inclusive bound*/) 656 + ((part1[1] - part1[0]) + 1/*inclusive bound*/); 657 658 /* For each child, initialized its vertex index to its first vertex. This 659 * index will be incremented as vertices are merged into the merged 660 * polyline */ 661 size_t ivtx0 = part0[0]; 662 size_t ivtx1 = part1[0]; 663 664 /* Set the vertex index range for the merged polyline in the write buffer */ 665 range_merge[0] = part0[0]; 666 range_merge[1] = range_merge[0] + nvtx-1/*inclusive bound*/; 667 668 /* Setup the mesh of the two polylines to be merged */ 669 mesh0.vertices = buf[r] + part0[0]; mesh0.nvertices = part0[1]-part0[0]+1; 670 mesh1.vertices = buf[r] + part1[0]; mesh1.nvertices = part1[1]-part1[0]+1; 671 672 /* Merge the polylines */ 673 FOR_EACH(ivtx, range_merge[0], range_merge[1]+1/*inclusive bound*/) { 674 const double nu0 = ivtx0 <= part0[1] ? buf[r][ivtx0].wavenumber : INF; 675 const double nu1 = ivtx1 <= part1[1] ? buf[r][ivtx1].wavenumber : INF; 676 double ka = 0; 677 double nu = 0; 678 679 /* Find the minimum wave number among the vertices of the child vertices 680 * that are candidates for merging */ 681 if(nu0 < nu1) { /* The vertex comes from the child0 */ 682 nu = buf[r][ivtx0].wavenumber; 683 ka = buf[r][ivtx0].ka + sln_mesh_eval(&mesh1, nu); 684 ++ivtx0; 685 686 } else if(nu0 > nu1) { /* The vertex comes from the child1 */ 687 nu = buf[r][ivtx1].wavenumber; 688 ka = buf[r][ivtx1].ka + sln_mesh_eval(&mesh0, nu); 689 ++ivtx1; 690 691 } else { /* The vertex is shared by node0 and node1 */ 692 nu = buf[r][ivtx0].wavenumber; 693 ka = buf[r][ivtx0].ka + buf[r][ivtx1].ka; 694 --range_merge[1]; /* Remove duplicate */ 695 ++ivtx0; 696 ++ivtx1; 697 } 698 buf[w][ivtx].wavenumber = (float)nu; 699 buf[w][ivtx].ka = (float)ka; 700 } 701 702 /* Decimate the resulting polyline */ 703 res = polyline_decimate(tree->sln, buf[w], range_merge, 704 (float)tree->args.mesh_decimation_err, tree->args.mesh_type); 705 if(res != RES_OK) goto error; 706 707 /* Setup the partition of the merge polyline */ 708 poly_parts[ipart_merge][0] = range_merge[0]; 709 poly_parts[ipart_merge][1] = range_merge[1]; 710 } 711 712 /* If there is a polyline that has not been merged, copy its vertices to the 713 * write buffer so that it can be processed in the next reduction step */ 714 if(nparts % 2) { 715 const size_t* remain_part = poly_parts[nparts-1]; 716 const size_t nvtx = remain_part[1] - remain_part[0] + 1/*inclusive*/; 717 const size_t memsz = nvtx * sizeof(struct sln_vertex); 718 const struct sln_vertex* src = buf[r] + remain_part[0]; 719 struct sln_vertex* dst = buf[w] + remain_part[0]; 720 721 memcpy(dst, src, memsz); 722 poly_parts[nparts/2][0] = remain_part[0]; 723 poly_parts[nparts/2][1] = remain_part[1]; 724 } 725 726 #ifndef NDEBUG 727 FOR_EACH(i, 1, (nparts+1/*ceil*/)/2) { 728 ASSERT(poly_parts[i][0] > poly_parts[i-1][1]); 729 } 730 #endif 731 732 /* Update the number of partitions to be reduced */ 733 nparts = (nparts + 1/*ceil*/)/2; 734 735 /* Swap read/write buffers */ 736 r = !r; 737 w = !w; 738 } 739 740 nvertices = (range_merge[1] - range_merge[0]) + 1/*inclusive bound*/; 741 vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/; 742 743 /* Assumed that double buffering was configured to ensure that the resulting 744 * polyline is stored in the tree vertex buffer */ 745 ASSERT(buf[r] != darray_vertex_cdata_get(scratch)); 746 747 /* Setup the node */ 748 NODE(inode)->ivertex = vertices_range[0]; 749 NODE(inode)->nvertices = ui64_to_ui32(nvertices); 750 751 /* It is necessary to ensure that the recorded vertices define a polyline 752 * along which any value (calculated by linear interpolation) is well above 753 * the sum of the corresponding values of the polylines to be merged. However, 754 * although this is guaranteed by definition for the vertices of the polyline, 755 * numerical uncertainty may nevertheless introduce errors that violate this 756 * criterion. Hence the following adjustment, which slightly increases the ka 757 * of the mesh so as to guarantee this constraint between the mesh of a node 758 * and that of its children */ 759 if(tree->args.mesh_type == SLN_MESH_UPPER) { 760 FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) { 761 const double ka = vertices[ivtx].ka; 762 vertices[ivtx].ka = (float)(ka*KA_ADJUSTEMENT); 763 } 764 } 765 766 /* Shrink the size of the vertices */ 767 darray_vertex_resize(vertex_list, vertices_range[1] + 1); 768 769 #undef NCHILDREN 770 #undef NODE 771 772 exit: 773 return res; 774 error: 775 goto exit; 776 } 777 static res_T 778 build_polylines 779 (struct sln_tree* tree, 780 const size_t root_index, 781 const size_t nodes_count, /* Total number of nodes in the tree (for debug) */ 782 struct darray_vertex* vertices, 783 struct progress* progress) 784 { 785 /* Stack */ 786 #define STACK_SIZE (SLN_TREE_DEPTH_MAX*SLN_TREE_ARITY_MAX) 787 size_t stack[STACK_SIZE]; 788 size_t istack = 0; 789 790 /* Progress */ 791 size_t nnodes_processed = 0; 792 793 /* Miscellaneous */ 794 struct build_leaf_polyline_scratch scratch; 795 size_t inode = 0; 796 res_T res = RES_OK; 797 798 ASSERT(tree && nodes_count != 0); 799 ASSERT(root_index < darray_node_size_get(&tree->nodes)); 800 (void)nodes_count; /* Avoid "Unused variable" warning */ 801 802 build_leaf_polyline_scratch_init(tree->sln->allocator, &scratch); 803 804 #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id)) 805 #define IS_LEAF(Id) (NODE(Id)->offset == 0) 806 807 /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */ 808 stack[istack++] = SIZE_MAX; 809 810 inode = root_index; /* Root node */ 811 while(inode != SIZE_MAX) { 812 const size_t istack_saved = istack; 813 814 if(IS_LEAF(inode)) { 815 res = build_leaf_polyline(tree, NODE(inode), &scratch, vertices); 816 if(res != RES_OK) goto error; 817 818 inode = stack[--istack]; /* Pop the next node */ 819 820 } else { 821 const size_t ichild0 = inode + NODE(inode)->offset + 0; 822 const struct sln_node* node = darray_node_cdata_get(&tree->nodes)+inode; 823 const unsigned nchildren = node_child_count(node, tree->args.arity); 824 int child_polylines_are_missing = 1; 825 size_t i = 0; 826 827 FOR_EACH(i, 0, nchildren) { 828 child_polylines_are_missing = NODE(ichild0 + i)->nvertices == 0; 829 if(child_polylines_are_missing) break; 830 } 831 832 /* Child nodes have their polyline created */ 833 if(!child_polylines_are_missing) { 834 #ifndef NDEBUG 835 /* Check that all children have their polylines created */ 836 FOR_EACH(i, 1, nchildren) { 837 const size_t ichild = ichild0 + i; 838 ASSERT(NODE(ichild)->nvertices != 0); 839 } 840 #endif 841 if(tree->args.collapse_polylines) { 842 res = collapse_child_polylines(tree, inode, nchildren, &tree->nodes, 843 &scratch.vertices_tmp, vertices); 844 } else { 845 res = merge_child_polylines 846 (tree, inode, nchildren, &tree->nodes, vertices); 847 } 848 if(res != RES_OK) goto error; 849 850 inode = stack[--istack]; /* Pop the next node */ 851 852 /* Child nodes have NOT their polyline created */ 853 } else { 854 ASSERT(istack + (nchildren - 1/*ichild0*/ + 1/*inode*/) <= STACK_SIZE); 855 stack[istack++] = inode; /* Push the current node */ 856 857 /* Push the child nodes, except for those whose polyline has already 858 * been constructed, as is the case when the child node was the root 859 * of a separately constructed subtree */ 860 FOR_EACH_REVERSE(i, nchildren, 0) { 861 const size_t ichild = ichild0 + i-1; 862 if(NODE(ichild)->nvertices == 0) stack[istack++] = ichild; 863 } 864 865 /* Ensure that at least one child was pushed */ 866 ASSERT(stack[istack-1] != inode); 867 868 inode = stack[--istack]; /* Recursively build the polyline of the 1st child */ 869 } 870 } 871 872 /* Handle progression bar */ 873 if(istack < istack_saved) { 874 size_t nnodes = istack_saved - istack; 875 876 nnodes_processed += nnodes; 877 progress_update(tree->sln, progress, nnodes); 878 } 879 } 880 ASSERT(nnodes_processed == nodes_count); 881 882 #undef NODE 883 #undef IS_LEAF 884 #undef LOG_MSG 885 #undef STACK_SIZE 886 887 exit: 888 build_leaf_polyline_scratch_release(&scratch); 889 return res; 890 error: 891 goto exit; 892 } 893 894 static res_T 895 partition_lines_depth_first 896 (struct sln_tree* tree, 897 const size_t root_index, 898 struct darray_node* nodes, 899 struct progress* progress) 900 { 901 /* Stack */ 902 #define STACK_SIZE (SLN_TREE_DEPTH_MAX*(SLN_TREE_ARITY_MAX-1)) 903 size_t stack[STACK_SIZE]; 904 size_t istack = 0; 905 906 /* Progress */ 907 size_t nlines_total = 0; 908 size_t nlines_processed = 0; 909 910 /* Miscellaneous */ 911 size_t inode = 0; 912 res_T res = RES_OK; 913 914 /* Pre-condition */ 915 ASSERT(tree && nodes && progress); 916 ASSERT(root_index < darray_node_size_get(nodes)); 917 (void)nlines_total; /* Avoid "Unused variable" warning */ 918 919 #define NODE(Id) (darray_node_data_get(nodes) + (Id)) 920 #define CREATE_NODE { \ 921 res = darray_node_push_back(nodes, &SLN_NODE_NULL); \ 922 if(res != RES_OK) goto error; \ 923 } (void)0 924 925 nlines_total = NODE(root_index)->range[1] - NODE(root_index)->range[0] + 1; 926 nlines_processed = 0; 927 928 /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */ 929 stack[istack++] = SIZE_MAX; 930 931 inode = root_index; /* Root node */ 932 while(inode != SIZE_MAX) { 933 /* #lines into the node */ 934 size_t nlines_node = NODE(inode)->range[1] - NODE(inode)->range[0] + 1; 935 936 /* Make a leaf */ 937 if(nlines_node <= tree->args.leaf_nlines) { 938 939 NODE(inode)->offset = 0; 940 inode = stack[--istack]; /* Pop the next node */ 941 942 nlines_processed += nlines_node; /* For debug */ 943 944 progress_update(tree->sln, progress, nlines_node); 945 946 /* Split the node */ 947 } else { 948 size_t node_range[2] = {0,0}; 949 size_t nlines_child_min = 0; /* Min #lines per child */ 950 size_t nlines_remain = 0; 951 size_t nchildren = 0; 952 size_t iline = 0; 953 size_t i = 0; 954 955 /* Calculate the index of the first child */ 956 size_t ichildren = darray_node_size_get(nodes); 957 958 /* Compute how the number of children that node has */ 959 nchildren = node_child_count(NODE(inode), tree->args.arity); 960 961 ASSERT(nchildren <= tree->args.arity); 962 ASSERT(ichildren > inode); 963 964 node_range[0] = NODE(inode)->range[0]; 965 node_range[1] = NODE(inode)->range[1]; 966 967 /* Define the offset from the current node to its children */ 968 NODE(inode)->offset = ui64_to_ui32((uint64_t)(ichildren - inode)); 969 970 nlines_child_min = nlines_node / nchildren; 971 nlines_remain = nlines_node % nchildren; 972 973 iline = node_range[0]; 974 FOR_EACH(i, 0, nchildren) { 975 /* Compute the number of lines per child. Start by assigning the minimum 976 * number of lines to each child, then distribute the remaining lines 977 * among the first children */ 978 size_t nlines_child = nlines_child_min + (i < nlines_remain); 979 980 CREATE_NODE; 981 982 /* Set the range of lines line for the newly created child. Note that 983 * the boundaries of the range are inclusive, which is why 1 is 984 * subtracted to the upper bound */ 985 NODE(ichildren+i)->range[0] = iline; 986 NODE(ichildren+i)->range[1] = iline + nlines_child - 1/*inclusive bound*/; 987 iline += nlines_child; 988 989 /* Check that the child's lines are a subset of the parent's lines */ 990 ASSERT(NODE(ichildren+i)->range[0] >= node_range[0]); 991 ASSERT(NODE(ichildren+i)->range[1] <= node_range[1]); 992 } 993 994 inode = ichildren; /* Make the first child the current node */ 995 996 /* Push the other children */ 997 ASSERT(istack + (nchildren-1/*1st child*/) <= STACK_SIZE); 998 FOR_EACH_REVERSE(i, nchildren-1, 0) stack[istack++] = ichildren + i; 999 } 1000 } 1001 ASSERT(nlines_processed == nlines_total); 1002 1003 #undef NODE 1004 #undef CREATE_NODE 1005 #undef STACK_SIZE 1006 1007 exit: 1008 return res; 1009 error: 1010 goto exit; 1011 } 1012 1013 static res_T 1014 partition_lines_breadth_first 1015 (struct sln_tree* tree, 1016 const size_t root_index, 1017 const unsigned nleaves_max_hint) /* Advice on the maxium of leafs to create */ 1018 { 1019 /* Static memory space of the queue */ 1020 struct item { 1021 struct list_node link; 1022 size_t inode; 1023 } items[QUEUE_SIZE_MAX]; 1024 1025 /* Linked lists for managing queue data */ 1026 struct list_node free_items; 1027 struct list_node queue; 1028 size_t nnodes = 0; /* Number of enqueud nodes */ 1029 1030 /* Miscellaneous */ 1031 size_t nlines_total = 0; 1032 size_t nleaves = 0; 1033 size_t i = 0; 1034 res_T res = RES_OK; 1035 1036 /* Pre-conditions */ 1037 ASSERT(tree); 1038 ASSERT(root_index < darray_node_size_get(&tree->nodes)); 1039 1040 /* Macros to help in managing nodes */ 1041 #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id)) 1042 #define CREATE_NODE { \ 1043 res = darray_node_push_back(&tree->nodes, &SLN_NODE_NULL); \ 1044 if(res != RES_OK) goto error; \ 1045 } (void)0 1046 1047 /* Helper macro for the queue data structure */ 1048 #define ENQUEUE(INode) { \ 1049 struct item* item__ = NULL; \ 1050 ASSERT(!is_list_empty(&free_items)); \ 1051 item__ = CONTAINER_OF(list_head(&free_items), struct item, link); \ 1052 item__->inode = INode; \ 1053 list_move_tail(&item__->link, &queue); \ 1054 ++nnodes; \ 1055 } 1056 #define DEQUEUE \ 1057 (list_move_tail(list_head(&queue), &free_items), \ 1058 --nnodes, \ 1059 CONTAINER_OF(list_tail(&free_items), struct item, link)->inode) 1060 1061 /* Setup the linked lists */ 1062 list_init(&free_items); 1063 list_init(&queue); 1064 FOR_EACH(i, 0, sizeof(items)/sizeof(items[0])) { 1065 items[i].inode = SIZE_MAX; 1066 list_init(&items[i].link); 1067 list_add_tail(&free_items, &items[i].link); 1068 } 1069 1070 SHTR(line_list_get_size(tree->args.lines, &nlines_total)); 1071 1072 /* Setup the root node */ 1073 NODE(root_index)->range[0] = 0; 1074 NODE(root_index)->range[1] = nlines_total - 1; 1075 1076 /* Register the root node in the queue */ 1077 ENQUEUE(root_index); 1078 1079 /* Breadth-first partitioning */ 1080 while(!is_list_empty(&queue)) { 1081 size_t node_range[2] = {0,0}; 1082 size_t nlines_node = 0; /* #lines in the node */ 1083 size_t nlines_child_min = 0; /* Min #lines per child */ 1084 size_t nlines_remain = 0; /* Lines to be distributed among the children */ 1085 size_t nchildren = 0; /* #children of the node */ 1086 size_t ichildren = 0; /* Index of the node's first child */ 1087 size_t iline = 0; 1088 size_t inode = 0; 1089 1090 1091 /* Check whether the number of registered leaves and currently processed 1092 * nodes is greater than or equal to the recommended maximum number of 1093 * leaves. If so, stop breadth-first partitioning and treat all registered 1094 * nodes as leaves. The policy is therefore to accept more leaves than 1095 * expected */ 1096 if(nnodes + nleaves >= nleaves_max_hint) { 1097 nleaves += nnodes; 1098 break; 1099 } 1100 1101 inode = DEQUEUE; /* Get the current node */ 1102 1103 /* Retrieve the range of lines contained within the node */ 1104 node_range[0] = NODE(inode)->range[0]; 1105 node_range[1] = NODE(inode)->range[1]; 1106 nlines_node = node_range[1] - node_range[0] + 1/*inclusive bound*/; 1107 ASSERT(nlines_node >= tree->args.leaf_nlines); 1108 1109 if(nlines_node <= tree->args.leaf_nlines) { 1110 /* Make a leaf */ 1111 ++nleaves; 1112 continue; 1113 } 1114 1115 /* Compute the number of children that node has */ 1116 nchildren = node_child_count(NODE(inode), tree->args.arity); 1117 ASSERT(nchildren <= tree->args.arity); 1118 1119 /* Check whether, after adding the child nodes to the queue, the total 1120 * number of nodes in the queue, plus the number of leaves already created, 1121 * does not exceed the max size of the queue. If so, the breadth-first 1122 * partitioning is complete and the nodes currently in the queue are all 1123 * leaves */ 1124 if(nnodes + nchildren + nleaves > QUEUE_SIZE_MAX) { 1125 nleaves += nnodes + 1/*Do not forget the the current node*/; 1126 break; 1127 } 1128 1129 /* Calculate the index of the first child */ 1130 ichildren = darray_node_size_get(&tree->nodes); 1131 ASSERT(ichildren > inode); 1132 1133 /* Define the offset from the current node to its children */ 1134 NODE(inode)->offset = ui64_to_ui32((uint64_t)(ichildren - inode)); 1135 1136 /* Calculate the minimum number of lines per child and the number of 1137 * remaining lines to be distributed equally among them */ 1138 nlines_child_min = nlines_node / nchildren; 1139 nlines_remain = nlines_node % nchildren; 1140 1141 iline = node_range[0]; 1142 FOR_EACH(i, 0, nchildren) { 1143 /* Compute the number of lines per child. Start by assigning the minimum 1144 * number of lines to each child, then distribute the remaining lines 1145 * among the first children */ 1146 size_t nlines_child = nlines_child_min + (i < nlines_remain); 1147 1148 CREATE_NODE; 1149 1150 /* Set the range of lines line for the newly created child. Note that 1151 * the boundaries of the range are inclusive, which is why 1 is 1152 * subtracted to the upper bound */ 1153 NODE(ichildren+i)->range[0] = iline; 1154 NODE(ichildren+i)->range[1] = iline + nlines_child - 1/*inclusive bound*/; 1155 iline += nlines_child; 1156 1157 /* Check that the child's lines are a subset of the parent's lines */ 1158 ASSERT(NODE(ichildren+i)->range[0] >= node_range[0]); 1159 ASSERT(NODE(ichildren+i)->range[1] <= node_range[1]); 1160 1161 ENQUEUE(ichildren+i); /* Register the child node */ 1162 } 1163 } 1164 1165 #undef NODE 1166 #undef CREATE_NODE 1167 #undef ENQUEUE 1168 #undef DEQUEUE 1169 1170 exit: 1171 return res; 1172 error: 1173 goto exit; 1174 } 1175 1176 static res_T 1177 build_subtrees 1178 (struct sln_tree* tree, 1179 const size_t subtrees[], 1180 unsigned nsubtrees, 1181 struct progress* meshing) 1182 { 1183 /* Subtree temporary data */ 1184 struct scratch { 1185 struct darray_node nodes; 1186 struct darray_vertex vertices; 1187 size_t nnodes; 1188 }* scratches; 1189 1190 /* Miscellaneous */ 1191 struct progress partitioning = PROGRESS_DEFAULT; 1192 struct mem_allocator* allocator = NULL; 1193 size_t nlines = 0; 1194 unsigned nthreads = 0; 1195 int i = 0; 1196 ATOMIC res = RES_OK; 1197 1198 /* Pre-conditions */ 1199 ASSERT(tree && subtrees && nsubtrees >= 1); 1200 ASSERT(nsubtrees < INT_MAX); 1201 1202 #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id)) 1203 #define IS_LEAF(Buf, Id) (darray_node_cdata_get(Buf)[Id].offset == 0) 1204 1205 allocator = tree->sln->allocator; 1206 1207 /* Allocate the per subtree scratch */ 1208 scratches = MEM_CALLOC(allocator, nsubtrees, sizeof(*scratches)); 1209 if(!scratches) { res = RES_MEM_ERR; goto error; } 1210 FOR_EACH(i, 0, (int)nsubtrees) { 1211 darray_node_init(allocator, &scratches[i].nodes); 1212 darray_vertex_init(allocator, &scratches[i].vertices); 1213 } 1214 1215 /* Setup the progress bar and print that although nothing has been done yet, 1216 * the calculation is nevertheless in progress */ 1217 SHTR(line_list_get_size(tree->args.lines, &nlines)); 1218 partitioning.total = nlines; 1219 partitioning.msg = "Partitioning: "; 1220 progress_print(tree->sln, &partitioning); 1221 1222 /* Set the number of threads to use. The advice on the number of threads must 1223 * not exceed the number of threads available on the machine (see the 1224 * sln_tree_create function); the caller can only specify a lower number of 1225 * threads. urthermore, the number of subtrees is calculated so that each subtree 1226 * corresponds to at least one thread, ensuring that there are no more 1227 * subtrees than available threads. 1228 * 1229 * The actual number of threads to be used for constructing the subtrees in 1230 * parallel should therefore always be set to the number of subtrees. However, 1231 * to provide greater flexibility in the distribution of threads or the number 1232 * of subtrees, the number of threads is calculated below as the minimum 1233 * between the number of available threads and the number of subtrees */ 1234 nthreads = MMIN(tree->args.nthreads_hint, nsubtrees); 1235 omp_set_num_threads((int)nthreads); 1236 1237 #pragma omp parallel for 1238 for(i=0; i < (int)nsubtrees; ++i) { 1239 struct sln_node root = SLN_NODE_NULL; 1240 struct scratch* scratch = scratches + i; 1241 size_t nnodes_s = 0; 1242 size_t nnodes_t = 0; 1243 res_T res2 = RES_OK; 1244 1245 /* Skip the remaining subtrees in case of an error */ 1246 if(ATOMIC_GET(&res) != RES_OK) continue; 1247 1248 /* Copy the subtree root in the temporary node buffer */ 1249 #pragma omp critical 1250 { 1251 root = *NODE(subtrees[i]); 1252 } 1253 res2 = darray_node_push_back(&scratch->nodes, &root); 1254 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1255 1256 /* Partition the line */ 1257 res2 = partition_lines_depth_first 1258 (tree, 0/*root*/, &scratch->nodes, &partitioning); 1259 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1260 1261 #pragma omp critical 1262 { 1263 struct sln_node* dst = NULL; 1264 const struct sln_node* src = NULL; 1265 1266 /* Increase the node buffer to store the subtree nodes */ 1267 nnodes_s = darray_node_size_get(&scratch->nodes); 1268 nnodes_t = darray_node_size_get(&tree->nodes); 1269 res2 = darray_node_resize(&tree->nodes, nnodes_t + nnodes_s - 1/*root*/); 1270 if(res2 == RES_OK) { 1271 1272 if(!IS_LEAF(&scratch->nodes, 0)) { 1273 /* Set the offset to the first child of the root node of the subtree */ 1274 NODE(subtrees[i])->offset = ui64_to_ui32(nnodes_t - subtrees[i]); 1275 } 1276 1277 /* Copy the subtree nodes in the main node buffer */ 1278 src = darray_node_cdata_get(&scratch->nodes) + 1/*discard root*/; 1279 dst = darray_node_data_get(&tree->nodes) + nnodes_t; 1280 memcpy(dst, src, (nnodes_s-1/*discard root*/)*sizeof(*src)); 1281 } 1282 } 1283 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1284 1285 /* Free the temporary buffer but retain the number of nodes in the subtree. 1286 * This number is used later when constructing polylines (see below) */ 1287 darray_node_purge(&scratch->nodes); 1288 scratch->nnodes = nnodes_s; 1289 } 1290 1291 /* Setup the progress bar and print that although nothing has been done yet, 1292 * the calculation is nevertheless in progress */ 1293 *meshing = PROGRESS_DEFAULT; 1294 meshing->total = darray_node_size_get(&tree->nodes); 1295 meshing->msg = "Meshing: "; 1296 progress_print(tree->sln, meshing); 1297 1298 #pragma omp parallel for 1299 for(i=0; i < (int)nsubtrees; ++i) { 1300 struct scratch* scratch = scratches + i; 1301 size_t inode = subtrees[i]; 1302 size_t nverts_s = 0; 1303 size_t nverts_t = 0; 1304 size_t j = 0; 1305 res_T res2 = RES_OK; 1306 1307 /* Skip the remaining subtrees in case of an error */ 1308 if(ATOMIC_GET(&res) != RES_OK) continue; 1309 1310 res2 = build_polylines 1311 (tree, inode, scratch->nnodes, &scratch->vertices, meshing); 1312 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1313 1314 #pragma omp critical 1315 { 1316 struct sln_vertex* dst = NULL; 1317 const struct sln_vertex* src = NULL; 1318 1319 /* Increase the vertex buffer to store the subtree polylines */ 1320 nverts_s = darray_vertex_size_get(&scratch->vertices); 1321 nverts_t = darray_vertex_size_get(&tree->vertices); 1322 res2 = darray_vertex_resize(&tree->vertices, nverts_t + nverts_s); 1323 if(res2 == RES_OK) { 1324 /* Copy the subtree nodes in the main vertex buffer */ 1325 src = darray_vertex_cdata_get(&scratch->vertices); 1326 dst = darray_vertex_data_get(&tree->vertices) + nverts_t; 1327 memcpy(dst, src, nverts_s*sizeof(*src)); 1328 } 1329 } 1330 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1331 1332 /* Update the index of the first vertex of the polyline for the nodes in the 1333 * subtree based on their new locations, i.e., the main vertex buffer */ 1334 NODE(subtrees[i])->ivertex += nverts_t; 1335 FOR_EACH(j, 0, scratch->nnodes-1/*The root has been just treated*/) { 1336 inode = subtrees[i] + NODE(subtrees[i])->offset + j; 1337 NODE(inode)->ivertex += nverts_t; 1338 } 1339 1340 /* Free the temporary vertex buffer */ 1341 darray_node_purge(&scratch->nodes); 1342 } 1343 1344 #undef NODE 1345 #undef IS_LEAF 1346 1347 exit: 1348 /* Clean up temporary data */ 1349 FOR_EACH(i, 0, (int)nsubtrees) { 1350 darray_node_release(&scratches[i].nodes); 1351 darray_vertex_release(&scratches[i].vertices); 1352 } 1353 MEM_RM(allocator, scratches); 1354 return (res_T)res; 1355 error: 1356 goto exit; 1357 } 1358 1359 static res_T 1360 tree_build_sequential(struct sln_tree* tree) 1361 { 1362 /* Progress statuses */ 1363 struct progress partitioning = PROGRESS_DEFAULT; 1364 struct progress meshing = PROGRESS_DEFAULT; 1365 1366 struct sln_node node = SLN_NODE_NULL; 1367 size_t nlines = 0; 1368 size_t nnodes = 0; 1369 size_t iroot = 0; 1370 res_T res = RES_OK; 1371 1372 SHTR(line_list_get_size(tree->args.lines, &nlines)); 1373 1374 iroot = darray_node_size_get(&tree->nodes); 1375 ASSERT(iroot == 0); /* assume that the tree contains no data */ 1376 1377 /* Setup the root node */ 1378 node.range[0] = 0; 1379 node.range[1] = nlines - 1/* inclusive bound */; 1380 res = darray_node_push_back(&tree->nodes, &node); 1381 if(res != RES_OK) goto error; 1382 1383 /* Partition lines */ 1384 partitioning.total = nlines; 1385 partitioning.msg = "Partitioning: "; 1386 progress_print(tree->sln, &partitioning); 1387 res = partition_lines_depth_first(tree, iroot, &tree->nodes, &partitioning); 1388 if(res != RES_OK) goto error; 1389 1390 /* Create polylines */ 1391 nnodes = darray_node_size_get(&tree->nodes); 1392 meshing.total = nnodes; 1393 meshing.msg = "Meshing: "; 1394 progress_print(tree->sln, &meshing); 1395 res = build_polylines(tree, iroot, nnodes, &tree->vertices, &meshing); 1396 if(res != RES_OK) goto error; 1397 1398 exit: 1399 return res; 1400 error: 1401 goto exit; 1402 } 1403 1404 static res_T 1405 tree_build_parallel(struct sln_tree* tree) 1406 { 1407 /* Stack data structure */ 1408 #define STACK_SIZE (SLN_TREE_DEPTH_MAX*(SLN_TREE_ARITY_MAX-1)) 1409 size_t stack[STACK_SIZE]; 1410 size_t istack = 0; 1411 1412 /* Subtrees */ 1413 size_t subtrees[QUEUE_SIZE_MAX]; /* List of sub-tree root indices */ 1414 unsigned nsubtrees = 0; 1415 1416 /* Progress message of the meshing step */ 1417 struct progress meshing = PROGRESS_DEFAULT; 1418 1419 /* Miscellaneous */ 1420 struct sln_node root = SLN_NODE_NULL; 1421 size_t nlines = 0; 1422 size_t nnodes_upper_tree = 0; /* #nodes from the root to the subtrees */ 1423 size_t iroot = 0; 1424 size_t inode = 0; 1425 size_t i = 0; 1426 res_T res = RES_OK; 1427 1428 ASSERT(tree); 1429 1430 iroot = darray_node_size_get(&tree->nodes); 1431 ASSERT(iroot == 0); /* assume that the tree contains no data */ 1432 1433 SHTR(line_list_get_size(tree->args.lines, &nlines)); 1434 1435 /* Setup the root node */ 1436 root.range[0] = 0; 1437 root.range[1] = nlines; 1438 res = darray_node_push_back(&tree->nodes, &root); 1439 if(res != RES_OK) goto error; 1440 1441 /* Partition the lines in bread-first fixing the maximum number of leafs to 1442 * the available number of threads */ 1443 res = partition_lines_breadth_first(tree, iroot, tree->args.nthreads_hint); 1444 if(res != RES_OK) goto error; 1445 1446 /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */ 1447 stack[istack++] = SIZE_MAX; 1448 1449 /* Retrieve the leaf indices generated by bread-first partitioning. These 1450 * correspond to the roots of the subtree to be built */ 1451 inode = iroot; /* Root node */ 1452 while(inode != SIZE_MAX) { 1453 const struct sln_node* node = darray_node_cdata_get(&tree->nodes) + inode; 1454 ASSERT(inode < darray_node_size_get(&tree->nodes)); 1455 1456 if(sln_node_is_leaf(node)) { 1457 ASSERT(nsubtrees < QUEUE_SIZE_MAX); 1458 subtrees[nsubtrees++] = inode; 1459 inode = stack[--istack]; /* Pop the next node */ 1460 1461 } else { 1462 const size_t ichild0 = inode + node->offset; 1463 const unsigned nchildren = node_child_count(node, tree->args.arity); 1464 1465 /* Push the children except the 1st one */ 1466 ASSERT(istack + (nchildren-1/*ichild0*/) <= STACK_SIZE); 1467 FOR_EACH_REVERSE(i, nchildren-1, 0) stack[istack++] = ichild0 + i; 1468 1469 /* Recursively traverse the 1st child */ 1470 inode = ichild0; 1471 } 1472 } 1473 1474 /* Retrieves the number of registered nodes. This is the number of nodes from 1475 * the root to the subtrees, excluding the roots of those subtrees */ 1476 nnodes_upper_tree = darray_node_size_get(&tree->nodes); 1477 nnodes_upper_tree -= nsubtrees; /* Exclude the root node of the subtrees */ 1478 1479 res = build_subtrees(tree, subtrees, nsubtrees, &meshing); 1480 if(res != RES_OK) goto error; 1481 1482 /* Build the polylines of the upper level if necessary */ 1483 if(nnodes_upper_tree != 0) { 1484 res = build_polylines 1485 (tree, 0/*root*/, nnodes_upper_tree, &tree->vertices, &meshing); 1486 if(res != RES_OK) goto error; 1487 } 1488 1489 #undef STACK_SIZE 1490 1491 exit: 1492 return res; 1493 error: 1494 goto exit; 1495 } 1496 1497 /******************************************************************************* 1498 * Local functions 1499 ******************************************************************************/ 1500 res_T 1501 tree_build(struct sln_tree* tree) 1502 { 1503 res_T res = RES_OK; 1504 ASSERT(tree); 1505 1506 res = tree->args.nthreads_hint == 1 1507 ? tree_build_sequential(tree) 1508 : tree_build_parallel(tree); 1509 if(res != RES_OK) goto error; 1510 1511 exit: 1512 return res; 1513 error: 1514 darray_node_purge(&tree->nodes); 1515 darray_vertex_purge(&tree->vertices); 1516 goto exit; 1517 }