commit dfa5deae6526586f0a3b4687fe0c1da79a869907
parent fd528771c888a8fc1ba4053eb9edc9de7eaf387d
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Thu, 30 Apr 2026 12:02:16 +0200
Addition of parallel tree construction
In reality, most of the work was completed in previous commits. This
commit simply enables parallel execution of the loops used to partition
the lines and construct the sub-tree polylines.
By default, parallel construction is enabled using as many threads as
there are available cores on the machine.
Note that while the parallel construction algorithm works well with a
single thread, explicitly using a single thread switches to the
sequential construction algorithm used previously. This is because not
only does this algorithm does not have the overhead (in this case
unnecessary) of the parallel algorithm, but retaining it provides a
simpler algorithm as a reference. Its maintenance should also be
minimal, as it relies on exactly the same functions as those used by the
parallel algorithm.
However, if the caller does not explicitly disable parallelism, but the
input data does not allow for parallel tree construction, the parallel
algorithm is indeed executed. This is because determining whether
parallel processing is possible is precisely the responsibility of this
algorithm. There is therefore already a computational overhead involved
in detecting that no parallelism is possible, and it therefore seems
rather futile to attempt to reduce it by switching to the sequential
algorithm midway through. This is all the more true given that this
situation will almost certainly be an exception, considering the number
of lines actually to be processed.
Finally, the sln-build utility has been updated to add an option for
controlling the number of threads to use. Its man page has been updated
accordingly.
Diffstat:
5 files changed, 76 insertions(+), 31 deletions(-)
diff --git a/doc/sln-build.1 b/doc/sln-build.1
@@ -15,7 +15,7 @@
.\"
.\" You should have received a copy of the GNU General Public License
.\" along with this program. If not, see <http://www.gnu.org/licenses/>.
-.Dd April 17, 2026
+.Dd April 30, 2026
.Dt SLN-BUILD 1
.Os
.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -31,9 +31,10 @@
.Op Fl L Ar leaf_nlines
.Op Fl l Ar line_profile
.Op Fl o Ar accel_struct
+.Op Fl t Ar thread_count
+.Fl m Ar molparams
.Fl P Ar pressure
.Fl T Ar temperature
-.Fl m Ar molparams
.Fl x Ar mixture
.Op Ar lines
.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -113,6 +114,9 @@ Currently,
.Cm voigt
is the only supported profile and therefore the default value.
.\""""""""""""""""""""""""""""""""""
+.It Fl m Ar molparams
+Isotopologue metadata in HITRAN format.
+.\""""""""""""""""""""""""""""""""""
.It Fl o Ar accel_struct
Output file.
If not defined, the acceleration structure is written to standard output.
@@ -131,8 +135,11 @@ This format is more compact, allowing for faster loading of line data.
.It Fl T Ar temperature
Temperature of the gaz mixture, in Kelvin.
.\""""""""""""""""""""""""""""""""""
-.It Fl m Ar molparams
-Isotopologue metadata in HITRAN format.
+.It Fl t Ar thread_count
+Advice on the number of threads to use.
+By default,
+.Nm
+uses as many threads as processor cores.
.\""""""""""""""""""""""""""""""""""
.It Fl v
Make
diff --git a/src/sln_build.c b/src/sln_build.c
@@ -58,6 +58,7 @@ struct args {
int verbose;
unsigned arity; /* tree arity */
unsigned leaf_nlines; /* Maximum number of lines per leaf */
+ unsigned nthreads_hint; /* Advice on the number of threads to use */
enum line_list_format line_format;
};
#define ARGS_DEFAULT__ { \
@@ -84,6 +85,7 @@ struct args {
0, /* Verbose */ \
2, /* Tree arity */ \
1, /* Number of lines per leaf */ \
+ (unsigned)(-1), /* #threads hint */ \
LINE_LIST_HITRAN /* lines_format */ \
}
static const struct args ARGS_DEFAULT = ARGS_DEFAULT__;
@@ -111,7 +113,8 @@ usage(FILE* stream)
fprintf(stream,
"usage: sln-build [-chsv] [-a arity] [-e polyline_opt[:polyline_opt ...]]\n"
" [-L leaf_nlines] [-l line_profile] [-o accel_struct]\n"
-" -P pressure -T temperature -m molparams -x mixture [lines]\n");
+" [-t thread_count] -P pressure -T temperature -m molparms\n"
+" -x mixture [lines]\n");
}
static res_T
@@ -229,7 +232,7 @@ args_init(struct args* args, int argc, char** argv)
*args = ARGS_DEFAULT;
- while((opt = getopt(argc, argv, "a:ce:hL:l:o:P:sT:m:vx:")) != -1) {
+ while((opt = getopt(argc, argv, "a:ce:hL:l:m:o:P:sT:t:vx:")) != -1) {
switch(opt) {
case 'a':
res = cstr_to_uint(optarg, &args->arity);
@@ -260,6 +263,10 @@ args_init(struct args* args, int argc, char** argv)
res = cstr_to_double(optarg, &args->temperature);
if(res == RES_OK && args->temperature < 0) res = RES_BAD_ARG;
break;
+ case 't':
+ res = cstr_to_uint(optarg, &args->nthreads_hint);
+ if(res == RES_OK && args->nthreads_hint < 1) res = RES_BAD_ARG;
+ break;
case 'm': args->molparams = optarg; break;
case 'v': args->verbose += (args->verbose < 3); break;
case 'x': args->mixture = optarg; break;
@@ -460,6 +467,7 @@ cmd_run(struct cmd* cmd)
tree_args.arity = cmd->args.arity;
tree_args.leaf_nlines = cmd->args.leaf_nlines;
tree_args.collapse_polylines = cmd->args.collapse_polylines;
+ tree_args.nthreads_hint = cmd->args.nthreads_hint;
if(cmd->args.output) {
write_args.filename = cmd->args.output;
diff --git a/src/sln_tree.c b/src/sln_tree.c
@@ -453,11 +453,7 @@ sln_tree_create
nthreads_max = (unsigned)MMAX(omp_get_max_threads(), omp_get_num_procs());
tree->args.nthreads_hint = MMIN(tree->args.nthreads_hint, nthreads_max);
-#if 1
res = tree_build(tree);
-#else
- res = tree_build2(tree);
-#endif
if(res != RES_OK) goto error;
exit:
diff --git a/src/sln_tree_build.c b/src/sln_tree_build.c
@@ -26,6 +26,8 @@
#include <rsys/cstr.h>
#include <rsys/math.h>
+#include <omp.h>
+
/* Structure used to track the progress of a construction phase */
struct progress {
/* Absolute progression */
@@ -1162,7 +1164,7 @@ static res_T
build_subtrees
(struct sln_tree* tree,
const size_t subtrees[],
- size_t nsubtrees,
+ unsigned nsubtrees,
struct progress* meshing)
{
/* Subtree temporary data */
@@ -1176,11 +1178,13 @@ build_subtrees
struct progress partitioning = PROGRESS_DEFAULT;
struct mem_allocator* allocator = NULL;
size_t nlines = 0;
- size_t i = 0;
+ unsigned nthreads = 0;
+ int i = 0;
ATOMIC res = RES_OK;
/* Pre-conditions */
ASSERT(tree && subtrees && nsubtrees >= 1);
+ ASSERT(nsubtrees < INT_MAX);
#define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id))
@@ -1189,12 +1193,11 @@ build_subtrees
/* Allocate the per subtree scratch */
scratches = MEM_CALLOC(allocator, nsubtrees, sizeof(*scratches));
if(!scratches) { res = RES_MEM_ERR; goto error; }
- FOR_EACH(i, 0, nsubtrees) {
+ FOR_EACH(i, 0, (int)nsubtrees) {
darray_node_init(allocator, &scratches[i].nodes);
darray_vertex_init(allocator, &scratches[i].vertices);
}
-
/* Setup the progress bar and print that although nothing has been done yet,
* the calculation is nevertheless in progress */
SHTR(line_list_get_size(tree->args.lines, &nlines));
@@ -1202,8 +1205,23 @@ build_subtrees
partitioning.msg = "Partitioning: ";
progress_print(tree->sln, &partitioning);
- /* This loop should be parallelized, with a block size of 1 */
- for(i=0; i < nsubtrees; ++i) {
+ /* Set the number of threads to use. The advice on the number of threads must
+ * not exceed the number of threads available on the machine (see the
+ * sln_tree_create function); the caller can only specify a lower number of
+ * threads. urthermore, the number of subtrees is calculated so that each subtree
+ * corresponds to at least one thread, ensuring that there are no more
+ * subtrees than available threads.
+ *
+ * The actual number of threads to be used for constructing the subtrees in
+ * parallel should therefore always be set to the number of subtrees. However,
+ * to provide greater flexibility in the distribution of threads or the number
+ * of subtrees, the number of threads is calculated below as the minimum
+ * between the number of available threads and the number of subtrees */
+ nthreads = MMIN(tree->args.nthreads_hint, nsubtrees);
+ omp_set_num_threads((int)nthreads);
+
+ #pragma omp parallel for
+ for(i=0; i < (int)nsubtrees; ++i) {
struct sln_node root = SLN_NODE_NULL;
struct scratch* scratch = scratches + i;
size_t nnodes_s = 0;
@@ -1260,8 +1278,8 @@ build_subtrees
meshing->msg = "Meshing: ";
progress_print(tree->sln, meshing);
- /* This loop should be parallelized, with a block size of 1 */
- for(i=0; i < nsubtrees; ++i) {
+ #pragma omp parallel for
+ for(i=0; i < (int)nsubtrees; ++i) {
struct scratch* scratch = scratches + i;
size_t inode = subtrees[i];
size_t nverts_s = 0;
@@ -1307,10 +1325,11 @@ build_subtrees
}
#undef NODE
+ #undef IS_LEAF
exit:
/* Clean up temporary data */
- FOR_EACH(i, 0, nsubtrees) {
+ FOR_EACH(i, 0, (int)nsubtrees) {
darray_node_release(&scratches[i].nodes);
darray_vertex_release(&scratches[i].vertices);
}
@@ -1320,11 +1339,8 @@ error:
goto exit;
}
-/*******************************************************************************
- * Local functions
- ******************************************************************************/
-res_T
-tree_build(struct sln_tree* tree)
+static res_T
+tree_build_sequential(struct sln_tree* tree)
{
/* Progress statuses */
struct progress partitioning = PROGRESS_DEFAULT;
@@ -1368,8 +1384,8 @@ error:
goto exit;
}
-res_T
-tree_build2(struct sln_tree* tree)
+static res_T
+tree_build_parallel(struct sln_tree* tree)
{
/* Stack data structure */
#define STACK_SIZE (SLN_TREE_DEPTH_MAX*(SLN_TREE_ARITY_MAX-1))
@@ -1378,7 +1394,7 @@ tree_build2(struct sln_tree* tree)
/* Subtrees */
size_t subtrees[QUEUE_SIZE_MAX]; /* List of sub-tree root indices */
- size_t nsubtrees = 0;
+ unsigned nsubtrees = 0;
/* Progress message of the meshing step */
struct progress meshing = PROGRESS_DEFAULT;
@@ -1460,3 +1476,25 @@ exit:
error:
goto exit;
}
+
+/*******************************************************************************
+ * Local functions
+ ******************************************************************************/
+res_T
+tree_build(struct sln_tree* tree)
+{
+ res_T res = RES_OK;
+ ASSERT(tree);
+
+ res = tree->args.nthreads_hint == 1
+ ? tree_build_sequential(tree)
+ : tree_build_parallel(tree);
+ if(res != RES_OK) goto error;
+
+exit:
+ return res;
+error:
+ darray_node_purge(&tree->nodes);
+ darray_vertex_purge(&tree->vertices);
+ goto exit;
+}
diff --git a/src/sln_tree_c.h b/src/sln_tree_c.h
@@ -70,10 +70,6 @@ extern LOCAL_SYM res_T
tree_build
(struct sln_tree* tree);
-extern LOCAL_SYM res_T
-tree_build2
- (struct sln_tree* tree);
-
/* Assume that the node is an internal node */
extern LOCAL_SYM unsigned
node_child_count