commit b7cfffc80eb3a2bd761f052be68f4abce343f027
parent 82dc8990419d2d5329110828b63e8a303b83b1ee
Author: Christophe Coustet <christophe.coustet@meso-star.com>
Date: Thu, 22 Feb 2018 09:51:41 +0100
First try with openmp; only efficient with 2 threads.
With a lot of printf.
Diffstat:
5 files changed, 312 insertions(+), 2 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -26,6 +26,7 @@ option(NO_TEST "Do not build tests" OFF)
find_package(RCMake 0.4 REQUIRED)
find_package(Star3D 0.5 REQUIRED)
find_package(RSys 0.6.1 REQUIRED)
+find_package(OpenMP 1.2 REQUIRED)
if(NOT NO_TEST)
find_package(StarSP 0.7 REQUIRED)
@@ -81,11 +82,12 @@ target_link_libraries(senc RSys Star3D)
set_target_properties(senc PROPERTIES
DEFINE_SYMBOL SENC_SHARED_BUILD
VERSION ${VERSION}
+ COMPILE_FLAGS ${OpenMP_C_FLAGS}
SOVERSION ${VERSION_MAJOR})
rcmake_copy_runtime_libraries(senc)
if(CMAKE_COMPILER_IS_GNUCC)
- set_target_properties(senc PROPERTIES LINK_FLAGS "-lm")
+ set_target_properties(senc PROPERTIES LINK_FLAGS "${OpenMP_C_FLAGS} -lm")
endif()
rcmake_setup_devel(senc StarEnc ${VERSION} senc_version.h)
diff --git a/src/senc_device.c b/src/senc_device.c
@@ -19,6 +19,8 @@
#include <rsys/logger.h>
#include <rsys/mem_allocator.h>
+#include <omp.h>
+
/*******************************************************************************
* Helper functions
******************************************************************************/
@@ -104,6 +106,8 @@ senc_device_create
dev->logger = log;
dev->allocator = allocator;
dev->verbose = verbose;
+ dev->nthreads = MMIN(nthreads_hint, (unsigned)omp_get_num_procs());
+ omp_set_num_threads((int)dev->nthreads);
ref_init(&dev->ref);
exit:
diff --git a/src/senc_device_c.h b/src/senc_device_c.h
@@ -31,6 +31,7 @@ struct senc_device {
struct logger* logger;
struct mem_allocator* allocator;
int verbose;
+ unsigned nthreads;
ref_T ref;
};
diff --git a/src/senc_scene_analyze.c b/src/senc_scene_analyze.c
@@ -29,6 +29,7 @@
#include <star/s3d.h>
+#include <omp.h>
#include <limits.h>
#include <stdlib.h>
@@ -714,6 +715,10 @@ scan_edges
}
}
+
+ printf("Collected %zu edges.\n", htable_edge_id_size_get(&edge_ids));
+
+
exit:
htable_edge_id_release(&edge_ids);
return res;
@@ -722,6 +727,205 @@ error:
}
static res_T
+collect_and_link_neighbours
+ (struct senc_scene* scn,
+ struct trgside* trgsides,
+ struct darray_triangle_tmp* triangles_tmp_array)
+{
+ trg_id_t t;
+ const struct triangle_in *triangles_in;
+ struct triangle_tmp *triangles_tmp;
+ edge_id_t approx_nbedges, e, edge_count;
+ const union double3* vertices;
+ size_t tmp;
+ const int num_thread = omp_get_num_threads();
+ const int rank = omp_get_thread_num();
+ struct darray_neighbourhood neighbourhood_by_edge;
+ /* Htable used to give an id to edges */
+ struct htable_edge_id edge_ids;
+ res_T res = RES_OK;
+
+ ASSERT(scn && trgsides && triangles_tmp_array);
+ ASSERT((size_t) scn->nuverts + (size_t) scn->nutris + 2 <= EDGE_MAX__);
+
+ darray_neighbourhood_init(scn->dev->allocator, &neighbourhood_by_edge);
+
+ /* Make some room for edges. */
+ approx_nbedges = (edge_id_t) ((scn->nuverts + scn->nutris) / num_thread);
+ htable_edge_id_init(scn->dev->allocator, &edge_ids);
+ OK(htable_edge_id_reserve(&edge_ids, approx_nbedges));
+ OK(darray_neighbourhood_reserve(&neighbourhood_by_edge, approx_nbedges));
+
+ /* Loop on triangles to register edges. */
+ triangles_in = darray_triangle_in_cdata_get(&scn->triangles_in);
+ triangles_tmp = darray_triangle_tmp_data_get(triangles_tmp_array);
+ ASSERT(scn->nutris == darray_triangle_tmp_size_get(triangles_tmp_array));
+ FOR_EACH(t, 0, scn->nutris) {
+ struct darray_neighbour* neighbour_list;
+ struct neighbour_info neighbour;
+ struct edge_neighbourhood neighbourhood;
+ char ee;
+ FOR_EACH(ee, 0, 3) {
+ edge_id_t* p_id;
+ edge_id_t id;
+ const vrtx_id_t v0 = triangles_in[t].vertice_id[ee];
+ const vrtx_id_t v1 = triangles_in[t].vertice_id[(ee + 1) % 3];
+ /* Process only "my" edges! */
+ const int h = v0 + v1 + MMIN(v0,v1); /* v0,v1 and v1,v0 must give the same hash!!! */
+ if (h % num_thread != rank) continue;
+ /* Create edge. */
+ set_edge(v0, v1, &neighbourhood.edge, &triangles_tmp[t].reversed_edge[ee]);
+ /* Find edge id; create it if not already done. */
+ p_id = htable_edge_id_find(&edge_ids, &neighbourhood.edge);
+ if (p_id) {
+ id = *p_id;
+ }
+ else {
+ /* Create id */
+ tmp = htable_edge_id_size_get(&edge_ids);
+ ASSERT(tmp <= EDGE_MAX__);
+ id = (edge_id_t)tmp;
+ OK(htable_edge_id_set(&edge_ids, &neighbourhood.edge, &id));
+ darray_neighbour_init(scn->dev->allocator, &neighbourhood.neighbours);
+ OK(darray_neighbourhood_push_back(&neighbourhood_by_edge,
+ &neighbourhood));
+ }
+ /* Add neighbour info to edge's neighbour list */
+ neighbour_list
+ = &darray_neighbourhood_data_get(&neighbourhood_by_edge)[id].neighbours;
+ neighbour.trg_id = t;
+ neighbour.edge_rank = ee;
+ darray_neighbour_push_back(neighbour_list, &neighbour);
+ }
+ }
+
+ /* Loop on collected edges.
+ * For each edge sort triangle sides by rotation angle
+ * and connect neighbours. */
+ vertices = darray_position_cdata_get(&scn->vertices);
+ tmp = darray_neighbourhood_size_get(&neighbourhood_by_edge);
+ ASSERT(tmp <= EDGE_MAX__);
+ edge_count = (edge_id_t)tmp;
+
+
+ printf("Thread %d/%d collected %zu edges.\n", rank, num_thread,
+ htable_edge_id_size_get(&edge_ids));
+
+
+ FOR_EACH(e, 0, edge_count) {
+ double edge[3], common_edge[3], basis[9], norm, mz, mz_edge;
+ vrtx_id_t v0, v1, v2;
+ struct edge_neighbourhood* neighbourhood
+ = darray_neighbourhood_data_get(&neighbourhood_by_edge) + e;
+ struct darray_neighbour* neighbour_list = &neighbourhood->neighbours;
+ side_id_t i, neighbour_count;
+ char mz_vid, mz_vid_edge;
+ tmp = darray_neighbour_size_get(neighbour_list);
+ ASSERT(tmp <= SIDE_MAX__);
+ neighbour_count = (side_id_t)tmp;
+ ASSERT(neighbour_count);
+ v0 = neighbourhood->edge.vrtx0;
+ v1 = neighbourhood->edge.vrtx1;
+ d3_sub(common_edge, vertices[v1].vec, vertices[v0].vec);
+ if (vertices[v0].pos.z > vertices[v1].pos.z) {
+ mz_edge = vertices[v0].pos.z;
+ mz_vid_edge = 0;
+ }
+ else {
+ mz_edge = vertices[v1].pos.z;
+ mz_vid_edge = 1;
+ }
+ norm = d3_normalize(common_edge, common_edge);
+ ASSERT(norm);
+ d33_basis(basis, common_edge);
+ d33_inverse(basis, basis);
+ FOR_EACH(i, 0, neighbour_count) {
+ struct neighbour_info* neighbour_info
+ = darray_neighbour_data_get(neighbour_list) + i;
+ const struct triangle_in *trg_in = triangles_in + neighbour_info->trg_id;
+ struct triangle_tmp *neighbour = triangles_tmp + neighbour_info->trg_id;
+ char actual_vid;
+ v2 = trg_in->vertice_id[(neighbour_info->edge_rank + 2) % 3];
+ if (vertices[v2].pos.z > mz_edge) {
+ mz = vertices[v2].pos.z;
+ mz_vid = 2;
+ }
+ else {
+ mz = mz_edge;
+ mz_vid = mz_vid_edge;
+ }
+ /* Compute the actual vertex id
+ * as vertices are not in the v0 v1 v2 order in the actual triangle */
+ if (mz_vid == 2) {
+ actual_vid = (2 + neighbour_info->edge_rank) % 3;
+ }
+ else {
+ int is_r = neighbour->reversed_edge[neighbour_info->edge_rank];
+ ASSERT(mz_vid == 0 || mz_vid == 1);
+ actual_vid = ((is_r ? 1 - mz_vid : mz_vid) + neighbour_info->edge_rank) % 3;
+ }
+ ASSERT(neighbour->max_z <= mz);
+ neighbour->max_z = mz;
+ ASSERT(0 <= actual_vid && actual_vid <= 2);
+ neighbour->max_z_vrtx_id = actual_vid;
+ /* Compute rotation angle around common edge */
+ d3_sub(edge, vertices[v2].vec, vertices[v0].vec);
+ d33_muld3(edge, basis, edge);
+ ASSERT(d3_len(edge) && (edge[0] || edge[1]));
+ neighbour_info->angle = atan2(edge[1], edge[0]);
+ if (neighbour_info->angle < 0) neighbour_info->angle += 2 * PI;
+ /* Due to catastrophic cancelation, -eps+2pi translates to 2pi */
+ ASSERT(0 <= neighbour_info->angle && neighbour_info->angle <= 2 * PI);
+ }
+ /* Sort triangles by rotation angle */
+ qsort(darray_neighbour_data_get(neighbour_list), neighbour_count,
+ sizeof(struct neighbour_info), neighbour_cmp);
+ /* Link sides.
+ * Create cycles of sides by neighbourhood around common edge. */
+ FOR_EACH(i, 0, neighbour_count) {
+ /* Neighbourhood info for current pair of triangles */
+ const struct neighbour_info* current
+ = darray_neighbour_cdata_get(neighbour_list) + i;
+ const struct neighbour_info* ccw_neighbour
+ = darray_neighbour_cdata_get(neighbour_list) + (i + 1) % neighbour_count;
+ /* Rank of the edge of interest in triangles */
+ const char crt_edge = current->edge_rank;
+ const char ccw_edge = ccw_neighbour->edge_rank;
+ /* User id of current triangles */
+ const trg_id_t crt_id = current->trg_id;
+ const trg_id_t ccw_id = ccw_neighbour->trg_id;
+ /* Facing sides of triangles */
+ const enum side_id crt_side
+ = triangles_tmp[crt_id].reversed_edge[crt_edge] ? SIDE_BACK : SIDE_FRONT;
+ const enum side_id ccw_side
+ = triangles_tmp[ccw_id].reversed_edge[ccw_edge] ? SIDE_FRONT : SIDE_BACK;
+ /* Index of sides in trgsides */
+ const side_id_t crt_side_idx = TRGIDxSIDE_2_TRGSIDE(crt_id, crt_side);
+ const side_id_t ccw_side_idx = TRGIDxSIDE_2_TRGSIDE(ccw_id, ccw_side);
+ /* Side ptrs */
+ struct trgside* const p_crt_side = trgsides + crt_side_idx;
+ struct trgside* const p_ccw_side = trgsides + ccw_side_idx;
+ /* Link sides */
+ p_crt_side->facing_side_id[crt_edge] = ccw_side_idx;
+ p_ccw_side->facing_side_id[ccw_edge] = crt_side_idx;
+ /* Record media */
+ p_crt_side->medium = triangles_in[crt_id].medium[crt_side];
+ p_ccw_side->medium = triangles_in[ccw_id].medium[ccw_side];
+ ASSERT(p_crt_side->medium < scn->nmeds);
+ ASSERT(p_ccw_side->medium < scn->nmeds);
+ p_crt_side->list_id = FLAG_LIST_SIDE_LIST;
+ p_ccw_side->list_id = FLAG_LIST_SIDE_LIST;
+ }
+ }
+exit:
+ darray_neighbourhood_release(&neighbourhood_by_edge);
+ htable_edge_id_release(&edge_ids);
+ return res;
+error:
+ goto exit;
+}
+
+static res_T
link_neighbours
(struct senc_scene* scn,
struct trgside* trgsides,
@@ -1002,6 +1206,8 @@ error:
goto exit;
}
+#include <rsys/clock_time.h>
+
/*******************************************************************************
* Exported functions
******************************************************************************/
@@ -1026,6 +1232,21 @@ senc_scene_analyze(struct senc_scene* scn, struct senc_descriptor** out_desc)
/* Array of triangle sides. */
struct trgside* trgsides = NULL;
+
+
+
+
+ struct time t0, t1, t0_;
+ char dump[64];
+
+ time_current(&t0);
+ t0_ = t0;
+
+
+
+
+
+
if(!scn || !out_desc) return RES_BAD_ARG;
desc = descriptor_create(scn);
@@ -1041,6 +1262,32 @@ senc_scene_analyze(struct senc_scene* scn, struct senc_descriptor** out_desc)
darray_neighbourhood_init(scn->dev->allocator, &neighbourhood_by_edge);
neighbourhood_by_edge_initialized = 1;
+
+
+#ifdef PARALLEL_EDGES
+
+ OK(darray_triangle_tmp_resize(&triangles_tmp, scn->nutris));
+ trgsides
+ = MEM_CALLOC(scn->dev->allocator, 2 * scn->nutris, sizeof(struct trgside));
+ if (!trgsides) {
+ res = RES_MEM_ERR;
+ goto error;
+ }
+#pragma omp parallel
+ {
+ collect_and_link_neighbours(scn, trgsides, &triangles_tmp);
+ }
+
+
+
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_MSEC | TIME_SEC | TIME_MIN, NULL, dump, sizeof(dump));
+ printf("collect_and_link_neighbours: %s\n", dump);
+ time_current(&t0);
+
+#else
+
+
/* Step 1: list edges and connect triangles to edges */
res = scan_edges(scn, &neighbourhood_by_edge, &triangles_tmp);
if(res != RES_OK) {
@@ -1055,6 +1302,14 @@ senc_scene_analyze(struct senc_scene* scn, struct senc_descriptor** out_desc)
goto error;
}
+
+
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_MSEC | TIME_SEC | TIME_MIN, NULL, dump, sizeof(dump));
+ printf("scan_edges: %s\n", dump);
+ time_current(&t0);
+
+
/* Step 2: link triangles by neighbourhood */
res = link_neighbours(scn, trgsides, &neighbourhood_by_edge,
&triangles_tmp);
@@ -1065,6 +1320,17 @@ senc_scene_analyze(struct senc_scene* scn, struct senc_descriptor** out_desc)
darray_neighbourhood_release(&neighbourhood_by_edge);
neighbourhood_by_edge_initialized = 0;
+
+
+
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_MSEC | TIME_SEC | TIME_MIN, NULL, dump, sizeof(dump));
+ printf("link_neighbours: %s\n", dump);
+ time_current(&t0);
+
+
+#endif
+
darray_cc_descriptor_init(scn->dev->allocator, &connex_components);
connex_components_initialized = 1;
darray_triangle_comp_init(scn->dev->allocator, &triangles_comp);
@@ -1083,6 +1349,15 @@ senc_scene_analyze(struct senc_scene* scn, struct senc_descriptor** out_desc)
darray_triangle_tmp_release(&triangles_tmp);
triangles_tmp_initialized = 0;
+
+
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_MSEC | TIME_SEC | TIME_MIN, NULL, dump, sizeof(dump));
+ printf("extract_connex_components: %s\n", dump);
+ time_current(&t0);
+
+
+
/* Step 4: group components */
res = group_connex_components(desc, trgsides, &triangles_comp,
&connex_components);
@@ -1095,6 +1370,15 @@ senc_scene_analyze(struct senc_scene* scn, struct senc_descriptor** out_desc)
darray_triangle_comp_release(&triangles_comp);
triangles_comp_initialized = 0;
+
+
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_MSEC | TIME_SEC | TIME_MIN, NULL, dump, sizeof(dump));
+ printf("group_connex_components: %s\n", dump);
+ time_current(&t0);
+
+
+
/* Build result. */
res = build_result(desc, &connex_components);
if(res != RES_OK) {
@@ -1102,6 +1386,15 @@ senc_scene_analyze(struct senc_scene* scn, struct senc_descriptor** out_desc)
goto error;
}
+
+
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_MSEC | TIME_SEC | TIME_MIN, NULL, dump, sizeof(dump));
+ printf("build_result: %s\n", dump);
+ time_current(&t0);
+
+
+
exit:
if(connex_components_initialized)
darray_cc_descriptor_release(&connex_components);
@@ -1111,6 +1404,16 @@ exit:
if(triangles_comp_initialized) darray_triangle_comp_release(&triangles_comp);
if(trgsides) MEM_RM(scn->dev->allocator, trgsides);
if(desc) *out_desc = desc;
+
+
+
+ time_sub(&t0_, time_current(&t1), &t0_);
+ time_dump(&t0_, TIME_MSEC | TIME_SEC | TIME_MIN, NULL, dump, sizeof(dump));
+ printf("Total analyse: %s\n", dump);
+ time_current(&t0);
+
+
+
return res;
error:
if(desc) SENC(descriptor_ref_put(desc));
diff --git a/src/test_senc_many_triangles.c b/src/test_senc_many_triangles.c
@@ -83,7 +83,7 @@ main(int argc, char** argv)
CHK(mem_init_proxy_allocator(&allocator, &mem_default_allocator) == RES_OK);
CHK(senc_device_create
- (NULL, &allocator, SENC_NTHREADS_DEFAULT, 1, &dev) == RES_OK);
+ (NULL, &allocator, 2, 1, &dev) == RES_OK);
/* Create the scene */
CHK(senc_scene_create(dev, 2, &scn) == RES_OK);