commit cb78d579a6197178a06e03db2f433daa0298ebfb
parent 45d2b6456f86b17923d4d3b5c0d2830d5fe52b67
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Fri, 25 May 2018 16:10:08 +0200
Implement the htrdr_solve_transmission_buffer function
Diffstat:
9 files changed, 159 insertions(+), 121 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -28,6 +28,7 @@ find_package(RSys 0.6 REQUIRED)
find_package(StarSP 0.7 REQUIRED)
find_package(SVX REQUIRED)
find_package(HTCP REQUIRED)
+find_package(OpenMP 1.2 REQUIRED)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${RCMAKE_SOURCE_DIR})
include(rcmake)
@@ -70,12 +71,15 @@ rcmake_prepend_path(HTRDR_FILES_DOC ${PROJECT_SOURCE_DIR}/../)
add_executable(htrdr ${HTRDR_FILES_SRC} ${HTRDR_FILES_INC})
target_link_libraries(htrdr HTCP RSys SVX StarSP)
+
if(CMAKE_COMPILER_IS_GNUCC)
target_link_libraries(htrdr m)
+ set_target_properties(htrdr PROPERTIES LINK_FLAGS ${OpenMP_C_FLAGS})
endif()
set_target_properties(htrdr PROPERTIES
DEFINE_SYMBOL HTRDR_SHARED_BUILD
+ COMPILE_FLAGS "${OpenMP_C_FLAGS}"
VERSION ${VERSION}
SOVERSION ${VERSION_MAJOR})
diff --git a/src/htrdr.c b/src/htrdr.c
@@ -22,6 +22,7 @@
#include "htrdr_rectangle.h"
#include "htrdr_solve.h"
+#include <rsys/clock_time.h>
#include <rsys/mem_allocator.h>
#include <star/svx.h>
@@ -33,6 +34,8 @@
#include <fcntl.h> /* open */
#include <sys/stat.h> /* S_IRUSR & S_IWUSR */
+#include <omp.h>
+
/*******************************************************************************
* Helper functions
******************************************************************************/
@@ -123,40 +126,41 @@ error:
}
static void
-release_htrdr(ref_T* ref)
+dump_buffer(struct htrdr* htrdr)
{
- struct htrdr* htrdr = CONTAINER_OF(ref, struct htrdr, ref);
+ struct htrdr_buffer_layout buf_layout;
+ size_t x, y;
ASSERT(htrdr);
- if(htrdr->svx) SVX(device_ref_put(htrdr->svx));
- if(htrdr->clouds) SVX(tree_ref_put(htrdr->clouds));
- if(htrdr->buf) htrdr_buffer_ref_put(htrdr->buf);
- if(htrdr->rect) htrdr_rectangle_ref_put(htrdr->rect);
- logger_release(&htrdr->logger);
- MEM_RM(htrdr->allocator, htrdr);
+
+ htrdr_buffer_get_layout(htrdr->buf, &buf_layout);
+
+ fprintf(htrdr->output, "P3 %lu %lu\n255\n",
+ (unsigned long)buf_layout.width,
+ (unsigned long)buf_layout.height);
+ FOR_EACH(y, 0, buf_layout.height) {
+ FOR_EACH(x, 0, buf_layout.width) {
+ if(*((double*)htrdr_buffer_at(htrdr->buf, x, y)) < 1.0) {
+ fprintf(htrdr->output, "0 0 0\n");
+ } else {
+ fprintf(htrdr->output, "255 255 255\n");
+ }
+ }
+ }
}
/*******************************************************************************
* Local functions
******************************************************************************/
res_T
-htrdr_create
+htrdr_init
(struct mem_allocator* mem_allocator,
const struct htrdr_args* args,
- struct htrdr** out_htrdr)
+ struct htrdr* htrdr)
{
- struct htrdr* htrdr = NULL;
- struct mem_allocator* allocator = NULL;
res_T res = RES_OK;
- ASSERT(args && out_htrdr);
+ ASSERT(args && htrdr);
- allocator = mem_allocator ? mem_allocator : &mem_default_allocator;
- htrdr = MEM_CALLOC(allocator, 1, sizeof(*htrdr));
- if(!htrdr) {
- fprintf(stderr, "Could not allocat the htrdr main structure.\n");
- goto error;
- }
- ref_init(&htrdr->ref);
- htrdr->allocator = allocator;
+ htrdr->allocator = mem_allocator ? mem_allocator : &mem_default_allocator;
logger_init(htrdr->allocator, &htrdr->logger);
logger_set_stream(&htrdr->logger, LOG_ERROR, print_err, NULL);
@@ -164,27 +168,15 @@ htrdr_create
htrdr->dump_vtk = args->dump_vtk;
htrdr->verbose = args->verbose;
+ htrdr->nthreads = MMIN(args->nthreads, (unsigned)omp_get_num_procs());
- /* Integration plane */
- res = htrdr_rectangle_create(htrdr, args->rectangle.pos, args->rectangle.tgt,
- args->rectangle.up, args->rectangle.sz, &htrdr->rect);
- if(res != RES_OK) goto error;
-
- res = svx_device_create(&htrdr->logger, allocator, args->verbose, &htrdr->svx);
+ res = svx_device_create
+ (&htrdr->logger, htrdr->allocator, args->verbose, &htrdr->svx);
if(res != RES_OK) {
htrdr_log_err(htrdr, "could not create the Star-VoXel device.\n");
goto error;
}
- if(!args->output) {
- htrdr->output = stdout;
- } else {
- res = open_output_stream(htrdr, args);
- if(res != RES_OK) goto error;
- }
- res = clouds_load(htrdr, args->verbose, args->input);
- if(res != RES_OK) goto error;
-
if(!args->dump_vtk) { /* Legacy mode */
const size_t elmtsz = sizeof(double);
const size_t pitch = elmtsz * args->image.definition[0];
@@ -204,32 +196,33 @@ htrdr_create
res = htrdr_rectangle_create(htrdr, args->rectangle.pos, args->rectangle.tgt,
args->rectangle.up, args->rectangle.sz, &htrdr->rect);
if(res != RES_OK) goto error;
+ }
+ if(!args->output) {
+ htrdr->output = stdout;
+ } else {
+ res = open_output_stream(htrdr, args);
+ if(res != RES_OK) goto error;
}
+ res = clouds_load(htrdr, args->verbose, args->input);
+ if(res != RES_OK) goto error;
exit:
- *out_htrdr = htrdr;
return res;
error:
- if(htrdr) {
- htrdr_ref_put(htrdr);
- htrdr = NULL;
- }
+ htrdr_release(htrdr);
goto exit;
}
void
-htrdr_ref_get(struct htrdr* htrdr)
-{
- ASSERT(htrdr);
- ref_get(&htrdr->ref);
-}
-
-void
-htrdr_ref_put(struct htrdr* htrdr)
+htrdr_release(struct htrdr* htrdr)
{
ASSERT(htrdr);
- ref_put(&htrdr->ref, release_htrdr);
+ if(htrdr->svx) SVX(device_ref_put(htrdr->svx));
+ if(htrdr->clouds) SVX(tree_ref_put(htrdr->clouds));
+ if(htrdr->buf) htrdr_buffer_ref_put(htrdr->buf);
+ if(htrdr->rect) htrdr_rectangle_ref_put(htrdr->rect);
+ logger_release(&htrdr->logger);
}
res_T
@@ -240,12 +233,17 @@ htrdr_run(struct htrdr* htrdr)
res = clouds_dump_vtk(htrdr, htrdr->output);
if(res != RES_OK) goto error;
} else {
- double pos[3] = {3.4,2.2,0};
- double dir[3] = {0,0,1};
- double val = 0;
- res = htrdr_solve_transmission(htrdr, pos, dir, &val);
+ struct time t0, t1;
+ char buf[128];
+
+ time_current(&t0);
+ res = htrdr_solve_transmission_buffer(htrdr);
if(res != RES_OK) goto error;
- printf(">>>> T = %g\n", val);
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_ALL, NULL, buf, sizeof(buf));
+ fprintf(stderr, "Elapsed time: %s\n", buf);
+
+ dump_buffer(htrdr);
}
exit:
return res;
diff --git a/src/htrdr.h b/src/htrdr.h
@@ -38,26 +38,22 @@ struct htrdr {
FILE* output;
+ unsigned nthreads;
int dump_vtk;
int verbose;
struct logger logger;
struct mem_allocator* allocator;
- ref_T ref;
};
extern LOCAL_SYM res_T
-htrdr_create
+htrdr_init
(struct mem_allocator* allocator,
const struct htrdr_args* args,
- struct htrdr** htrdr);
+ struct htrdr* htrdr);
extern LOCAL_SYM void
-htrdr_ref_get
- (struct htrdr* htrdr);
-
-extern LOCAL_SYM void
-htrdr_ref_put
+htrdr_release
(struct htrdr* htrdr);
extern LOCAL_SYM res_T
diff --git a/src/htrdr_args.c b/src/htrdr_args.c
@@ -49,6 +49,9 @@ print_help(const char* cmd)
printf(
" -r <rectangle> define the integration plane.\n");
printf(
+" -t THREADS hint on the number of threads to use. By default use as\n"
+" many threads as CPU cores.\n");
+ printf(
" -v make the program more verbose.\n");
printf("\n");
printf(
@@ -171,7 +174,7 @@ parse_image_parameter(struct htrdr_args* args, const char* str)
} (void)0
if(!strcmp(key, "def")) {
PARSE("definition", parse_definition(val, args->image.definition));
- } else if(!strcmp(key, "ssp")) {
+ } else if(!strcmp(key, "spp")) {
PARSE("#samples per pixel", cstr_to_uint(val, &args->image.spp));
} else {
fprintf(stderr, "Invalid image parameter `%s'.\n", key);
@@ -191,7 +194,6 @@ parse_image_parameter(struct htrdr_args* args, const char* str)
goto error;
}
-
exit:
return res;
error:
@@ -268,7 +270,7 @@ htrdr_args_init(struct htrdr_args* args, int argc, char** argv)
*args = HTRDR_ARGS_DEFAULT;
- while((opt = getopt(argc, argv, "D:dfhI:i:o:r:v")) != -1) {
+ while((opt = getopt(argc, argv, "D:dfhI:i:o:r:t:v")) != -1) {
switch(opt) {
case 'D': res = parse_sun_dir(args, optarg); break;
case 'd': args->dump_vtk = 1; break;
@@ -288,16 +290,20 @@ htrdr_args_init(struct htrdr_args* args, int argc, char** argv)
res = parse_multiple_parameters
(args, optarg, parse_rectangle_parameter);
break;
+ case 't': /* Submit an hint on the number of threads to use */
+ res = cstr_to_uint(optarg, &args->nthreads);
+ if(res == RES_OK && !args->nthreads) res = RES_BAD_ARG;
+ break;
case 'v': args->verbose = 1; break;
default: res = RES_BAD_ARG; break;
}
- }
- if(res != RES_OK) {
- if(optarg) {
- fprintf(stderr, "%s: invalid option argumet '%s' -- '%c'\n",
- argv[0], optarg, opt);
+ if(res != RES_OK) {
+ if(optarg) {
+ fprintf(stderr, "%s: invalid option argument '%s' -- '%c'\n",
+ argv[0], optarg, opt);
+ }
+ goto error;
}
- goto error;
}
if(!args->input) {
fprintf(stderr, "Missing input file.\n");
diff --git a/src/htrdr_args.h b/src/htrdr_args.h
@@ -36,6 +36,7 @@ struct htrdr_args {
double main_dir[3];
+ unsigned nthreads; /* Hint on the number of threads to use */
int force_overwriting;
int dump_vtk; /* Dump the loaded cloud properties in a VTK file */
int verbose; /* Verbosity level */
@@ -55,6 +56,7 @@ struct htrdr_args {
1 /* #samples per pixel */ \
}, \
{0, 0, -1}, /* Main direction */ \
+ (unsigned)~0, /* #threads */ \
0, /* Force overwriting */ \
0, /* dump VTK */ \
0, /* Verbose flag */ \
diff --git a/src/htrdr_buffer.c b/src/htrdr_buffer.c
@@ -39,13 +39,10 @@ static void
buffer_release(ref_T* ref)
{
struct htrdr_buffer* buf = NULL;
- struct htrdr* htrdr = NULL;
ASSERT(ref);
buf = CONTAINER_OF(ref, struct htrdr_buffer, ref);
- htrdr = buf->htrdr;
- if(buf->mem) MEM_RM(htrdr->allocator, buf->mem);
- MEM_RM(htrdr->allocator, buf);
- htrdr_ref_put(htrdr);
+ if(buf->mem) MEM_RM(buf->htrdr->allocator, buf->mem);
+ MEM_RM(buf->htrdr->allocator, buf);
}
/*******************************************************************************
@@ -80,7 +77,7 @@ htrdr_buffer_create
res = RES_BAD_ARG;
goto error;
}
- if(elmtsz) {
+ if(!elmtsz) {
htrdr_log_err(htrdr,
"the size of the buffer's elements cannot be null.\n");
res = RES_BAD_ARG;
@@ -100,13 +97,13 @@ htrdr_buffer_create
goto error;
}
ref_init(&buf->ref);
- htrdr_ref_get(htrdr);
buf->htrdr = htrdr;
buf->width = width;
buf->height = height;
buf->pitch = pitch;
buf->elmtsz = elmtsz;
buf->align = align;
+ buf->htrdr = htrdr;
memsz = buf->pitch * buf->height * buf->elmtsz;
buf->mem = MEM_ALLOC_ALIGNED(htrdr->allocator, memsz, align);
diff --git a/src/htrdr_main.c b/src/htrdr_main.c
@@ -24,25 +24,26 @@
int
main(int argc, char** argv)
{
- struct htrdr* htrdr = NULL;
- struct htrdr_args args;
+ struct htrdr htrdr;
+ struct htrdr_args args = HTRDR_ARGS_DEFAULT;
size_t memsz = 0;
int err = 0;
+ int is_htrdr_init = 0;
res_T res = RES_OK;
res = htrdr_args_init(&args, argc, argv);
if(res != RES_OK) goto error;
if(args.quit) goto exit;
- res = htrdr_create(NULL, &args, &htrdr);
+ res = htrdr_init(NULL, &args, &htrdr);
if(res != RES_OK) goto error;
+ is_htrdr_init = 1;
- res = htrdr_run(htrdr);
+ res = htrdr_run(&htrdr);
if(res != RES_OK) goto error;
exit:
- if(htrdr) htrdr_ref_put(htrdr);
-
+ if(is_htrdr_init) htrdr_release(&htrdr);
htrdr_args_release(&args);
if((memsz = mem_allocated_size()) != 0) {
fprintf(stderr, "Memory leaks: %lu Bytes\n", (unsigned long)memsz);
diff --git a/src/htrdr_rectangle.c b/src/htrdr_rectangle.c
@@ -37,12 +37,9 @@ static void
rectangle_release(ref_T* ref)
{
struct htrdr_rectangle* rect;
- struct htrdr* htrdr;
ASSERT(ref);
rect = CONTAINER_OF(ref, struct htrdr_rectangle, ref);
- htrdr = rect->htrdr;
- MEM_RM(htrdr->allocator, rect);
- htrdr_ref_put(htrdr);
+ MEM_RM(rect->htrdr->allocator, rect);
}
/*******************************************************************************
@@ -69,10 +66,9 @@ htrdr_rectangle_create
goto error;
}
ref_init(&rect->ref);
- htrdr_ref_get(htrdr);
rect->htrdr = htrdr;
- if(sz[0] <= 0 || sz[1] <= 1) {
+ if(sz[0] <= 0 || sz[1] <= 0) {
htrdr_log_err(htrdr,
"invalid rectangle size `%g %g'. It must be strictly positive.\n",
SPLIT2(sz));
diff --git a/src/htrdr_transmission.c b/src/htrdr_transmission.c
@@ -22,6 +22,8 @@
#include <star/svx.h>
#include <rsys/math.h>
+#include <omp.h>
+
struct transmit_context {
double tau;
};
@@ -30,6 +32,17 @@ static const struct transmit_context TRANSMIT_CONTEXT_NULL = {0};
/*******************************************************************************
* Helper functions
******************************************************************************/
+static FINLINE uint16_t
+morton2D_decode(const uint32_t u32)
+{
+ uint32_t x = u32 & 0x55555555;
+ x = (x | (x >> 1)) & 0x33333333;
+ x = (x | (x >> 2)) & 0x0F0F0F0F;
+ x = (x | (x >> 4)) & 0x00FF00FF;
+ x = (x | (x >> 8)) & 0x0000FFFF;
+ return (uint16_t)x;
+}
+
static int
discard_hit
(const struct svx_hit* hit,
@@ -91,8 +104,9 @@ htrdr_solve_transmission_buffer(struct htrdr* htrdr)
struct htrdr_buffer_layout buf_layout;
struct ssp_rng* rng = NULL;
double pixsz[2];
- size_t ix, iy, ispp;
- res_T res = RES_OK;
+ int64_t mcode_max;
+ int64_t mcode;
+ ATOMIC res = RES_OK;
ASSERT(htrdr && htrdr->rect && htrdr->buf);
htrdr_buffer_get_layout(htrdr->buf, &buf_layout);
@@ -107,40 +121,64 @@ htrdr_solve_transmission_buffer(struct htrdr* htrdr)
pixsz[0] = 1.0 / (double)buf_layout.width;
pixsz[1] = 1.0 / (double)buf_layout.height;
- FOR_EACH(iy, 0, buf_layout.height) {
- const double y = (double)iy * pixsz[1]; /* Y in normalized image space */
-
- FOR_EACH(ix, 0, buf_layout.width) {
- const double x = (double)ix * pixsz[0]; /* X in normalized image space */
- double* val = htrdr_buffer_at(htrdr->buf, ix, iy);
- double accum = 0;
-
- FOR_EACH(ispp, 0, htrdr->spp) {
- double pixsamp[2]; /* Pixel sample */
- double pos[3]; /* World space position */
- double T;
-
- /* Uniformaly sample the pixel in the normalized image space */
- pixsamp[0] = x + ssp_rng_canonical(rng)*pixsz[0];
- pixsamp[1] = y + ssp_rng_canonical(rng)*pixsz[1];
-
- /* Compute the world space position of the sample according to the
- * image plane */
- htrdr_rectangle_sample_pos(htrdr->rect, pixsamp, pos);
-
- /* Solve the transmission for the sample position wrt the main dir */
- res = htrdr_solve_transmission(htrdr, pos, htrdr->main_dir, &T);
- if(res != RES_OK) goto error;
-
- accum += T;
+ /* Compute the maximum morton code */
+ mcode_max = (int64_t)round_up_pow2(MMAX(buf_layout.height, buf_layout.width));
+ mcode_max = mcode_max*mcode_max;
+
+ /* Setup the number of threads */
+ omp_set_num_threads((int)htrdr->nthreads);
+
+ #pragma omp parallel for schedule(dynamic, 32/*chunck size*/)
+ for(mcode=0; mcode<mcode_max; ++mcode) {
+ size_t ispp;
+ double* val;
+ double x, y;
+ double accum;
+ uint16_t ix, iy;
+ res_T res_local = RES_OK;
+
+ if(ATOMIC_GET(&res) != RES_OK) continue;
+
+ ix = morton2D_decode((uint32_t)(mcode>>0));
+ if(ix > buf_layout.width) continue;
+ iy = morton2D_decode((uint32_t)(mcode>>1));
+ if(iy > buf_layout.height) continue;
+
+ val = htrdr_buffer_at(htrdr->buf, ix, iy);
+
+ /* Define lower left the pixel coordinate in the normalized image space */
+ x = (double)ix*pixsz[0];
+ y = (double)iy*pixsz[1];
+
+ accum = 0;
+ FOR_EACH(ispp, 0, htrdr->spp) {
+ double pixsamp[2]; /* Pixel sample */
+ double pos[3]; /* World space position */
+ double T;
+
+ /* Uniformaly sample the pixel in the normalized image space */
+ pixsamp[0] = x + ssp_rng_canonical(rng)*pixsz[0];
+ pixsamp[1] = y + ssp_rng_canonical(rng)*pixsz[1];
+
+ /* Compute the world space position of the sample according to the
+ * image plane */
+ htrdr_rectangle_sample_pos(htrdr->rect, pixsamp, pos);
+
+ /* Solve the transmission for the sample position wrt the main dir */
+ res_local = htrdr_solve_transmission(htrdr, pos, htrdr->main_dir, &T);
+ if(res_local != RES_OK) {
+ ATOMIC_SET(&res, res_local);
+ continue;
}
- *val = accum / (double)htrdr->spp;
+
+ accum += T;
}
+ *val = accum / (double)htrdr->spp;
}
exit:
if(rng) SSP(rng_ref_put(rng));
- return res;
+ return (res_T)res;
error:
goto exit;
}