commit 56fa5639dc8b223bf8b25a561ab6174f961bb504
parent b9aba36680492ebc8f20769da4f6f90d3c63619f
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Tue, 17 Mar 2020 10:36:41 +0100
Plug the long wave realisation in the draw function
Diffstat:
9 files changed, 1047 insertions(+), 922 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -93,7 +93,7 @@ set(HTRDR_FILES_SRC
htrdr_camera.c
htrdr_compute_radiance_sw.c
htrdr_compute_radiance_lw.c
- htrdr_draw_radiance_sw.c
+ htrdr_draw_radiance.c
htrdr_grid.c
htrdr_ground.c
htrdr_interface.c
diff --git a/src/htrdr.c b/src/htrdr.c
@@ -357,6 +357,74 @@ error:
goto exit;
}
+static res_T
+setup_lw_cdf(struct htrdr* htrdr)
+{
+ /* Reference temperature used to compute the Long Wave cumulative */
+ const double Tref = 290; /* In Kelvin */
+ double* cdf = NULL;
+ double* pdf = NULL;
+ double sum = 0;
+ size_t i;
+ size_t nbands;
+ res_T res = RES_OK;
+ ASSERT(htrdr && htsky_is_long_wave(htrdr->sky));
+
+ nbands = htsky_get_spectral_bands_count(htrdr->sky);
+ res = darray_double_resize(&htrdr->lw_cdf, nbands);
+ if(res != RES_OK) {
+ htrdr_log_err(htrdr,
+ "error allocating the CDF of the long wave spectral bands -- %s.\n",
+ res_to_cstr(res));
+ goto error;
+ }
+
+ /* Alias the same array by the cdf and the pdf variable to make easier the
+ * reading of the code */
+ pdf = darray_double_data_get(&htrdr->lw_cdf);
+ cdf = darray_double_data_get(&htrdr->lw_cdf);
+
+ /* Compute the *unormalized* probability to sample a long wave band */
+ sum = 0;
+ FOR_EACH(i, 0, nbands) {
+ const size_t iband = htsky_get_spectral_band_id(htrdr->sky, i);
+ double wlens[2];
+ HTSKY(get_spectral_band_bounds(htrdr->sky, iband, wlens));
+
+ /* Convert from nanometer to meter */
+ wlens[0] = wlens[0] * 1.e9;
+ wlens[1] = wlens[1] * 1.e9;
+
+ /* Compute the probability of the current band */
+ pdf[i] = planck(wlens[0], wlens[1], Tref);
+
+ /* Update the norm */
+ sum += pdf[i];
+ }
+
+ /* Compute the cumulative of the previously computed probabilities */
+ FOR_EACH(i, 0, nbands) {
+ /* Normalize the probability */
+ pdf[i] /= sum;
+
+ /* Setup the cumulative */
+ if(i == 0) {
+ cdf[i] = pdf[i];
+ } else {
+ cdf[i] = pdf[i] + cdf[i-1];
+ ASSERT(cdf[i] >= cdf[i-1]);
+ }
+ }
+
+ cdf[nbands - 1] = 1.0;
+
+exit:
+ return res;
+error:
+ darray_double_clear(&htrdr->lw_cdf);
+ goto exit;
+}
+
/*******************************************************************************
* Local functions
******************************************************************************/
@@ -386,6 +454,8 @@ htrdr_init
str_init(htrdr->allocator, &htrdr->output_name);
+ darray_double_init(htrdr->allocator, &htrdr->lw_cdf);
+
nthreads_max = MMAX(omp_get_max_threads(), omp_get_num_procs());
htrdr->dump_vtk = args->dump_vtk;
htrdr->verbose = args->verbose;
@@ -482,6 +552,12 @@ htrdr_init
res = htsky_create(&htrdr->logger, htrdr->allocator, &htsky_args, &htrdr->sky);
if(res != RES_OK) goto error;
+ if(htsky_is_long_wave(htrdr->sky)) {
+ /* Define the CDF used to sample a long wave band */
+ res = setup_lw_cdf(htrdr);
+ if(res != RES_OK) goto error;
+ }
+
htrdr->lifo_allocators = MEM_CALLOC
(htrdr->allocator, htrdr->nthreads, sizeof(*htrdr->lifo_allocators));
if(!htrdr->lifo_allocators) {
@@ -530,6 +606,7 @@ htrdr_release(struct htrdr* htrdr)
MEM_RM(htrdr->allocator, htrdr->lifo_allocators);
}
str_release(&htrdr->output_name);
+ darray_double_release(&htrdr->lw_cdf);
logger_release(&htrdr->logger);
}
@@ -556,7 +633,7 @@ htrdr_run(struct htrdr* htrdr)
}
}
} else {
- res = htrdr_draw_radiance_sw(htrdr, htrdr->cam, htrdr->width,
+ res = htrdr_draw_radiance(htrdr, htrdr->cam, htrdr->width,
htrdr->height, htrdr->spp, htrdr->buf);
if(res != RES_OK) goto error;
if(htrdr->mpi_rank == 0) {
diff --git a/src/htrdr.h b/src/htrdr.h
@@ -17,6 +17,7 @@
#ifndef HTRDR_H
#define HTRDR_H
+#include <rsys/dynamic_array_double.h>
#include <rsys/logger.h>
#include <rsys/ref_count.h>
#include <rsys/str.h>
@@ -54,6 +55,8 @@ struct htrdr {
struct htsky* sky;
+ struct darray_double lw_cdf; /* CDF to sample a Long Waves band */
+
size_t spp; /* #samples per pixel */
size_t width; /* Image width */
size_t height; /* Image height */
diff --git a/src/htrdr_c.h b/src/htrdr_c.h
@@ -103,6 +103,65 @@ morton_xyz_decode_u21(const uint64_t code, uint32_t xyz[3])
xyz[2] = (uint32_t)morton3D_decode_u21(code >> 0);
}
+static INLINE double
+wiebelt(const double v)
+{
+ int m;
+ double w, v2, v4;
+ /*.153989717364e+00;*/
+ const double fifteen_over_pi_power_4 = 15.0/(PI*PI*PI*PI);
+ const double z0 = 1.0/3.0;
+ const double z1 = 1.0/8.0;
+ const double z2 = 1.0/60.0;
+ const double z4 = 1.0/5040.0;
+ const double z6 = 1.0/272160.0;
+ const double z8 = 1.0/13305600.0;
+
+ if(v >= 2.) {
+ w = 0;
+ for(m=1; m<6 ;m++)
+ w+=exp(-m*v)/(m*m*m*m) * (((m*v+3)*m*v+6)*m*v+6);
+ w = w * fifteen_over_pi_power_4;
+ } else {
+ v2 = v*v;
+ v4 = v2*v2;
+ w = z0 - z1*v + z2*v2 - z4*v2*v2 + z6*v4*v2 - z8*v4*v4;
+ w = 1. - fifteen_over_pi_power_4*v2*v*w;
+ }
+ ASSERT(w >= 0.0 && w <= 1.0);
+ return w;
+}
+
+static INLINE double
+blackbody_fraction
+ (const double lambda0, /* In meter */
+ const double lambda1, /* In meter */
+ const double temperature) /* In Kelvin */
+{
+ const double C2 = 1.43877735e-2; /* m.K */
+ double x0 = C2 / lambda0;
+ double x1 = C2 / lambda1;
+ double v0 = x0 / temperature;
+ double v1 = x1 / temperature;
+ double w0 = wiebelt(v0);
+ double w1 = wiebelt(v1);
+ return w1 - w0;
+}
+
+static INLINE double
+planck
+ (const double lambda_min, /* In meter */
+ const double lambda_max, /* In meter */
+ const double temperature) /* In Kelvin */
+{
+ const double T2 = temperature*temperature;
+ const double T4 = T2*T2;
+ const double BOLTZMANN_CONSTANT = 5.6696e-8; /* W/m^2/K^4 */
+ ASSERT(lambda_min < lambda_max && temperature >= 0);
+ return blackbody_fraction(lambda_min, lambda_max, temperature)
+ * BOLTZMANN_CONSTANT * T4;
+}
+
extern LOCAL_SYM res_T
open_output_stream
(struct htrdr* htrdr,
@@ -146,5 +205,13 @@ update_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message progress)
print_mpi_progress(htrdr, progress);
}
+static FINLINE int
+cmp_dbl(const void* a, const void* b)
+{
+ const double d0 = *((const double*)a);
+ const double d1 = *((const double*)b);
+ return d0 < d1 ? -1 : (d0 > d1 ? 1 : 0);
+}
+
#endif /* HTRDR_C_H */
diff --git a/src/htrdr_compute_radiance_lw.c b/src/htrdr_compute_radiance_lw.c
@@ -30,8 +30,6 @@
#include <rsys/double2.h>
#include <rsys/double3.h>
-#define BOLTZMANN_CONSTANT 5.6696e-8 /* W/m^2/K^4 */
-
enum event {
EVENT_ABSORPTION,
EVENT_SCATTERING,
@@ -56,64 +54,6 @@ static const struct filter_context FILTER_CONTEXT_NULL = {
/*******************************************************************************
* Helper functions
******************************************************************************/
-static FINLINE double
-wiebelt(const double v)
-{
- int m;
- double w, v2, v4;
- /*.153989717364e+00;*/
- const double fifteen_over_pi_power_4 = 15.0/(PI*PI*PI*PI);
- const double z0 = 1.0/3.0;
- const double z1 = 1.0/8.0;
- const double z2 = 1.0/60.0;
- const double z4 = 1.0/5040.0;
- const double z6 = 1.0/272160.0;
- const double z8 = 1.0/13305600.0;
-
- if(v >= 2.) {
- w = 0;
- for(m=1; m<6 ;m++)
- w+=exp(-m*v)/(m*m*m*m) * (((m*v+3)*m*v+6)*m*v+6);
- w = w * fifteen_over_pi_power_4;
- } else {
- v2 = v*v;
- v4 = v2*v2;
- w = z0 - z1*v + z2*v2 - z4*v2*v2 + z6*v4*v2 - z8*v4*v4;
- w = 1. - fifteen_over_pi_power_4*v2*v*w;
- }
- ASSERT(w >= 0.0 && w <= 1.0);
- return w;
-}
-
-static FINLINE double
-blackbody_fraction
- (const double lambda0, /* In meter */
- const double lambda1, /* In meter */
- const double temperature) /* In Kelvin */
-{
- const double C2 = 1.43877735e-2; /* m.K */
- double x0 = C2 / lambda0;
- double x1 = C2 / lambda1;
- double v0 = x0 / temperature;
- double v1 = x1 / temperature;
- double w0 = wiebelt(v0);
- double w1 = wiebelt(v1);
- return w1 - w0;
-}
-
-static FINLINE double
-planck
- (const double lambda_min, /* In meter */
- const double lambda_max, /* In meter */
- const double temperature) /* In Kelvin */
-{
- const double T2 = temperature*temperature;
- const double T4 = T2*T2;
- ASSERT(lambda_min < lambda_max && temperature >= 0);
- return blackbody_fraction(lambda_min, lambda_max, temperature)
- * BOLTZMANN_CONSTANT * T4;
-}
-
static int
hit_filter
(const struct svx_hit* hit,
diff --git a/src/htrdr_draw_radiance.c b/src/htrdr_draw_radiance.c
@@ -0,0 +1,897 @@
+/* Copyright (C) 2018, 2019, 2020 |Meso|Star> (contact@meso-star.com)
+ * Copyright (C) 2018, 2019 CNRS, Université Paul Sabatier
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L /* nanosleep && nextafter */
+
+#include "htrdr.h"
+#include "htrdr_c.h"
+#include "htrdr_buffer.h"
+#include "htrdr_camera.h"
+#include "htrdr_solve.h"
+
+#include <high_tune/htsky.h>
+
+#include <rsys/algorithm.h>
+#include <rsys/clock_time.h>
+#include <rsys/cstr.h>
+#include <rsys/dynamic_array_u32.h>
+#include <rsys/math.h>
+#include <rsys/mutex.h>
+#include <star/ssp.h>
+
+#include <omp.h>
+#include <mpi.h>
+#include <time.h>
+#include <unistd.h>
+
+#define RNG_SEQUENCE_SIZE 10000
+
+#define TILE_MCODE_NULL UINT32_MAX
+#define TILE_SIZE 32 /* Definition in X & Y of a tile */
+STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2);
+
+/* Tile of row ordered image pixels */
+struct tile {
+ struct list_node node;
+ struct mem_allocator* allocator;
+ ref_T ref;
+
+ struct tile_data {
+ uint16_t x, y; /* 2D coordinates of the tile in tile space */
+ /* Simulate the flexible array member of the C99 standard. */
+ struct htrdr_accum accums[1/*dummy element*/];
+ } data;
+};
+
+/* List of tile to compute onto the MPI process. */
+struct proc_work {
+ struct mutex* mutex;
+ struct darray_u32 tiles; /* #tiles to render */
+ size_t itile; /* Next tile to render in the above list of tiles */
+};
+
+/*******************************************************************************
+ * Helper functions
+ ******************************************************************************/
+static FINLINE uint16_t
+morton2D_decode(const uint32_t u32)
+{
+ uint32_t x = u32 & 0x55555555;
+ x = (x | (x >> 1)) & 0x33333333;
+ x = (x | (x >> 2)) & 0x0F0F0F0F;
+ x = (x | (x >> 4)) & 0x00FF00FF;
+ x = (x | (x >> 8)) & 0x0000FFFF;
+ return (uint16_t)x;
+}
+
+static FINLINE uint32_t
+morton2D_encode(const uint16_t u16)
+{
+ uint32_t u32 = u16;
+ u32 = (u32 | (u32 << 8)) & 0x00FF00FF;
+ u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F;
+ u32 = (u32 | (u32 << 2)) & 0x33333333;
+ u32 = (u32 | (u32 << 1)) & 0x55555555;
+ return u32;
+}
+
+static FINLINE struct tile*
+tile_create(struct mem_allocator* allocator)
+{
+ struct tile* tile;
+ const size_t tile_sz =
+ sizeof(struct tile) - sizeof(struct htrdr_accum)/*rm dummy accum*/;
+ const size_t buf_sz = /* Flexiblbe array element */
+ TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]);
+ ASSERT(allocator);
+
+ tile = MEM_ALLOC(allocator, tile_sz+buf_sz);
+ if(!tile) return NULL;
+
+ ref_init(&tile->ref);
+ list_init(&tile->node);
+ tile->allocator = allocator;
+ ASSERT(IS_ALIGNED(&tile->data.accums, ALIGNOF(struct htrdr_accum)));
+
+ return tile;
+}
+
+static INLINE void
+tile_ref_get(struct tile* tile)
+{
+ ASSERT(tile);
+ tile_ref_get(tile);
+}
+
+static INLINE void
+release_tile(ref_T* ref)
+{
+ struct tile* tile = CONTAINER_OF(ref, struct tile, ref);
+ ASSERT(ref);
+ MEM_RM(tile->allocator, tile);
+}
+
+static INLINE void
+tile_ref_put(struct tile* tile)
+{
+ ASSERT(tile);
+ ref_put(&tile->ref, release_tile);
+}
+
+static FINLINE struct htrdr_accum*
+tile_at
+ (struct tile* tile,
+ const size_t x, /* In tile space */
+ const size_t y) /* In tile space */
+{
+ ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE);
+ return tile->data.accums + (y*TILE_SIZE + x) * HTRDR_ESTIMATES_COUNT__;
+}
+
+static void
+write_tile_data(struct htrdr_buffer* buf, const struct tile_data* tile_data)
+{
+ struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
+ size_t icol, irow;
+ size_t irow_tile;
+ size_t ncols_tile, nrows_tile;
+ char* buf_mem;
+ ASSERT(buf && tile_data);
+
+ htrdr_buffer_get_layout(buf, &layout);
+ buf_mem = htrdr_buffer_get_data(buf);
+ ASSERT(layout.elmt_size == sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]));
+
+ /* Compute the row/column of the tile origin into the buffer */
+ icol = tile_data->x * (size_t)TILE_SIZE;
+ irow = tile_data->y * (size_t)TILE_SIZE;
+
+ /* Define the number of tile row/columns to write into the buffer */
+ ncols_tile = MMIN(icol + TILE_SIZE, layout.width) - icol;
+ nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow;
+
+ /* Copy the tile data, row by row */
+ FOR_EACH(irow_tile, 0, nrows_tile) {
+ char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch;
+ const struct htrdr_accum* tile_row =
+ tile_data->accums + irow_tile*TILE_SIZE*HTRDR_ESTIMATES_COUNT__;
+ memcpy(buf_row + icol*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]),
+ tile_row, ncols_tile*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]));
+ }
+}
+
+static INLINE void
+proc_work_init(struct mem_allocator* allocator, struct proc_work* work)
+{
+ ASSERT(work);
+ darray_u32_init(allocator, &work->tiles);
+ work->itile = 0;
+ CHK(work->mutex = mutex_create());
+}
+
+static INLINE void
+proc_work_release(struct proc_work* work)
+{
+ darray_u32_release(&work->tiles);
+ mutex_destroy(work->mutex);
+}
+
+static INLINE void
+proc_work_reset(struct proc_work* work)
+{
+ ASSERT(work);
+ mutex_lock(work->mutex);
+ darray_u32_clear(&work->tiles);
+ work->itile = 0;
+ mutex_unlock(work->mutex);
+}
+
+static INLINE void
+proc_work_add_tile(struct proc_work* work, const uint32_t mcode)
+{
+ mutex_lock(work->mutex);
+ CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK);
+ mutex_unlock(work->mutex);
+}
+
+static INLINE uint32_t
+proc_work_get_tile(struct proc_work* work)
+{
+ uint32_t mcode;
+ ASSERT(work);
+ mutex_lock(work->mutex);
+ if(work->itile >= darray_u32_size_get(&work->tiles)) {
+ mcode = TILE_MCODE_NULL;
+ } else {
+ mcode = darray_u32_cdata_get(&work->tiles)[work->itile];
+ ++work->itile;
+ }
+ mutex_unlock(work->mutex);
+ return mcode;
+}
+
+static INLINE size_t
+proc_work_get_ntiles(struct proc_work* work)
+{
+ size_t sz = 0;
+ ASSERT(work);
+ mutex_lock(work->mutex);
+ sz = darray_u32_size_get(&work->tiles);
+ mutex_unlock(work->mutex);
+ return sz;
+}
+
+static void
+mpi_wait_for_request(struct htrdr* htrdr, MPI_Request* req)
+{
+ ASSERT(htrdr && req);
+
+ /* Wait for process synchronisation */
+ for(;;) {
+ struct timespec t;
+ int complete;
+ t.tv_sec = 0;
+ t.tv_nsec = 10000000; /* 10ms */
+
+ mutex_lock(htrdr->mpi_mutex);
+ MPI(Test(req, &complete, MPI_STATUS_IGNORE));
+ mutex_unlock(htrdr->mpi_mutex);
+ if(complete) break;
+
+ nanosleep(&t, NULL);
+ }
+}
+
+static void
+mpi_probe_thieves
+ (struct htrdr* htrdr,
+ struct proc_work* work,
+ ATOMIC* probe_thieves)
+{
+ uint32_t tiles[UINT8_MAX];
+ struct timespec t;
+ ASSERT(htrdr && work && probe_thieves);
+
+ if(htrdr->mpi_nprocs == 1) /* The process is alone. No thief is possible */
+ return;
+
+ t.tv_sec = 0;
+
+ /* Protect MPI calls of multiple invocations from concurrent threads */
+ #define P_MPI(Func) { \
+ mutex_lock(htrdr->mpi_mutex); \
+ MPI(Func); \
+ mutex_unlock(htrdr->mpi_mutex); \
+ } (void)0
+
+ while(ATOMIC_GET(probe_thieves)) {
+ MPI_Status status;
+ size_t itile;
+ int msg;
+
+ /* Probe if a steal request was submitted by any processes */
+ P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg,
+ &status));
+
+ if(msg) { /* A steal request was posted */
+ MPI_Request req;
+ uint8_t ntiles_to_steal;
+
+ /* Asynchronously receive the steal request */
+ P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE,
+ HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req));
+
+ /* Wait for the completion of the steal request */
+ mpi_wait_for_request(htrdr, &req);
+
+ /* Thief some tiles */
+ FOR_EACH(itile, 0, ntiles_to_steal) {
+ tiles[itile] = proc_work_get_tile(work);
+ }
+ P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE,
+ HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD));
+ }
+ t.tv_nsec = 500000000; /* 500ms */
+ nanosleep(&t, NULL);
+ }
+ #undef P_MPI
+}
+
+static int
+mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng)
+{
+ int iproc, i;
+ int dst_rank;
+ ASSERT(htrdr && rng && htrdr->mpi_nworking_procs);
+
+ /* Sample the index of the 1st active process */
+ iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs);
+
+ /* Find the rank of the sampled active process. Use a simple linear search
+ * since the overall number of processes should be quite low; at most few
+ * dozens. */
+ i = 0;
+ FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) {
+ if(htrdr->mpi_working_procs[dst_rank] == 0) continue; /* Inactive process */
+ if(i == iproc) break; /* The rank of the sampled process is found */
+ ++i;
+ }
+ ASSERT(dst_rank < htrdr->mpi_nprocs);
+ return dst_rank;
+}
+
+/* Return the number of stolen tiles */
+static size_t
+mpi_steal_work
+ (struct htrdr* htrdr,
+ struct ssp_rng* rng,
+ struct proc_work* work)
+{
+ MPI_Request req;
+ size_t itile;
+ size_t nthieves = 0;
+ uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */
+ int proc_to_steal; /* Process to steal */
+ uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16);
+ ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX);
+
+ /* Protect MPI calls of multiple invocations from concurrent threads */
+ #define P_MPI(Func) { \
+ mutex_lock(htrdr->mpi_mutex); \
+ MPI(Func); \
+ mutex_unlock(htrdr->mpi_mutex); \
+ } (void)0
+
+ /* No more working process => nohting to steal */
+ if(!htrdr->mpi_nworking_procs) return 0;
+
+ /* Sample a process to steal */
+ proc_to_steal = mpi_sample_working_process(htrdr, rng);
+
+ /* Send a steal request to the sampled process and wait for a response */
+ P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal,
+ HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD));
+
+ /* Receive the stolen tile from the sampled process */
+ P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal,
+ HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req));
+
+ mpi_wait_for_request(htrdr, &req);
+
+ FOR_EACH(itile, 0, ntiles_to_steal) {
+ if(tiles[itile] == TILE_MCODE_NULL) {
+ ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0);
+ htrdr->mpi_working_procs[proc_to_steal] = 0;
+ htrdr->mpi_nworking_procs--;
+ break;
+ }
+ proc_work_add_tile(work, tiles[itile]);
+ ++nthieves;
+ }
+ #undef P_MPI
+ return nthieves;
+}
+
+static res_T
+mpi_gather_tiles
+ (struct htrdr* htrdr,
+ struct htrdr_buffer* buf,
+ const size_t ntiles,
+ struct list_node* tiles)
+{
+ /* Compute the size of the tile_data */
+ const size_t msg_sz =
+ sizeof(struct tile_data) - sizeof(struct htrdr_accum)/*dummy*/
+ + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]);
+
+ struct list_node* node = NULL;
+ struct tile* tile = NULL;
+ res_T res = RES_OK;
+ ASSERT(htrdr && tiles);
+ ASSERT(htrdr->mpi_rank != 0 || buf);
+ (void)ntiles;
+
+ if(htrdr->mpi_rank != 0) { /* Non master process */
+ /* Send the computed tile to the master process */
+ LIST_FOR_EACH(node, tiles) {
+ struct tile* t = CONTAINER_OF(node, struct tile, node);
+ MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0,
+ HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD));
+ }
+ } else { /* Master process */
+ size_t itile = 0;
+
+ LIST_FOR_EACH(node, tiles) {
+ struct tile* t = CONTAINER_OF(node, struct tile, node);
+ write_tile_data(buf, &t->data);
+ ++itile;
+ }
+
+ if(itile != ntiles) {
+ ASSERT(htrdr->mpi_nprocs > 1);
+
+ /* Create a temporary tile to receive the tile data computed by the
+ * concurrent MPI processes */
+ tile = tile_create(htrdr->allocator);
+ if(!tile) {
+ res = RES_MEM_ERR;
+ htrdr_log_err(htrdr,
+ "could not allocate the temporary tile used to gather the process "
+ "output data -- %s.\n", res_to_cstr(res));
+ goto error;
+ }
+
+ /* Receive the tile data of the concurret MPI processes */
+ FOR_EACH(itile, itile, ntiles) {
+ MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE,
+ HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE));
+ write_tile_data(buf, &tile->data);
+ }
+ }
+ }
+
+exit:
+ if(tile) tile_ref_put(tile);
+ return res;
+error:
+ goto exit;
+}
+
+static INLINE size_t
+sample_lw_spectral_interval(struct htrdr* htrdr, const double r)
+{
+ const double* cdf = NULL;
+ const double* find = NULL;
+ double r_next = nextafter(r, DBL_MAX);
+ size_t cdf_length = 0;
+ size_t i;
+ ASSERT(htrdr && iband);
+ ASSERT(r >= 0 && r < 1);
+
+ cdf = darray_double_cdata_get(&htrdr->lw_cdf);
+ cdf_length = darray_double_size_get(&htrdr->lw_cdf);
+
+ /* Use r_next rather than r in order to find the first entry that is not less
+ * than *or equal* to r */
+ find = search_lower_bound(&r_next, cdf, cdf_length, sizeof(double), cmp_dbl);
+ ASSERT(find);
+
+ i = (size_t)(find - cdf);
+ ASSERT(i < cdf_length && cdf[i] > r && (!i || cdf[i-1] <= r));
+ return i;
+}
+
+static res_T
+draw_tile
+ (struct htrdr* htrdr,
+ const size_t ithread,
+ const int64_t tile_mcode, /* For debug only */
+ const size_t tile_org[2], /* Origin of the tile in pixel space */
+ const size_t tile_sz[2], /* Definition of the tile */
+ const double pix_sz[2], /* Size of a pixel in the normalized image plane */
+ const struct htrdr_camera* cam,
+ const size_t spp, /* #samples per pixel */
+ struct ssp_rng* rng,
+ struct tile* tile)
+{
+ size_t nchannels;
+ size_t npixels;
+ size_t mcode; /* Morton code of tile pixel */
+ ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && tile);
+ (void)tile_mcode;
+ /* Adjust the #pixels to process them wrt a morton order */
+ npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1]));
+ npixels *= npixels;
+
+ /* Define how many channels to handle */
+ nchannels = htsky_is_long_wave(htrdr->sky) ? 1 : 3;
+
+ FOR_EACH(mcode, 0, npixels) {
+ struct htrdr_accum* pix_accums;
+ size_t ipix_tile[2]; /* Pixel coord in the tile */
+ size_t ipix[2]; /* Pixel coord in the buffer */
+ size_t ichannel;
+
+ ipix_tile[0] = morton2D_decode((uint32_t)(mcode>>0));
+ if(ipix_tile[0] >= tile_sz[0]) continue; /* Pixel is out of tile */
+ ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1));
+ if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */
+
+ /* Fetch and reset the pixel accumulator */
+ pix_accums = tile_at(tile, ipix_tile[0], ipix_tile[1]);
+
+ /* Reset the pixel accumulators */
+ pix_accums[HTRDR_ESTIMATE_X] = HTRDR_ACCUM_NULL;
+ pix_accums[HTRDR_ESTIMATE_Y] = HTRDR_ACCUM_NULL;
+ pix_accums[HTRDR_ESTIMATE_Z] = HTRDR_ACCUM_NULL;
+ pix_accums[HTRDR_ESTIMATE_TIME] = HTRDR_ACCUM_NULL;
+
+ /* Compute the pixel coordinate */
+ ipix[0] = tile_org[0] + ipix_tile[0];
+ ipix[1] = tile_org[1] + ipix_tile[1];
+
+ FOR_EACH(ichannel, 0, nchannels) {
+ /* Check that the X, Y and Z estimates are stored in accumulators 0, 1 et
+ * 2, respectively */
+ STATIC_ASSERT
+ ( HTRDR_ESTIMATE_X == 0
+ && HTRDR_ESTIMATE_Y == 1
+ && HTRDR_ESTIMATE_Z == 2,
+ Unexpected_htrdr_estimate_enumerate);
+ size_t isamp;
+
+ FOR_EACH(isamp, 0, spp) {
+ struct time t0, t1;
+ double pix_samp[2];
+ double ray_org[3];
+ double ray_dir[3];
+ double weight;
+ double r0, r1;
+ size_t iband;
+ size_t iquad;
+ double usec;
+
+ /* Begin the registration of the time spent to in the realisation */
+ time_current(&t0);
+
+ /* Sample a position into the pixel, in the normalized image plane */
+ pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0];
+ pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1];
+
+ /* Generate a ray starting from the pinhole camera and passing through the
+ * pixel sample */
+ htrdr_camera_ray(cam, pix_samp, ray_org, ray_dir);
+
+ r0 = ssp_rng_canonical(rng);
+ r1 = ssp_rng_canonical(rng);
+
+ if(htsky_is_long_wave(htrdr->sky)) {
+ /* Sample a spectral band and a quadrature point */
+ iband = sample_lw_spectral_interval(htrdr, r0);
+ iquad = htsky_spectral_band_sample_quadrature(htrdr->sky, r1, iband);
+ /* Compute the luminance */
+ weight = htrdr_compute_radiance_lw
+ (htrdr, ithread, rng, ray_org, ray_dir, iband, iquad);
+ } else {
+ /* Sample a spectral band and a quadrature point */
+ switch(ichannel) {
+ case 0:
+ htsky_sample_sw_spectral_data_CIE_1931_X
+ (htrdr->sky, r0, r1, &iband, &iquad);
+ break;
+ case 1:
+ htsky_sample_sw_spectral_data_CIE_1931_Y
+ (htrdr->sky, r0, r1, &iband, &iquad);
+ break;
+ case 2:
+ htsky_sample_sw_spectral_data_CIE_1931_Z
+ (htrdr->sky, r0, r1, &iband, &iquad);
+ break;
+ default: FATAL("Unreachable code.\n"); break;
+ }
+ /* Compute the luminance */
+ weight = htrdr_compute_radiance_sw
+ (htrdr, ithread, rng, ray_org, ray_dir, iband, iquad);
+ }
+ ASSERT(weight >= 0);
+
+ /* End the registration of the per realisation time */
+ time_sub(&t0, time_current(&t1), &t0);
+ usec = (double)time_val(&t0, TIME_NSEC) * 0.001;
+
+ /* Update the pixel accumulator of the current channel */
+ pix_accums[ichannel].sum_weights += weight;
+ pix_accums[ichannel].sum_weights_sqr += weight*weight;
+ pix_accums[ichannel].nweights += 1;
+
+ /* Update the pixel accumulator of per realisation time */
+ pix_accums[HTRDR_ESTIMATE_TIME].sum_weights += usec;
+ pix_accums[HTRDR_ESTIMATE_TIME].sum_weights_sqr += usec*usec;
+ pix_accums[HTRDR_ESTIMATE_TIME].nweights += 1;
+ }
+ }
+ }
+ return RES_OK;
+}
+
+static res_T
+draw_image
+ (struct htrdr* htrdr,
+ const struct htrdr_camera* cam,
+ const size_t width, /* Image width */
+ const size_t height, /* Image height */
+ const size_t spp,
+ const size_t ntiles_x,
+ const size_t ntiles_y,
+ const size_t ntiles_adjusted,
+ const double pix_sz[2], /* Pixel size in the normalized image plane */
+ struct proc_work* work,
+ struct list_node* tiles)
+{
+ struct ssp_rng* rng_proc = NULL;
+ size_t nthreads = 0;
+ size_t nthieves = 0;
+ size_t proc_ntiles = 0;
+ ATOMIC nsolved_tiles = 0;
+ ATOMIC res = RES_OK;
+ ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles);
+ ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0);
+ ASSERT(width && height);
+ (void)ntiles_x, (void)ntiles_y;
+
+ res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc);
+ if(res != RES_OK) {
+ htrdr_log_err(htrdr, "could not create the RNG used to sample a process "
+ "to steal -- %s.\n", res_to_cstr((res_T)res));
+ goto error;
+ }
+
+ proc_ntiles = proc_work_get_ntiles(work);
+ nthreads = MMIN(htrdr->nthreads, proc_ntiles);
+
+ /* The process is not considered as a working process for himself */
+ htrdr->mpi_working_procs[htrdr->mpi_rank] = 0;
+ --htrdr->mpi_nworking_procs;
+
+ omp_set_num_threads((int)nthreads);
+ #pragma omp parallel
+ for(;;) {
+ const int ithread = omp_get_thread_num();
+ struct ssp_rng_proxy* rng_proxy = NULL;
+ struct ssp_rng* rng;
+ struct tile* tile;
+ uint32_t mcode = TILE_MCODE_NULL;
+ size_t tile_org[2];
+ size_t tile_sz[2];
+ size_t n;
+ res_T res_local = RES_OK;
+ int32_t pcent;
+
+ /* Get a tile to draw */
+ #pragma omp critical
+ {
+ mcode = proc_work_get_tile(work);
+ if(mcode == TILE_MCODE_NULL) { /* No more work on this process */
+ /* Try to steal works to concurrent processes */
+ proc_work_reset(work);
+ nthieves = mpi_steal_work(htrdr, rng_proc, work);
+ if(nthieves != 0) {
+ mcode = proc_work_get_tile(work);
+ }
+ }
+ }
+ if(mcode == TILE_MCODE_NULL) break; /* No more work */
+
+ /* Decode the morton code to retrieve the tile index */
+ tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
+ tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
+ ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
+
+ /* Create the tile */
+ tile = tile_create(htrdr->allocator);
+ if(!tile) {
+ ATOMIC_SET(&res, RES_MEM_ERR);
+ htrdr_log_err(htrdr,
+ "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n",
+ (unsigned long)tile_org[0], (unsigned long)tile_org[1],
+ res_to_cstr((res_T)ATOMIC_GET(&res)));
+ break;
+ }
+
+ /* Register the tile */
+ #pragma omp critical
+ list_add_tail(tiles, &tile->node);
+
+ tile->data.x = (uint16_t)tile_org[0];
+ tile->data.y = (uint16_t)tile_org[1];
+
+ /* Define the tile origin in pixel space */
+ tile_org[0] *= TILE_SIZE;
+ tile_org[1] *= TILE_SIZE;
+
+ /* Compute the size of the tile clamped by the borders of the buffer */
+ tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]);
+ tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]);
+
+ /* Create a proxy RNG for the current tile. This proxy is used for the
+ * current thread only and thus it has to manage only one RNG. This proxy
+ * is initialised in order to ensure that an unique and predictable set of
+ * random numbers is used for the current tile. */
+ SSP(rng_proxy_create2
+ (&htrdr->lifo_allocators[ithread],
+ &ssp_rng_threefry,
+ RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */
+ RNG_SEQUENCE_SIZE, /* Size */
+ RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */
+ 1, &rng_proxy));
+ SSP(rng_proxy_create_rng(rng_proxy, 0, &rng));
+
+ /* Launch the tile rendering */
+ res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz,
+ pix_sz, cam, spp, rng, tile);
+
+ SSP(rng_proxy_ref_put(rng_proxy));
+ SSP(rng_ref_put(rng));
+
+ if(res_local != RES_OK) {
+ ATOMIC_SET(&res, res_local);
+ break;
+ }
+
+ /* Update the progress status */
+ n = (size_t)ATOMIC_INCR(&nsolved_tiles);
+ pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/);
+
+ #pragma omp critical
+ if(pcent > htrdr->mpi_progress_render[0]) {
+ htrdr->mpi_progress_render[0] = pcent;
+ if(htrdr->mpi_rank == 0) {
+ update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ } else { /* Send the progress percentage to the master process */
+ send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent);
+ }
+ }
+ }
+
+ if(ATOMIC_GET(&res) != RES_OK) goto error;
+
+ /* Synchronize the process */
+ mutex_lock(htrdr->mpi_mutex);
+ MPI(Barrier(MPI_COMM_WORLD));
+ mutex_unlock(htrdr->mpi_mutex);
+
+exit:
+ if(rng_proc) SSP(rng_ref_put(rng_proc));
+ return (res_T)res;
+error:
+ goto exit;
+}
+
+/*******************************************************************************
+ * Local functions
+ ******************************************************************************/
+res_T
+htrdr_draw_radiance
+ (struct htrdr* htrdr,
+ const struct htrdr_camera* cam,
+ const size_t width,
+ const size_t height,
+ const size_t spp,
+ struct htrdr_buffer* buf)
+{
+ char strbuf[128];
+ struct time t0, t1;
+ struct list_node tiles;
+ size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
+ size_t itile;
+ struct proc_work work;
+ struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
+ size_t proc_ntiles_adjusted;
+ double pix_sz[2];
+ ATOMIC probe_thieves = 1;
+ ATOMIC res = RES_OK;
+ ASSERT(htrdr && cam && width && height);
+ ASSERT(htrdr->mpi_rank != 0 || buf);
+
+ list_init(&tiles);
+ proc_work_init(htrdr->allocator, &work);
+
+ if(htrdr->mpi_rank == 0) {
+ htrdr_buffer_get_layout(buf, &layout);
+ ASSERT(layout.width || layout.height || layout.elmt_size);
+ ASSERT(layout.width == width && layout.height == height);
+
+ if(layout.elmt_size != sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])
+ || layout.alignment < ALIGNOF(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])) {
+ htrdr_log_err(htrdr,
+ "%s: invalid buffer layout. "
+ "The pixel size must be the size of %lu accumulators.\n",
+ FUNC_NAME, (unsigned long)HTRDR_ESTIMATES_COUNT__);
+ res = RES_BAD_ARG;
+ goto error;
+ }
+ }
+
+ /* Compute the overall number of tiles */
+ ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
+ ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
+ ntiles = ntiles_x * ntiles_y;
+
+ /* Compute the pixel size in the normalized image plane */
+ pix_sz[0] = 1.0 / (double)width;
+ pix_sz[1] = 1.0 / (double)height;
+
+ /* Adjust the #tiles for the morton-encoding procedure */
+ ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
+ ntiles_adjusted *= ntiles_adjusted;
+
+ /* Define the initial number of tiles of the current process */
+ proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs;
+ if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */
+ proc_ntiles_adjusted +=
+ ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs;
+ }
+
+ /* Define the initial list of tiles of the process */
+ FOR_EACH(itile, 0, proc_ntiles_adjusted) {
+ uint32_t mcode;
+ uint16_t tile_org[2];
+
+ mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs
+ + (uint32_t)htrdr->mpi_rank;
+
+ tile_org[0] = morton2D_decode(mcode>>0);
+ if(tile_org[0] >= ntiles_x) continue;
+ tile_org[1] = morton2D_decode(mcode>>1);
+ if(tile_org[1] >= ntiles_y) continue;
+ proc_work_add_tile(&work, mcode);
+ }
+
+ if(htrdr->mpi_rank == 0) {
+ fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ }
+
+ time_current(&t0);
+
+ omp_set_nested(1); /* Enable nested threads for draw_image */
+ #pragma omp parallel sections num_threads(2)
+ {
+ #pragma omp section
+ mpi_probe_thieves(htrdr, &work, &probe_thieves);
+
+ #pragma omp section
+ {
+ draw_image(htrdr, cam, width, height, spp, ntiles_x, ntiles_y,
+ ntiles_adjusted, pix_sz, &work, &tiles);
+ /* The processes have no more work to do. Stop probing for thieves */
+ ATOMIC_SET(&probe_thieves, 0);
+ }
+ }
+
+ if(htrdr->mpi_rank == 0) {
+ update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+ fprintf(stderr, "\n"); /* Add a new line after the progress statuses */
+ }
+
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
+ htrdr_log(htrdr, "Rendering time: %s\n", strbuf);
+
+ /* Gather accum buffers from the group of processes */
+ time_current(&t0);
+ res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles);
+ if(res != RES_OK) goto error;
+ time_sub(&t0, time_current(&t1), &t0);
+ time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
+ htrdr_log(htrdr, "Image gathering time: %s\n", strbuf);
+
+exit:
+ { /* Free allocated tiles */
+ struct list_node* node;
+ struct list_node* tmp;
+ LIST_FOR_EACH_SAFE(node, tmp, &tiles) {
+ struct tile* tile = CONTAINER_OF(node, struct tile, node);
+ list_del(node);
+ tile_ref_put(tile);
+ }
+ }
+ proc_work_release(&work);
+ return (res_T)res;
+error:
+ goto exit;
+}
+
diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c
@@ -1,851 +0,0 @@
-/* Copyright (C) 2018, 2019, 2020 |Meso|Star> (contact@meso-star.com)
- * Copyright (C) 2018, 2019 CNRS, Université Paul Sabatier
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#define _POSIX_C_SOURCE 199309L /* nanosleep */
-
-#include "htrdr.h"
-#include "htrdr_c.h"
-#include "htrdr_buffer.h"
-#include "htrdr_camera.h"
-#include "htrdr_solve.h"
-
-#include <high_tune/htsky.h>
-
-#include <rsys/clock_time.h>
-#include <rsys/cstr.h>
-#include <rsys/dynamic_array_u32.h>
-#include <rsys/math.h>
-#include <rsys/mutex.h>
-#include <star/ssp.h>
-
-#include <omp.h>
-#include <mpi.h>
-#include <time.h>
-#include <unistd.h>
-
-#define RNG_SEQUENCE_SIZE 10000
-
-#define TILE_MCODE_NULL UINT32_MAX
-#define TILE_SIZE 32 /* Definition in X & Y of a tile */
-STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2);
-
-/* Tile of row ordered image pixels */
-struct tile {
- struct list_node node;
- struct mem_allocator* allocator;
- ref_T ref;
-
- struct tile_data {
- uint16_t x, y; /* 2D coordinates of the tile in tile space */
- /* Simulate the flexible array member of the C99 standard. */
- struct htrdr_accum accums[1/*dummy element*/];
- } data;
-};
-
-/* List of tile to compute onto the MPI process. */
-struct proc_work {
- struct mutex* mutex;
- struct darray_u32 tiles; /* #tiles to render */
- size_t itile; /* Next tile to render in the above list of tiles */
-};
-
-/*******************************************************************************
- * Helper functions
- ******************************************************************************/
-static FINLINE uint16_t
-morton2D_decode(const uint32_t u32)
-{
- uint32_t x = u32 & 0x55555555;
- x = (x | (x >> 1)) & 0x33333333;
- x = (x | (x >> 2)) & 0x0F0F0F0F;
- x = (x | (x >> 4)) & 0x00FF00FF;
- x = (x | (x >> 8)) & 0x0000FFFF;
- return (uint16_t)x;
-}
-
-static FINLINE uint32_t
-morton2D_encode(const uint16_t u16)
-{
- uint32_t u32 = u16;
- u32 = (u32 | (u32 << 8)) & 0x00FF00FF;
- u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F;
- u32 = (u32 | (u32 << 2)) & 0x33333333;
- u32 = (u32 | (u32 << 1)) & 0x55555555;
- return u32;
-}
-
-static FINLINE struct tile*
-tile_create(struct mem_allocator* allocator)
-{
- struct tile* tile;
- const size_t tile_sz =
- sizeof(struct tile) - sizeof(struct htrdr_accum)/*rm dummy accum*/;
- const size_t buf_sz = /* Flexiblbe array element */
- TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]);
- ASSERT(allocator);
-
- tile = MEM_ALLOC(allocator, tile_sz+buf_sz);
- if(!tile) return NULL;
-
- ref_init(&tile->ref);
- list_init(&tile->node);
- tile->allocator = allocator;
- ASSERT(IS_ALIGNED(&tile->data.accums, ALIGNOF(struct htrdr_accum)));
-
- return tile;
-}
-
-static INLINE void
-tile_ref_get(struct tile* tile)
-{
- ASSERT(tile);
- tile_ref_get(tile);
-}
-
-static INLINE void
-release_tile(ref_T* ref)
-{
- struct tile* tile = CONTAINER_OF(ref, struct tile, ref);
- ASSERT(ref);
- MEM_RM(tile->allocator, tile);
-}
-
-static INLINE void
-tile_ref_put(struct tile* tile)
-{
- ASSERT(tile);
- ref_put(&tile->ref, release_tile);
-}
-
-static FINLINE struct htrdr_accum*
-tile_at
- (struct tile* tile,
- const size_t x, /* In tile space */
- const size_t y) /* In tile space */
-{
- ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE);
- return tile->data.accums + (y*TILE_SIZE + x) * HTRDR_ESTIMATES_COUNT__;
-}
-
-static void
-write_tile_data(struct htrdr_buffer* buf, const struct tile_data* tile_data)
-{
- struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
- size_t icol, irow;
- size_t irow_tile;
- size_t ncols_tile, nrows_tile;
- char* buf_mem;
- ASSERT(buf && tile_data);
-
- htrdr_buffer_get_layout(buf, &layout);
- buf_mem = htrdr_buffer_get_data(buf);
- ASSERT(layout.elmt_size == sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]));
-
- /* Compute the row/column of the tile origin into the buffer */
- icol = tile_data->x * (size_t)TILE_SIZE;
- irow = tile_data->y * (size_t)TILE_SIZE;
-
- /* Define the number of tile row/columns to write into the buffer */
- ncols_tile = MMIN(icol + TILE_SIZE, layout.width) - icol;
- nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow;
-
- /* Copy the tile data, row by row */
- FOR_EACH(irow_tile, 0, nrows_tile) {
- char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch;
- const struct htrdr_accum* tile_row =
- tile_data->accums + irow_tile*TILE_SIZE*HTRDR_ESTIMATES_COUNT__;
- memcpy(buf_row + icol*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]),
- tile_row, ncols_tile*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]));
- }
-}
-
-static INLINE void
-proc_work_init(struct mem_allocator* allocator, struct proc_work* work)
-{
- ASSERT(work);
- darray_u32_init(allocator, &work->tiles);
- work->itile = 0;
- CHK(work->mutex = mutex_create());
-}
-
-static INLINE void
-proc_work_release(struct proc_work* work)
-{
- darray_u32_release(&work->tiles);
- mutex_destroy(work->mutex);
-}
-
-static INLINE void
-proc_work_reset(struct proc_work* work)
-{
- ASSERT(work);
- mutex_lock(work->mutex);
- darray_u32_clear(&work->tiles);
- work->itile = 0;
- mutex_unlock(work->mutex);
-}
-
-static INLINE void
-proc_work_add_tile(struct proc_work* work, const uint32_t mcode)
-{
- mutex_lock(work->mutex);
- CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK);
- mutex_unlock(work->mutex);
-}
-
-static INLINE uint32_t
-proc_work_get_tile(struct proc_work* work)
-{
- uint32_t mcode;
- ASSERT(work);
- mutex_lock(work->mutex);
- if(work->itile >= darray_u32_size_get(&work->tiles)) {
- mcode = TILE_MCODE_NULL;
- } else {
- mcode = darray_u32_cdata_get(&work->tiles)[work->itile];
- ++work->itile;
- }
- mutex_unlock(work->mutex);
- return mcode;
-}
-
-static INLINE size_t
-proc_work_get_ntiles(struct proc_work* work)
-{
- size_t sz = 0;
- ASSERT(work);
- mutex_lock(work->mutex);
- sz = darray_u32_size_get(&work->tiles);
- mutex_unlock(work->mutex);
- return sz;
-}
-
-static void
-mpi_wait_for_request(struct htrdr* htrdr, MPI_Request* req)
-{
- ASSERT(htrdr && req);
-
- /* Wait for process synchronisation */
- for(;;) {
- struct timespec t;
- int complete;
- t.tv_sec = 0;
- t.tv_nsec = 10000000; /* 10ms */
-
- mutex_lock(htrdr->mpi_mutex);
- MPI(Test(req, &complete, MPI_STATUS_IGNORE));
- mutex_unlock(htrdr->mpi_mutex);
- if(complete) break;
-
- nanosleep(&t, NULL);
- }
-}
-
-static void
-mpi_probe_thieves
- (struct htrdr* htrdr,
- struct proc_work* work,
- ATOMIC* probe_thieves)
-{
- uint32_t tiles[UINT8_MAX];
- struct timespec t;
- ASSERT(htrdr && work && probe_thieves);
-
- if(htrdr->mpi_nprocs == 1) /* The process is alone. No thief is possible */
- return;
-
- t.tv_sec = 0;
-
- /* Protect MPI calls of multiple invocations from concurrent threads */
- #define P_MPI(Func) { \
- mutex_lock(htrdr->mpi_mutex); \
- MPI(Func); \
- mutex_unlock(htrdr->mpi_mutex); \
- } (void)0
-
- while(ATOMIC_GET(probe_thieves)) {
- MPI_Status status;
- size_t itile;
- int msg;
-
- /* Probe if a steal request was submitted by any processes */
- P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg,
- &status));
-
- if(msg) { /* A steal request was posted */
- MPI_Request req;
- uint8_t ntiles_to_steal;
-
- /* Asynchronously receive the steal request */
- P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE,
- HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req));
-
- /* Wait for the completion of the steal request */
- mpi_wait_for_request(htrdr, &req);
-
- /* Thief some tiles */
- FOR_EACH(itile, 0, ntiles_to_steal) {
- tiles[itile] = proc_work_get_tile(work);
- }
- P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE,
- HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD));
- }
- t.tv_nsec = 500000000; /* 500ms */
- nanosleep(&t, NULL);
- }
- #undef P_MPI
-}
-
-static int
-mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng)
-{
- int iproc, i;
- int dst_rank;
- ASSERT(htrdr && rng && htrdr->mpi_nworking_procs);
-
- /* Sample the index of the 1st active process */
- iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs);
-
- /* Find the rank of the sampled active process. Use a simple linear search
- * since the overall number of processes should be quite low; at most few
- * dozens. */
- i = 0;
- FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) {
- if(htrdr->mpi_working_procs[dst_rank] == 0) continue; /* Inactive process */
- if(i == iproc) break; /* The rank of the sampled process is found */
- ++i;
- }
- ASSERT(dst_rank < htrdr->mpi_nprocs);
- return dst_rank;
-}
-
-/* Return the number of stolen tiles */
-static size_t
-mpi_steal_work
- (struct htrdr* htrdr,
- struct ssp_rng* rng,
- struct proc_work* work)
-{
- MPI_Request req;
- size_t itile;
- size_t nthieves = 0;
- uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */
- int proc_to_steal; /* Process to steal */
- uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16);
- ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX);
-
- /* Protect MPI calls of multiple invocations from concurrent threads */
- #define P_MPI(Func) { \
- mutex_lock(htrdr->mpi_mutex); \
- MPI(Func); \
- mutex_unlock(htrdr->mpi_mutex); \
- } (void)0
-
- /* No more working process => nohting to steal */
- if(!htrdr->mpi_nworking_procs) return 0;
-
- /* Sample a process to steal */
- proc_to_steal = mpi_sample_working_process(htrdr, rng);
-
- /* Send a steal request to the sampled process and wait for a response */
- P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal,
- HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD));
-
- /* Receive the stolen tile from the sampled process */
- P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal,
- HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req));
-
- mpi_wait_for_request(htrdr, &req);
-
- FOR_EACH(itile, 0, ntiles_to_steal) {
- if(tiles[itile] == TILE_MCODE_NULL) {
- ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0);
- htrdr->mpi_working_procs[proc_to_steal] = 0;
- htrdr->mpi_nworking_procs--;
- break;
- }
- proc_work_add_tile(work, tiles[itile]);
- ++nthieves;
- }
- #undef P_MPI
- return nthieves;
-}
-
-static res_T
-mpi_gather_tiles
- (struct htrdr* htrdr,
- struct htrdr_buffer* buf,
- const size_t ntiles,
- struct list_node* tiles)
-{
- /* Compute the size of the tile_data */
- const size_t msg_sz =
- sizeof(struct tile_data) - sizeof(struct htrdr_accum)/*dummy*/
- + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]);
-
- struct list_node* node = NULL;
- struct tile* tile = NULL;
- res_T res = RES_OK;
- ASSERT(htrdr && tiles);
- ASSERT(htrdr->mpi_rank != 0 || buf);
- (void)ntiles;
-
- if(htrdr->mpi_rank != 0) { /* Non master process */
- /* Send the computed tile to the master process */
- LIST_FOR_EACH(node, tiles) {
- struct tile* t = CONTAINER_OF(node, struct tile, node);
- MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0,
- HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD));
- }
- } else { /* Master process */
- size_t itile = 0;
-
- LIST_FOR_EACH(node, tiles) {
- struct tile* t = CONTAINER_OF(node, struct tile, node);
- write_tile_data(buf, &t->data);
- ++itile;
- }
-
- if(itile != ntiles) {
- ASSERT(htrdr->mpi_nprocs > 1);
-
- /* Create a temporary tile to receive the tile data computed by the
- * concurrent MPI processes */
- tile = tile_create(htrdr->allocator);
- if(!tile) {
- res = RES_MEM_ERR;
- htrdr_log_err(htrdr,
- "could not allocate the temporary tile used to gather the process "
- "output data -- %s.\n", res_to_cstr(res));
- goto error;
- }
-
- /* Receive the tile data of the concurret MPI processes */
- FOR_EACH(itile, itile, ntiles) {
- MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE,
- HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE));
- write_tile_data(buf, &tile->data);
- }
- }
- }
-
-exit:
- if(tile) tile_ref_put(tile);
- return res;
-error:
- goto exit;
-}
-
-static res_T
-draw_tile
- (struct htrdr* htrdr,
- const size_t ithread,
- const int64_t tile_mcode, /* For debug only */
- const size_t tile_org[2], /* Origin of the tile in pixel space */
- const size_t tile_sz[2], /* Definition of the tile */
- const double pix_sz[2], /* Size of a pixel in the normalized image plane */
- const struct htrdr_camera* cam,
- const size_t spp, /* #samples per pixel */
- struct ssp_rng* rng,
- struct tile* tile)
-{
- size_t npixels;
- size_t mcode; /* Morton code of tile pixel */
- ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && tile);
- (void)tile_mcode;
- /* Adjust the #pixels to process them wrt a morton order */
- npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1]));
- npixels *= npixels;
-
- FOR_EACH(mcode, 0, npixels) {
- struct htrdr_accum* pix_accums;
- size_t ipix_tile[2]; /* Pixel coord in the tile */
- size_t ipix[2]; /* Pixel coord in the buffer */
- size_t ichannel;
-
- ipix_tile[0] = morton2D_decode((uint32_t)(mcode>>0));
- if(ipix_tile[0] >= tile_sz[0]) continue; /* Pixel is out of tile */
- ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1));
- if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */
-
- /* Fetch and reset the pixel accumulator */
- pix_accums = tile_at(tile, ipix_tile[0], ipix_tile[1]);
- pix_accums[HTRDR_ESTIMATE_TIME] = HTRDR_ACCUM_NULL; /* Reset time per radiative path */
-
- /* Compute the pixel coordinate */
- ipix[0] = tile_org[0] + ipix_tile[0];
- ipix[1] = tile_org[1] + ipix_tile[1];
-
- FOR_EACH(ichannel, 0, 3) {
- /* Check that the X, Y and Z estimates are stored in accumulators 0, 1 et
- * 2, respectively */
- STATIC_ASSERT
- ( HTRDR_ESTIMATE_X == 0
- && HTRDR_ESTIMATE_Y == 1
- && HTRDR_ESTIMATE_Z == 2,
- Unexpected_htrdr_estimate_enumerate);
- size_t isamp;
-
- pix_accums[ichannel] = HTRDR_ACCUM_NULL;
- FOR_EACH(isamp, 0, spp) {
- struct time t0, t1;
- double pix_samp[2];
- double ray_org[3];
- double ray_dir[3];
- double weight;
- double r0, r1;
- size_t iband;
- size_t iquad;
- double usec;
-
- /* Sample a position into the pixel, in the normalized image plane */
- pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0];
- pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1];
-
- /* Generate a ray starting from the pinhole camera and passing through the
- * pixel sample */
- htrdr_camera_ray(cam, pix_samp, ray_org, ray_dir);
-
- /* Sample a spectral band and a quadrature point */
- r0 = ssp_rng_canonical(rng);
- r1 = ssp_rng_canonical(rng);
- switch(ichannel) {
- case 0:
- htsky_sample_sw_spectral_data_CIE_1931_X
- (htrdr->sky, r0, r1, &iband, &iquad);
- break;
- case 1:
- htsky_sample_sw_spectral_data_CIE_1931_Y
- (htrdr->sky, r0, r1, &iband, &iquad);
- break;
- case 2:
- htsky_sample_sw_spectral_data_CIE_1931_Z
- (htrdr->sky, r0, r1, &iband, &iquad);
- break;
- default: FATAL("Unreachable code.\n"); break;
- }
-
- /* Compute the radiance that reach the pixel through the ray */
- time_current(&t0);
- weight = htrdr_compute_radiance_sw
- (htrdr, ithread, rng, ray_org, ray_dir, iband, iquad);
- ASSERT(weight >= 0);
- time_sub(&t0, time_current(&t1), &t0);
- usec = (double)time_val(&t0, TIME_NSEC) * 0.001;
-
- /* Update the pixel accumulator of the current channel */
- pix_accums[ichannel].sum_weights += weight;
- pix_accums[ichannel].sum_weights_sqr += weight*weight;
- pix_accums[ichannel].nweights += 1;
-
- /* Update the pixel accumulator of per realisation time */
- pix_accums[HTRDR_ESTIMATE_TIME].sum_weights += usec;
- pix_accums[HTRDR_ESTIMATE_TIME].sum_weights_sqr += usec*usec;
- pix_accums[HTRDR_ESTIMATE_TIME].nweights += 1;
- }
- }
- }
- return RES_OK;
-}
-
-static res_T
-draw_image
- (struct htrdr* htrdr,
- const struct htrdr_camera* cam,
- const size_t width, /* Image width */
- const size_t height, /* Image height */
- const size_t spp,
- const size_t ntiles_x,
- const size_t ntiles_y,
- const size_t ntiles_adjusted,
- const double pix_sz[2], /* Pixel size in the normalized image plane */
- struct proc_work* work,
- struct list_node* tiles)
-{
- struct ssp_rng* rng_proc = NULL;
- size_t nthreads = 0;
- size_t nthieves = 0;
- size_t proc_ntiles = 0;
- ATOMIC nsolved_tiles = 0;
- ATOMIC res = RES_OK;
- ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles);
- ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0);
- ASSERT(width && height);
- (void)ntiles_x, (void)ntiles_y;
-
- res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc);
- if(res != RES_OK) {
- htrdr_log_err(htrdr, "could not create the RNG used to sample a process "
- "to steal -- %s.\n", res_to_cstr((res_T)res));
- goto error;
- }
-
- proc_ntiles = proc_work_get_ntiles(work);
- nthreads = MMIN(htrdr->nthreads, proc_ntiles);
-
- /* The process is not considered as a working process for himself */
- htrdr->mpi_working_procs[htrdr->mpi_rank] = 0;
- --htrdr->mpi_nworking_procs;
-
- omp_set_num_threads((int)nthreads);
- #pragma omp parallel
- for(;;) {
- const int ithread = omp_get_thread_num();
- struct ssp_rng_proxy* rng_proxy = NULL;
- struct ssp_rng* rng;
- struct tile* tile;
- uint32_t mcode = TILE_MCODE_NULL;
- size_t tile_org[2];
- size_t tile_sz[2];
- size_t n;
- res_T res_local = RES_OK;
- int32_t pcent;
-
- /* Get a tile to draw */
- #pragma omp critical
- {
- mcode = proc_work_get_tile(work);
- if(mcode == TILE_MCODE_NULL) { /* No more work on this process */
- /* Try to steal works to concurrent processes */
- proc_work_reset(work);
- nthieves = mpi_steal_work(htrdr, rng_proc, work);
- if(nthieves != 0) {
- mcode = proc_work_get_tile(work);
- }
- }
- }
- if(mcode == TILE_MCODE_NULL) break; /* No more work */
-
- /* Decode the morton code to retrieve the tile index */
- tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
- tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
- ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
-
- /* Create the tile */
- tile = tile_create(htrdr->allocator);
- if(!tile) {
- ATOMIC_SET(&res, RES_MEM_ERR);
- htrdr_log_err(htrdr,
- "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n",
- (unsigned long)tile_org[0], (unsigned long)tile_org[1],
- res_to_cstr((res_T)ATOMIC_GET(&res)));
- break;
- }
-
- /* Register the tile */
- #pragma omp critical
- list_add_tail(tiles, &tile->node);
-
- tile->data.x = (uint16_t)tile_org[0];
- tile->data.y = (uint16_t)tile_org[1];
-
- /* Define the tile origin in pixel space */
- tile_org[0] *= TILE_SIZE;
- tile_org[1] *= TILE_SIZE;
-
- /* Compute the size of the tile clamped by the borders of the buffer */
- tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]);
- tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]);
-
- /* Create a proxy RNG for the current tile. This proxy is used for the
- * current thread only and thus it has to manage only one RNG. This proxy
- * is initialised in order to ensure that a unique and predictable set of
- * random numbers is used for the current tile. */
- SSP(rng_proxy_create2
- (&htrdr->lifo_allocators[ithread],
- &ssp_rng_threefry,
- RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */
- RNG_SEQUENCE_SIZE, /* Size */
- RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */
- 1, &rng_proxy));
- SSP(rng_proxy_create_rng(rng_proxy, 0, &rng));
-
- /* Launch the tile rendering */
- res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz,
- pix_sz, cam, spp, rng, tile);
-
- SSP(rng_proxy_ref_put(rng_proxy));
- SSP(rng_ref_put(rng));
-
- if(res_local != RES_OK) {
- ATOMIC_SET(&res, res_local);
- break;
- }
-
- /* Update the progress status */
- n = (size_t)ATOMIC_INCR(&nsolved_tiles);
- pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/);
-
- #pragma omp critical
- if(pcent > htrdr->mpi_progress_render[0]) {
- htrdr->mpi_progress_render[0] = pcent;
- if(htrdr->mpi_rank == 0) {
- update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
- } else { /* Send the progress percentage to the master process */
- send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent);
- }
- }
- }
-
- if(ATOMIC_GET(&res) != RES_OK) goto error;
-
- /* Synchronize the process */
- mutex_lock(htrdr->mpi_mutex);
- MPI(Barrier(MPI_COMM_WORLD));
- mutex_unlock(htrdr->mpi_mutex);
-
-exit:
- if(rng_proc) SSP(rng_ref_put(rng_proc));
- return (res_T)res;
-error:
- goto exit;
-}
-
-/*******************************************************************************
- * Local functions
- ******************************************************************************/
-res_T
-htrdr_draw_radiance_sw
- (struct htrdr* htrdr,
- const struct htrdr_camera* cam,
- const size_t width,
- const size_t height,
- const size_t spp,
- struct htrdr_buffer* buf)
-{
- char strbuf[128];
- struct time t0, t1;
- struct list_node tiles;
- size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
- size_t itile;
- struct proc_work work;
- struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
- size_t proc_ntiles_adjusted;
- double pix_sz[2];
- ATOMIC probe_thieves = 1;
- ATOMIC res = RES_OK;
- ASSERT(htrdr && cam && width && height);
- ASSERT(htrdr->mpi_rank != 0 || buf);
-
- list_init(&tiles);
- proc_work_init(htrdr->allocator, &work);
-
- if(htrdr->mpi_rank == 0) {
- htrdr_buffer_get_layout(buf, &layout);
- ASSERT(layout.width || layout.height || layout.elmt_size);
- ASSERT(layout.width == width && layout.height == height);
-
- if(layout.elmt_size != sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])
- || layout.alignment < ALIGNOF(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])) {
- htrdr_log_err(htrdr,
- "%s: invalid buffer layout. "
- "The pixel size must be the size of %lu accumulators.\n",
- FUNC_NAME, (unsigned long)HTRDR_ESTIMATES_COUNT__);
- res = RES_BAD_ARG;
- goto error;
- }
- }
-
- /* Compute the overall number of tiles */
- ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
- ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
- ntiles = ntiles_x * ntiles_y;
-
- /* Compute the pixel size in the normalized image plane */
- pix_sz[0] = 1.0 / (double)width;
- pix_sz[1] = 1.0 / (double)height;
-
- /* Adjust the #tiles for the morton-encoding procedure */
- ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
- ntiles_adjusted *= ntiles_adjusted;
-
- /* Define the initial number of tiles of the current process */
- proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs;
- if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */
- proc_ntiles_adjusted +=
- ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs;
- }
-
- /* Define the initial list of tiles of the process */
- FOR_EACH(itile, 0, proc_ntiles_adjusted) {
- uint32_t mcode;
- uint16_t tile_org[2];
-
- mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs
- + (uint32_t)htrdr->mpi_rank;
-
- tile_org[0] = morton2D_decode(mcode>>0);
- if(tile_org[0] >= ntiles_x) continue;
- tile_org[1] = morton2D_decode(mcode>>1);
- if(tile_org[1] >= ntiles_y) continue;
- proc_work_add_tile(&work, mcode);
- }
-
- if(htrdr->mpi_rank == 0) {
- fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
- print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
- }
-
- time_current(&t0);
-
- omp_set_nested(1); /* Enable nested threads for draw_image */
- #pragma omp parallel sections num_threads(2)
- {
- #pragma omp section
- mpi_probe_thieves(htrdr, &work, &probe_thieves);
-
- #pragma omp section
- {
- draw_image(htrdr, cam, width, height, spp, ntiles_x, ntiles_y,
- ntiles_adjusted, pix_sz, &work, &tiles);
- /* The processes have no more work to do. Stop probing for thieves */
- ATOMIC_SET(&probe_thieves, 0);
- }
- }
-
- if(htrdr->mpi_rank == 0) {
- update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
- fprintf(stderr, "\n"); /* Add a new line after the progress statuses */
- }
-
- time_sub(&t0, time_current(&t1), &t0);
- time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
- htrdr_log(htrdr, "Rendering time: %s\n", strbuf);
-
- /* Gather accum buffers from the group of processes */
- time_current(&t0);
- res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles);
- if(res != RES_OK) goto error;
- time_sub(&t0, time_current(&t1), &t0);
- time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
- htrdr_log(htrdr, "Image gathering time: %s\n", strbuf);
-
-exit:
- { /* Free allocated tiles */
- struct list_node* node;
- struct list_node* tmp;
- LIST_FOR_EACH_SAFE(node, tmp, &tiles) {
- struct tile* tile = CONTAINER_OF(node, struct tile, node);
- list_del(node);
- tile_ref_put(tile);
- }
- }
- proc_work_release(&work);
- return (res_T)res;
-error:
- goto exit;
-}
-
diff --git a/src/htrdr_solve.h b/src/htrdr_solve.h
@@ -56,7 +56,7 @@ htrdr_compute_radiance_lw
const size_t iquad); /* Index of the quadrature point into the band */
extern LOCAL_SYM res_T
-htrdr_draw_radiance_sw
+htrdr_draw_radiance
(struct htrdr* htrdr,
const struct htrdr_camera* cam,
const size_t width, /* Image width */
diff --git a/src/htrdr_sun.c b/src/htrdr_sun.c
@@ -45,14 +45,6 @@ struct htrdr_sun {
/*******************************************************************************
* Helper functions
******************************************************************************/
-static INLINE int
-cmp_dbl(const void* a, const void* b)
-{
- const double d0 = *((const double*)a);
- const double d1 = *((const double*)b);
- return d0 < d1 ? -1 : (d0 > d1 ? 1 : 0);
-}
-
static void
release_sun(ref_T* ref)
{