htrdr

Solving radiative transfer in heterogeneous media
git clone git://git.meso-star.fr/htrdr.git

commit 56fa5639dc8b223bf8b25a561ab6174f961bb504
parent b9aba36680492ebc8f20769da4f6f90d3c63619f
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Tue, 17 Mar 2020 10:36:41 +0100

Plug the long wave realisation in the draw function

Diffstat:
M cmake/CMakeLists.txt | 2 +-
M src/htrdr.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/htrdr.h | 3 +++
M src/htrdr_c.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/htrdr_compute_radiance_lw.c | 60 ------------------------------------------------------------
A src/htrdr_draw_radiance.c | 897 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/htrdr_draw_radiance_sw.c | 851 --------------------------------------------------------------------------------
M src/htrdr_solve.h | 2 +-
M src/htrdr_sun.c | 8 --------
9 files changed, 1047 insertions(+), 922 deletions(-)
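
The new src/htrdr_draw_radiance.c keeps, for every pixel, one accumulator per estimate (the X, Y and Z channels plus the per-realisation time), each storing a sum of weights, a sum of squared weights and a weight count. The standalone sketch below is not part of the commit; it only illustrates how such an accumulator turns into a Monte Carlo estimate and its standard error. The struct mirrors the htrdr_accum usage visible in the diff, but the field types and the sample values are guesses.

#include <math.h>
#include <stdio.h>

struct accum {
  double sum_weights;
  double sum_weights_sqr;
  unsigned long nweights;
};

static void
accum_add(struct accum* acc, const double weight)
{
  acc->sum_weights += weight;
  acc->sum_weights_sqr += weight*weight;
  acc->nweights += 1;
}

int
main(void)
{
  /* Arbitrary per-realisation weights standing in for the radiance
   * estimates returned by htrdr_compute_radiance_sw/lw */
  const double weights[] = {0.8, 1.2, 0.9, 1.1, 1.0};
  const unsigned long n = sizeof(weights)/sizeof(weights[0]);
  struct accum acc = {0, 0, 0};
  unsigned long i;
  double mean, variance, std_err;

  for(i = 0; i < n; i++) accum_add(&acc, weights[i]);

  /* The estimate is the empirical mean of the weights; its standard
   * error follows from the empirical variance over the sample count */
  mean = acc.sum_weights / (double)acc.nweights;
  variance = acc.sum_weights_sqr / (double)acc.nweights - mean*mean;
  std_err = sqrt(variance / (double)acc.nweights);

  printf("estimate = %g +/- %g\n", mean, std_err);
  return 0;
}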

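The core long-wave addition is setup_lw_cdf(), which builds a cumulative distribution over the sky's spectral bands by weighting each band with its Planck-integrated emissive power at a 290 K reference temperature, and sample_lw_spectral_interval(), which picks a band per realisation through a lower-bound search on that cumulative. The sketch below is a standalone condensation of that mechanism: the Wiebelt/blackbody helpers follow the ones the commit moves into htrdr_c.h, while the band bounds and the random number are hypothetical stand-ins for what htsky_get_spectral_band_bounds() and ssp_rng_canonical() would provide.

#include <math.h>
#include <stdio.h>

#define PI 3.14159265358979323846

/* Wiebelt approximation of the blackbody fraction, with v = C2/(lambda*T);
 * same series as the helper moved into htrdr_c.h */
static double
wiebelt(const double v)
{
  const double f = 15.0/(PI*PI*PI*PI);
  double w;
  if(v >= 2.0) {
    int m;
    w = 0;
    for(m = 1; m < 6; m++)
      w += exp(-m*v)/(m*m*m*m) * (((m*v + 3)*m*v + 6)*m*v + 6);
    w *= f;
  } else {
    const double v2 = v*v;
    const double v4 = v2*v2;
    w = 1.0/3.0 - v/8.0 + v2/60.0 - v4/5040.0 + v4*v2/272160.0
      - v4*v4/13305600.0;
    w = 1.0 - f*v2*v*w;
  }
  return w;
}

/* Fraction of blackbody emission between lambda0 and lambda1 (meters) */
static double
blackbody_fraction(const double lambda0, const double lambda1, const double T)
{
  const double C2 = 1.43877735e-2; /* m.K */
  return wiebelt(C2/(lambda1*T)) - wiebelt(C2/(lambda0*T));
}

/* Band-integrated emissive power in W/m^2 */
static double
planck(const double lambda_min, const double lambda_max, const double T)
{
  const double sigma = 5.6696e-8; /* W/m^2/K^4 */
  return blackbody_fraction(lambda_min, lambda_max, T) * sigma*T*T*T*T;
}

int
main(void)
{
  /* Hypothetical long-wave band bounds, in meters */
  const double bounds[] = {4e-6, 8e-6, 14e-6, 25e-6, 50e-6};
  const size_t nbands = sizeof(bounds)/sizeof(bounds[0]) - 1;
  const double Tref = 290; /* Reference temperature, as in setup_lw_cdf() */
  double cdf[4];
  double sum = 0;
  size_t i;

  /* Unnormalized per-band probabilities, then their cumulative */
  for(i = 0; i < nbands; i++) {
    cdf[i] = planck(bounds[i], bounds[i+1], Tref);
    sum += cdf[i];
  }
  for(i = 0; i < nbands; i++) {
    cdf[i] /= sum;
    if(i) cdf[i] += cdf[i-1];
  }
  cdf[nbands-1] = 1.0; /* Guard against rounding */

  /* Sample the first band whose cumulative value is strictly greater than
   * r; htrdr obtains the same index with search_lower_bound() applied to
   * nextafter(r, DBL_MAX) */
  {
    const double r = 0.42; /* Stand-in for ssp_rng_canonical(rng) */
    size_t iband = 0;
    while(iband + 1 < nbands && cdf[iband] <= r) iband++;
    printf("sampled band %lu (cdf = %g)\n", (unsigned long)iband, cdf[iband]);
  }
  return 0;
}
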
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -93,7 +93,7 @@ set(HTRDR_FILES_SRC htrdr_camera.c htrdr_compute_radiance_sw.c htrdr_compute_radiance_lw.c - htrdr_draw_radiance_sw.c + htrdr_draw_radiance.c htrdr_grid.c htrdr_ground.c htrdr_interface.c diff --git a/src/htrdr.c b/src/htrdr.c @@ -357,6 +357,74 @@ error: goto exit; } +static res_T +setup_lw_cdf(struct htrdr* htrdr) +{ + /* Reference temperature used to compute the Long Wave cumulative */ + const double Tref = 290; /* In Kelvin */ + double* cdf = NULL; + double* pdf = NULL; + double sum = 0; + size_t i; + size_t nbands; + res_T res = RES_OK; + ASSERT(htrdr && htsky_is_long_wave(htrdr->sky)); + + nbands = htsky_get_spectral_bands_count(htrdr->sky); + res = darray_double_resize(&htrdr->lw_cdf, nbands); + if(res != RES_OK) { + htrdr_log_err(htrdr, + "error allocating the CDF of the long wave spectral bands -- %s.\n", + res_to_cstr(res)); + goto error; + } + + /* Alias the same array by the cdf and the pdf variable to make easier the + * reading of the code */ + pdf = darray_double_data_get(&htrdr->lw_cdf); + cdf = darray_double_data_get(&htrdr->lw_cdf); + + /* Compute the *unormalized* probability to sample a long wave band */ + sum = 0; + FOR_EACH(i, 0, nbands) { + const size_t iband = htsky_get_spectral_band_id(htrdr->sky, i); + double wlens[2]; + HTSKY(get_spectral_band_bounds(htrdr->sky, iband, wlens)); + + /* Convert from nanometer to meter */ + wlens[0] = wlens[0] * 1.e9; + wlens[1] = wlens[1] * 1.e9; + + /* Compute the probability of the current band */ + pdf[i] = planck(wlens[0], wlens[1], Tref); + + /* Update the norm */ + sum += pdf[i]; + } + + /* Compute the cumulative of the previously computed probabilities */ + FOR_EACH(i, 0, nbands) { + /* Normalize the probability */ + pdf[i] /= sum; + + /* Setup the cumulative */ + if(i == 0) { + cdf[i] = pdf[i]; + } else { + cdf[i] = pdf[i] + cdf[i-1]; + ASSERT(cdf[i] >= cdf[i-1]); + } + } + + cdf[nbands - 1] = 1.0; + +exit: + return res; +error: + darray_double_clear(&htrdr->lw_cdf); + goto exit; +} + /******************************************************************************* * Local functions ******************************************************************************/ @@ -386,6 +454,8 @@ htrdr_init str_init(htrdr->allocator, &htrdr->output_name); + darray_double_init(htrdr->allocator, &htrdr->lw_cdf); + nthreads_max = MMAX(omp_get_max_threads(), omp_get_num_procs()); htrdr->dump_vtk = args->dump_vtk; htrdr->verbose = args->verbose; @@ -482,6 +552,12 @@ htrdr_init res = htsky_create(&htrdr->logger, htrdr->allocator, &htsky_args, &htrdr->sky); if(res != RES_OK) goto error; + if(htsky_is_long_wave(htrdr->sky)) { + /* Define the CDF used to sample a long wave band */ + res = setup_lw_cdf(htrdr); + if(res != RES_OK) goto error; + } + htrdr->lifo_allocators = MEM_CALLOC (htrdr->allocator, htrdr->nthreads, sizeof(*htrdr->lifo_allocators)); if(!htrdr->lifo_allocators) { @@ -530,6 +606,7 @@ htrdr_release(struct htrdr* htrdr) MEM_RM(htrdr->allocator, htrdr->lifo_allocators); } str_release(&htrdr->output_name); + darray_double_release(&htrdr->lw_cdf); logger_release(&htrdr->logger); } @@ -556,7 +633,7 @@ htrdr_run(struct htrdr* htrdr) } } } else { - res = htrdr_draw_radiance_sw(htrdr, htrdr->cam, htrdr->width, + res = htrdr_draw_radiance(htrdr, htrdr->cam, htrdr->width, htrdr->height, htrdr->spp, htrdr->buf); if(res != RES_OK) goto error; if(htrdr->mpi_rank == 0) { diff --git a/src/htrdr.h b/src/htrdr.h @@ -17,6 +17,7 @@ #ifndef HTRDR_H #define HTRDR_H +#include 
<rsys/dynamic_array_double.h> #include <rsys/logger.h> #include <rsys/ref_count.h> #include <rsys/str.h> @@ -54,6 +55,8 @@ struct htrdr { struct htsky* sky; + struct darray_double lw_cdf; /* CDF to sample a Long Waves band */ + size_t spp; /* #samples per pixel */ size_t width; /* Image width */ size_t height; /* Image height */ diff --git a/src/htrdr_c.h b/src/htrdr_c.h @@ -103,6 +103,65 @@ morton_xyz_decode_u21(const uint64_t code, uint32_t xyz[3]) xyz[2] = (uint32_t)morton3D_decode_u21(code >> 0); } +static INLINE double +wiebelt(const double v) +{ + int m; + double w, v2, v4; + /*.153989717364e+00;*/ + const double fifteen_over_pi_power_4 = 15.0/(PI*PI*PI*PI); + const double z0 = 1.0/3.0; + const double z1 = 1.0/8.0; + const double z2 = 1.0/60.0; + const double z4 = 1.0/5040.0; + const double z6 = 1.0/272160.0; + const double z8 = 1.0/13305600.0; + + if(v >= 2.) { + w = 0; + for(m=1; m<6 ;m++) + w+=exp(-m*v)/(m*m*m*m) * (((m*v+3)*m*v+6)*m*v+6); + w = w * fifteen_over_pi_power_4; + } else { + v2 = v*v; + v4 = v2*v2; + w = z0 - z1*v + z2*v2 - z4*v2*v2 + z6*v4*v2 - z8*v4*v4; + w = 1. - fifteen_over_pi_power_4*v2*v*w; + } + ASSERT(w >= 0.0 && w <= 1.0); + return w; +} + +static INLINE double +blackbody_fraction + (const double lambda0, /* In meter */ + const double lambda1, /* In meter */ + const double temperature) /* In Kelvin */ +{ + const double C2 = 1.43877735e-2; /* m.K */ + double x0 = C2 / lambda0; + double x1 = C2 / lambda1; + double v0 = x0 / temperature; + double v1 = x1 / temperature; + double w0 = wiebelt(v0); + double w1 = wiebelt(v1); + return w1 - w0; +} + +static INLINE double +planck + (const double lambda_min, /* In meter */ + const double lambda_max, /* In meter */ + const double temperature) /* In Kelvin */ +{ + const double T2 = temperature*temperature; + const double T4 = T2*T2; + const double BOLTZMANN_CONSTANT = 5.6696e-8; /* W/m^2/K^4 */ + ASSERT(lambda_min < lambda_max && temperature >= 0); + return blackbody_fraction(lambda_min, lambda_max, temperature) + * BOLTZMANN_CONSTANT * T4; +} + extern LOCAL_SYM res_T open_output_stream (struct htrdr* htrdr, @@ -146,5 +205,13 @@ update_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_message progress) print_mpi_progress(htrdr, progress); } +static FINLINE int +cmp_dbl(const void* a, const void* b) +{ + const double d0 = *((const double*)a); + const double d1 = *((const double*)b); + return d0 < d1 ? -1 : (d0 > d1 ? 1 : 0); +} + #endif /* HTRDR_C_H */ diff --git a/src/htrdr_compute_radiance_lw.c b/src/htrdr_compute_radiance_lw.c @@ -30,8 +30,6 @@ #include <rsys/double2.h> #include <rsys/double3.h> -#define BOLTZMANN_CONSTANT 5.6696e-8 /* W/m^2/K^4 */ - enum event { EVENT_ABSORPTION, EVENT_SCATTERING, @@ -56,64 +54,6 @@ static const struct filter_context FILTER_CONTEXT_NULL = { /******************************************************************************* * Helper functions ******************************************************************************/ -static FINLINE double -wiebelt(const double v) -{ - int m; - double w, v2, v4; - /*.153989717364e+00;*/ - const double fifteen_over_pi_power_4 = 15.0/(PI*PI*PI*PI); - const double z0 = 1.0/3.0; - const double z1 = 1.0/8.0; - const double z2 = 1.0/60.0; - const double z4 = 1.0/5040.0; - const double z6 = 1.0/272160.0; - const double z8 = 1.0/13305600.0; - - if(v >= 2.) 
{ - w = 0; - for(m=1; m<6 ;m++) - w+=exp(-m*v)/(m*m*m*m) * (((m*v+3)*m*v+6)*m*v+6); - w = w * fifteen_over_pi_power_4; - } else { - v2 = v*v; - v4 = v2*v2; - w = z0 - z1*v + z2*v2 - z4*v2*v2 + z6*v4*v2 - z8*v4*v4; - w = 1. - fifteen_over_pi_power_4*v2*v*w; - } - ASSERT(w >= 0.0 && w <= 1.0); - return w; -} - -static FINLINE double -blackbody_fraction - (const double lambda0, /* In meter */ - const double lambda1, /* In meter */ - const double temperature) /* In Kelvin */ -{ - const double C2 = 1.43877735e-2; /* m.K */ - double x0 = C2 / lambda0; - double x1 = C2 / lambda1; - double v0 = x0 / temperature; - double v1 = x1 / temperature; - double w0 = wiebelt(v0); - double w1 = wiebelt(v1); - return w1 - w0; -} - -static FINLINE double -planck - (const double lambda_min, /* In meter */ - const double lambda_max, /* In meter */ - const double temperature) /* In Kelvin */ -{ - const double T2 = temperature*temperature; - const double T4 = T2*T2; - ASSERT(lambda_min < lambda_max && temperature >= 0); - return blackbody_fraction(lambda_min, lambda_max, temperature) - * BOLTZMANN_CONSTANT * T4; -} - static int hit_filter (const struct svx_hit* hit, diff --git a/src/htrdr_draw_radiance.c b/src/htrdr_draw_radiance.c @@ -0,0 +1,897 @@ +/* Copyright (C) 2018, 2019, 2020 |Meso|Star> (contact@meso-star.com) + * Copyright (C) 2018, 2019 CNRS, Université Paul Sabatier + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#define _POSIX_C_SOURCE 200112L /* nanosleep && nextafter */ + +#include "htrdr.h" +#include "htrdr_c.h" +#include "htrdr_buffer.h" +#include "htrdr_camera.h" +#include "htrdr_solve.h" + +#include <high_tune/htsky.h> + +#include <rsys/algorithm.h> +#include <rsys/clock_time.h> +#include <rsys/cstr.h> +#include <rsys/dynamic_array_u32.h> +#include <rsys/math.h> +#include <rsys/mutex.h> +#include <star/ssp.h> + +#include <omp.h> +#include <mpi.h> +#include <time.h> +#include <unistd.h> + +#define RNG_SEQUENCE_SIZE 10000 + +#define TILE_MCODE_NULL UINT32_MAX +#define TILE_SIZE 32 /* Definition in X & Y of a tile */ +STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2); + +/* Tile of row ordered image pixels */ +struct tile { + struct list_node node; + struct mem_allocator* allocator; + ref_T ref; + + struct tile_data { + uint16_t x, y; /* 2D coordinates of the tile in tile space */ + /* Simulate the flexible array member of the C99 standard. */ + struct htrdr_accum accums[1/*dummy element*/]; + } data; +}; + +/* List of tile to compute onto the MPI process. 
*/ +struct proc_work { + struct mutex* mutex; + struct darray_u32 tiles; /* #tiles to render */ + size_t itile; /* Next tile to render in the above list of tiles */ +}; + +/******************************************************************************* + * Helper functions + ******************************************************************************/ +static FINLINE uint16_t +morton2D_decode(const uint32_t u32) +{ + uint32_t x = u32 & 0x55555555; + x = (x | (x >> 1)) & 0x33333333; + x = (x | (x >> 2)) & 0x0F0F0F0F; + x = (x | (x >> 4)) & 0x00FF00FF; + x = (x | (x >> 8)) & 0x0000FFFF; + return (uint16_t)x; +} + +static FINLINE uint32_t +morton2D_encode(const uint16_t u16) +{ + uint32_t u32 = u16; + u32 = (u32 | (u32 << 8)) & 0x00FF00FF; + u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F; + u32 = (u32 | (u32 << 2)) & 0x33333333; + u32 = (u32 | (u32 << 1)) & 0x55555555; + return u32; +} + +static FINLINE struct tile* +tile_create(struct mem_allocator* allocator) +{ + struct tile* tile; + const size_t tile_sz = + sizeof(struct tile) - sizeof(struct htrdr_accum)/*rm dummy accum*/; + const size_t buf_sz = /* Flexiblbe array element */ + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]); + ASSERT(allocator); + + tile = MEM_ALLOC(allocator, tile_sz+buf_sz); + if(!tile) return NULL; + + ref_init(&tile->ref); + list_init(&tile->node); + tile->allocator = allocator; + ASSERT(IS_ALIGNED(&tile->data.accums, ALIGNOF(struct htrdr_accum))); + + return tile; +} + +static INLINE void +tile_ref_get(struct tile* tile) +{ + ASSERT(tile); + tile_ref_get(tile); +} + +static INLINE void +release_tile(ref_T* ref) +{ + struct tile* tile = CONTAINER_OF(ref, struct tile, ref); + ASSERT(ref); + MEM_RM(tile->allocator, tile); +} + +static INLINE void +tile_ref_put(struct tile* tile) +{ + ASSERT(tile); + ref_put(&tile->ref, release_tile); +} + +static FINLINE struct htrdr_accum* +tile_at + (struct tile* tile, + const size_t x, /* In tile space */ + const size_t y) /* In tile space */ +{ + ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE); + return tile->data.accums + (y*TILE_SIZE + x) * HTRDR_ESTIMATES_COUNT__; +} + +static void +write_tile_data(struct htrdr_buffer* buf, const struct tile_data* tile_data) +{ + struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL; + size_t icol, irow; + size_t irow_tile; + size_t ncols_tile, nrows_tile; + char* buf_mem; + ASSERT(buf && tile_data); + + htrdr_buffer_get_layout(buf, &layout); + buf_mem = htrdr_buffer_get_data(buf); + ASSERT(layout.elmt_size == sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])); + + /* Compute the row/column of the tile origin into the buffer */ + icol = tile_data->x * (size_t)TILE_SIZE; + irow = tile_data->y * (size_t)TILE_SIZE; + + /* Define the number of tile row/columns to write into the buffer */ + ncols_tile = MMIN(icol + TILE_SIZE, layout.width) - icol; + nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow; + + /* Copy the tile data, row by row */ + FOR_EACH(irow_tile, 0, nrows_tile) { + char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch; + const struct htrdr_accum* tile_row = + tile_data->accums + irow_tile*TILE_SIZE*HTRDR_ESTIMATES_COUNT__; + memcpy(buf_row + icol*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]), + tile_row, ncols_tile*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])); + } +} + +static INLINE void +proc_work_init(struct mem_allocator* allocator, struct proc_work* work) +{ + ASSERT(work); + darray_u32_init(allocator, &work->tiles); + work->itile = 0; + CHK(work->mutex = mutex_create()); +} + 
+static INLINE void +proc_work_release(struct proc_work* work) +{ + darray_u32_release(&work->tiles); + mutex_destroy(work->mutex); +} + +static INLINE void +proc_work_reset(struct proc_work* work) +{ + ASSERT(work); + mutex_lock(work->mutex); + darray_u32_clear(&work->tiles); + work->itile = 0; + mutex_unlock(work->mutex); +} + +static INLINE void +proc_work_add_tile(struct proc_work* work, const uint32_t mcode) +{ + mutex_lock(work->mutex); + CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK); + mutex_unlock(work->mutex); +} + +static INLINE uint32_t +proc_work_get_tile(struct proc_work* work) +{ + uint32_t mcode; + ASSERT(work); + mutex_lock(work->mutex); + if(work->itile >= darray_u32_size_get(&work->tiles)) { + mcode = TILE_MCODE_NULL; + } else { + mcode = darray_u32_cdata_get(&work->tiles)[work->itile]; + ++work->itile; + } + mutex_unlock(work->mutex); + return mcode; +} + +static INLINE size_t +proc_work_get_ntiles(struct proc_work* work) +{ + size_t sz = 0; + ASSERT(work); + mutex_lock(work->mutex); + sz = darray_u32_size_get(&work->tiles); + mutex_unlock(work->mutex); + return sz; +} + +static void +mpi_wait_for_request(struct htrdr* htrdr, MPI_Request* req) +{ + ASSERT(htrdr && req); + + /* Wait for process synchronisation */ + for(;;) { + struct timespec t; + int complete; + t.tv_sec = 0; + t.tv_nsec = 10000000; /* 10ms */ + + mutex_lock(htrdr->mpi_mutex); + MPI(Test(req, &complete, MPI_STATUS_IGNORE)); + mutex_unlock(htrdr->mpi_mutex); + if(complete) break; + + nanosleep(&t, NULL); + } +} + +static void +mpi_probe_thieves + (struct htrdr* htrdr, + struct proc_work* work, + ATOMIC* probe_thieves) +{ + uint32_t tiles[UINT8_MAX]; + struct timespec t; + ASSERT(htrdr && work && probe_thieves); + + if(htrdr->mpi_nprocs == 1) /* The process is alone. No thief is possible */ + return; + + t.tv_sec = 0; + + /* Protect MPI calls of multiple invocations from concurrent threads */ + #define P_MPI(Func) { \ + mutex_lock(htrdr->mpi_mutex); \ + MPI(Func); \ + mutex_unlock(htrdr->mpi_mutex); \ + } (void)0 + + while(ATOMIC_GET(probe_thieves)) { + MPI_Status status; + size_t itile; + int msg; + + /* Probe if a steal request was submitted by any processes */ + P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg, + &status)); + + if(msg) { /* A steal request was posted */ + MPI_Request req; + uint8_t ntiles_to_steal; + + /* Asynchronously receive the steal request */ + P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE, + HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req)); + + /* Wait for the completion of the steal request */ + mpi_wait_for_request(htrdr, &req); + + /* Thief some tiles */ + FOR_EACH(itile, 0, ntiles_to_steal) { + tiles[itile] = proc_work_get_tile(work); + } + P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE, + HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD)); + } + t.tv_nsec = 500000000; /* 500ms */ + nanosleep(&t, NULL); + } + #undef P_MPI +} + +static int +mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng) +{ + int iproc, i; + int dst_rank; + ASSERT(htrdr && rng && htrdr->mpi_nworking_procs); + + /* Sample the index of the 1st active process */ + iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs); + + /* Find the rank of the sampled active process. Use a simple linear search + * since the overall number of processes should be quite low; at most few + * dozens. 
*/ + i = 0; + FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) { + if(htrdr->mpi_working_procs[dst_rank] == 0) continue; /* Inactive process */ + if(i == iproc) break; /* The rank of the sampled process is found */ + ++i; + } + ASSERT(dst_rank < htrdr->mpi_nprocs); + return dst_rank; +} + +/* Return the number of stolen tiles */ +static size_t +mpi_steal_work + (struct htrdr* htrdr, + struct ssp_rng* rng, + struct proc_work* work) +{ + MPI_Request req; + size_t itile; + size_t nthieves = 0; + uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */ + int proc_to_steal; /* Process to steal */ + uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16); + ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX); + + /* Protect MPI calls of multiple invocations from concurrent threads */ + #define P_MPI(Func) { \ + mutex_lock(htrdr->mpi_mutex); \ + MPI(Func); \ + mutex_unlock(htrdr->mpi_mutex); \ + } (void)0 + + /* No more working process => nohting to steal */ + if(!htrdr->mpi_nworking_procs) return 0; + + /* Sample a process to steal */ + proc_to_steal = mpi_sample_working_process(htrdr, rng); + + /* Send a steal request to the sampled process and wait for a response */ + P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal, + HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD)); + + /* Receive the stolen tile from the sampled process */ + P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal, + HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req)); + + mpi_wait_for_request(htrdr, &req); + + FOR_EACH(itile, 0, ntiles_to_steal) { + if(tiles[itile] == TILE_MCODE_NULL) { + ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0); + htrdr->mpi_working_procs[proc_to_steal] = 0; + htrdr->mpi_nworking_procs--; + break; + } + proc_work_add_tile(work, tiles[itile]); + ++nthieves; + } + #undef P_MPI + return nthieves; +} + +static res_T +mpi_gather_tiles + (struct htrdr* htrdr, + struct htrdr_buffer* buf, + const size_t ntiles, + struct list_node* tiles) +{ + /* Compute the size of the tile_data */ + const size_t msg_sz = + sizeof(struct tile_data) - sizeof(struct htrdr_accum)/*dummy*/ + + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]); + + struct list_node* node = NULL; + struct tile* tile = NULL; + res_T res = RES_OK; + ASSERT(htrdr && tiles); + ASSERT(htrdr->mpi_rank != 0 || buf); + (void)ntiles; + + if(htrdr->mpi_rank != 0) { /* Non master process */ + /* Send the computed tile to the master process */ + LIST_FOR_EACH(node, tiles) { + struct tile* t = CONTAINER_OF(node, struct tile, node); + MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0, + HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD)); + } + } else { /* Master process */ + size_t itile = 0; + + LIST_FOR_EACH(node, tiles) { + struct tile* t = CONTAINER_OF(node, struct tile, node); + write_tile_data(buf, &t->data); + ++itile; + } + + if(itile != ntiles) { + ASSERT(htrdr->mpi_nprocs > 1); + + /* Create a temporary tile to receive the tile data computed by the + * concurrent MPI processes */ + tile = tile_create(htrdr->allocator); + if(!tile) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not allocate the temporary tile used to gather the process " + "output data -- %s.\n", res_to_cstr(res)); + goto error; + } + + /* Receive the tile data of the concurret MPI processes */ + FOR_EACH(itile, itile, ntiles) { + MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE, + HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); + write_tile_data(buf, &tile->data); + } + } + } + +exit: + if(tile) tile_ref_put(tile); + return 
res; +error: + goto exit; +} + +static INLINE size_t +sample_lw_spectral_interval(struct htrdr* htrdr, const double r) +{ + const double* cdf = NULL; + const double* find = NULL; + double r_next = nextafter(r, DBL_MAX); + size_t cdf_length = 0; + size_t i; + ASSERT(htrdr && iband); + ASSERT(r >= 0 && r < 1); + + cdf = darray_double_cdata_get(&htrdr->lw_cdf); + cdf_length = darray_double_size_get(&htrdr->lw_cdf); + + /* Use r_next rather than r in order to find the first entry that is not less + * than *or equal* to r */ + find = search_lower_bound(&r_next, cdf, cdf_length, sizeof(double), cmp_dbl); + ASSERT(find); + + i = (size_t)(find - cdf); + ASSERT(i < cdf_length && cdf[i] > r && (!i || cdf[i-1] <= r)); + return i; +} + +static res_T +draw_tile + (struct htrdr* htrdr, + const size_t ithread, + const int64_t tile_mcode, /* For debug only */ + const size_t tile_org[2], /* Origin of the tile in pixel space */ + const size_t tile_sz[2], /* Definition of the tile */ + const double pix_sz[2], /* Size of a pixel in the normalized image plane */ + const struct htrdr_camera* cam, + const size_t spp, /* #samples per pixel */ + struct ssp_rng* rng, + struct tile* tile) +{ + size_t nchannels; + size_t npixels; + size_t mcode; /* Morton code of tile pixel */ + ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && tile); + (void)tile_mcode; + /* Adjust the #pixels to process them wrt a morton order */ + npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1])); + npixels *= npixels; + + /* Define how many channels to handle */ + nchannels = htsky_is_long_wave(htrdr->sky) ? 1 : 3; + + FOR_EACH(mcode, 0, npixels) { + struct htrdr_accum* pix_accums; + size_t ipix_tile[2]; /* Pixel coord in the tile */ + size_t ipix[2]; /* Pixel coord in the buffer */ + size_t ichannel; + + ipix_tile[0] = morton2D_decode((uint32_t)(mcode>>0)); + if(ipix_tile[0] >= tile_sz[0]) continue; /* Pixel is out of tile */ + ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1)); + if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */ + + /* Fetch and reset the pixel accumulator */ + pix_accums = tile_at(tile, ipix_tile[0], ipix_tile[1]); + + /* Reset the pixel accumulators */ + pix_accums[HTRDR_ESTIMATE_X] = HTRDR_ACCUM_NULL; + pix_accums[HTRDR_ESTIMATE_Y] = HTRDR_ACCUM_NULL; + pix_accums[HTRDR_ESTIMATE_Z] = HTRDR_ACCUM_NULL; + pix_accums[HTRDR_ESTIMATE_TIME] = HTRDR_ACCUM_NULL; + + /* Compute the pixel coordinate */ + ipix[0] = tile_org[0] + ipix_tile[0]; + ipix[1] = tile_org[1] + ipix_tile[1]; + + FOR_EACH(ichannel, 0, nchannels) { + /* Check that the X, Y and Z estimates are stored in accumulators 0, 1 et + * 2, respectively */ + STATIC_ASSERT + ( HTRDR_ESTIMATE_X == 0 + && HTRDR_ESTIMATE_Y == 1 + && HTRDR_ESTIMATE_Z == 2, + Unexpected_htrdr_estimate_enumerate); + size_t isamp; + + FOR_EACH(isamp, 0, spp) { + struct time t0, t1; + double pix_samp[2]; + double ray_org[3]; + double ray_dir[3]; + double weight; + double r0, r1; + size_t iband; + size_t iquad; + double usec; + + /* Begin the registration of the time spent to in the realisation */ + time_current(&t0); + + /* Sample a position into the pixel, in the normalized image plane */ + pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0]; + pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1]; + + /* Generate a ray starting from the pinhole camera and passing through the + * pixel sample */ + htrdr_camera_ray(cam, pix_samp, ray_org, ray_dir); + + r0 = ssp_rng_canonical(rng); + r1 = ssp_rng_canonical(rng); + + 
if(htsky_is_long_wave(htrdr->sky)) { + /* Sample a spectral band and a quadrature point */ + iband = sample_lw_spectral_interval(htrdr, r0); + iquad = htsky_spectral_band_sample_quadrature(htrdr->sky, r1, iband); + /* Compute the luminance */ + weight = htrdr_compute_radiance_lw + (htrdr, ithread, rng, ray_org, ray_dir, iband, iquad); + } else { + /* Sample a spectral band and a quadrature point */ + switch(ichannel) { + case 0: + htsky_sample_sw_spectral_data_CIE_1931_X + (htrdr->sky, r0, r1, &iband, &iquad); + break; + case 1: + htsky_sample_sw_spectral_data_CIE_1931_Y + (htrdr->sky, r0, r1, &iband, &iquad); + break; + case 2: + htsky_sample_sw_spectral_data_CIE_1931_Z + (htrdr->sky, r0, r1, &iband, &iquad); + break; + default: FATAL("Unreachable code.\n"); break; + } + /* Compute the luminance */ + weight = htrdr_compute_radiance_sw + (htrdr, ithread, rng, ray_org, ray_dir, iband, iquad); + } + ASSERT(weight >= 0); + + /* End the registration of the per realisation time */ + time_sub(&t0, time_current(&t1), &t0); + usec = (double)time_val(&t0, TIME_NSEC) * 0.001; + + /* Update the pixel accumulator of the current channel */ + pix_accums[ichannel].sum_weights += weight; + pix_accums[ichannel].sum_weights_sqr += weight*weight; + pix_accums[ichannel].nweights += 1; + + /* Update the pixel accumulator of per realisation time */ + pix_accums[HTRDR_ESTIMATE_TIME].sum_weights += usec; + pix_accums[HTRDR_ESTIMATE_TIME].sum_weights_sqr += usec*usec; + pix_accums[HTRDR_ESTIMATE_TIME].nweights += 1; + } + } + } + return RES_OK; +} + +static res_T +draw_image + (struct htrdr* htrdr, + const struct htrdr_camera* cam, + const size_t width, /* Image width */ + const size_t height, /* Image height */ + const size_t spp, + const size_t ntiles_x, + const size_t ntiles_y, + const size_t ntiles_adjusted, + const double pix_sz[2], /* Pixel size in the normalized image plane */ + struct proc_work* work, + struct list_node* tiles) +{ + struct ssp_rng* rng_proc = NULL; + size_t nthreads = 0; + size_t nthieves = 0; + size_t proc_ntiles = 0; + ATOMIC nsolved_tiles = 0; + ATOMIC res = RES_OK; + ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles); + ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0); + ASSERT(width && height); + (void)ntiles_x, (void)ntiles_y; + + res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc); + if(res != RES_OK) { + htrdr_log_err(htrdr, "could not create the RNG used to sample a process " + "to steal -- %s.\n", res_to_cstr((res_T)res)); + goto error; + } + + proc_ntiles = proc_work_get_ntiles(work); + nthreads = MMIN(htrdr->nthreads, proc_ntiles); + + /* The process is not considered as a working process for himself */ + htrdr->mpi_working_procs[htrdr->mpi_rank] = 0; + --htrdr->mpi_nworking_procs; + + omp_set_num_threads((int)nthreads); + #pragma omp parallel + for(;;) { + const int ithread = omp_get_thread_num(); + struct ssp_rng_proxy* rng_proxy = NULL; + struct ssp_rng* rng; + struct tile* tile; + uint32_t mcode = TILE_MCODE_NULL; + size_t tile_org[2]; + size_t tile_sz[2]; + size_t n; + res_T res_local = RES_OK; + int32_t pcent; + + /* Get a tile to draw */ + #pragma omp critical + { + mcode = proc_work_get_tile(work); + if(mcode == TILE_MCODE_NULL) { /* No more work on this process */ + /* Try to steal works to concurrent processes */ + proc_work_reset(work); + nthieves = mpi_steal_work(htrdr, rng_proc, work); + if(nthieves != 0) { + mcode = proc_work_get_tile(work); + } + } + } + if(mcode == TILE_MCODE_NULL) break; /* No more work */ + + /* Decode the 
morton code to retrieve the tile index */ + tile_org[0] = morton2D_decode((uint32_t)(mcode>>0)); + tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); + ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y); + + /* Create the tile */ + tile = tile_create(htrdr->allocator); + if(!tile) { + ATOMIC_SET(&res, RES_MEM_ERR); + htrdr_log_err(htrdr, + "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n", + (unsigned long)tile_org[0], (unsigned long)tile_org[1], + res_to_cstr((res_T)ATOMIC_GET(&res))); + break; + } + + /* Register the tile */ + #pragma omp critical + list_add_tail(tiles, &tile->node); + + tile->data.x = (uint16_t)tile_org[0]; + tile->data.y = (uint16_t)tile_org[1]; + + /* Define the tile origin in pixel space */ + tile_org[0] *= TILE_SIZE; + tile_org[1] *= TILE_SIZE; + + /* Compute the size of the tile clamped by the borders of the buffer */ + tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]); + tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]); + + /* Create a proxy RNG for the current tile. This proxy is used for the + * current thread only and thus it has to manage only one RNG. This proxy + * is initialised in order to ensure that an unique and predictable set of + * random numbers is used for the current tile. */ + SSP(rng_proxy_create2 + (&htrdr->lifo_allocators[ithread], + &ssp_rng_threefry, + RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */ + RNG_SEQUENCE_SIZE, /* Size */ + RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */ + 1, &rng_proxy)); + SSP(rng_proxy_create_rng(rng_proxy, 0, &rng)); + + /* Launch the tile rendering */ + res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz, + pix_sz, cam, spp, rng, tile); + + SSP(rng_proxy_ref_put(rng_proxy)); + SSP(rng_ref_put(rng)); + + if(res_local != RES_OK) { + ATOMIC_SET(&res, res_local); + break; + } + + /* Update the progress status */ + n = (size_t)ATOMIC_INCR(&nsolved_tiles); + pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/); + + #pragma omp critical + if(pcent > htrdr->mpi_progress_render[0]) { + htrdr->mpi_progress_render[0] = pcent; + if(htrdr->mpi_rank == 0) { + update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + } else { /* Send the progress percentage to the master process */ + send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent); + } + } + } + + if(ATOMIC_GET(&res) != RES_OK) goto error; + + /* Synchronize the process */ + mutex_lock(htrdr->mpi_mutex); + MPI(Barrier(MPI_COMM_WORLD)); + mutex_unlock(htrdr->mpi_mutex); + +exit: + if(rng_proc) SSP(rng_ref_put(rng_proc)); + return (res_T)res; +error: + goto exit; +} + +/******************************************************************************* + * Local functions + ******************************************************************************/ +res_T +htrdr_draw_radiance + (struct htrdr* htrdr, + const struct htrdr_camera* cam, + const size_t width, + const size_t height, + const size_t spp, + struct htrdr_buffer* buf) +{ + char strbuf[128]; + struct time t0, t1; + struct list_node tiles; + size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted; + size_t itile; + struct proc_work work; + struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL; + size_t proc_ntiles_adjusted; + double pix_sz[2]; + ATOMIC probe_thieves = 1; + ATOMIC res = RES_OK; + ASSERT(htrdr && cam && width && height); + ASSERT(htrdr->mpi_rank != 0 || buf); + + list_init(&tiles); + proc_work_init(htrdr->allocator, &work); + + if(htrdr->mpi_rank == 0) { + htrdr_buffer_get_layout(buf, &layout); + 
ASSERT(layout.width || layout.height || layout.elmt_size); + ASSERT(layout.width == width && layout.height == height); + + if(layout.elmt_size != sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]) + || layout.alignment < ALIGNOF(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])) { + htrdr_log_err(htrdr, + "%s: invalid buffer layout. " + "The pixel size must be the size of %lu accumulators.\n", + FUNC_NAME, (unsigned long)HTRDR_ESTIMATES_COUNT__); + res = RES_BAD_ARG; + goto error; + } + } + + /* Compute the overall number of tiles */ + ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; + ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; + ntiles = ntiles_x * ntiles_y; + + /* Compute the pixel size in the normalized image plane */ + pix_sz[0] = 1.0 / (double)width; + pix_sz[1] = 1.0 / (double)height; + + /* Adjust the #tiles for the morton-encoding procedure */ + ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y)); + ntiles_adjusted *= ntiles_adjusted; + + /* Define the initial number of tiles of the current process */ + proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs; + if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */ + proc_ntiles_adjusted += + ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs; + } + + /* Define the initial list of tiles of the process */ + FOR_EACH(itile, 0, proc_ntiles_adjusted) { + uint32_t mcode; + uint16_t tile_org[2]; + + mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs + + (uint32_t)htrdr->mpi_rank; + + tile_org[0] = morton2D_decode(mcode>>0); + if(tile_org[0] >= ntiles_x) continue; + tile_org[1] = morton2D_decode(mcode>>1); + if(tile_org[1] >= ntiles_y) continue; + proc_work_add_tile(&work, mcode); + } + + if(htrdr->mpi_rank == 0) { + fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + } + + time_current(&t0); + + omp_set_nested(1); /* Enable nested threads for draw_image */ + #pragma omp parallel sections num_threads(2) + { + #pragma omp section + mpi_probe_thieves(htrdr, &work, &probe_thieves); + + #pragma omp section + { + draw_image(htrdr, cam, width, height, spp, ntiles_x, ntiles_y, + ntiles_adjusted, pix_sz, &work, &tiles); + /* The processes have no more work to do. 
Stop probing for thieves */ + ATOMIC_SET(&probe_thieves, 0); + } + } + + if(htrdr->mpi_rank == 0) { + update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + fprintf(stderr, "\n"); /* Add a new line after the progress statuses */ + } + + time_sub(&t0, time_current(&t1), &t0); + time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf)); + htrdr_log(htrdr, "Rendering time: %s\n", strbuf); + + /* Gather accum buffers from the group of processes */ + time_current(&t0); + res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles); + if(res != RES_OK) goto error; + time_sub(&t0, time_current(&t1), &t0); + time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf)); + htrdr_log(htrdr, "Image gathering time: %s\n", strbuf); + +exit: + { /* Free allocated tiles */ + struct list_node* node; + struct list_node* tmp; + LIST_FOR_EACH_SAFE(node, tmp, &tiles) { + struct tile* tile = CONTAINER_OF(node, struct tile, node); + list_del(node); + tile_ref_put(tile); + } + } + proc_work_release(&work); + return (res_T)res; +error: + goto exit; +} + diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c @@ -1,851 +0,0 @@ -/* Copyright (C) 2018, 2019, 2020 |Meso|Star> (contact@meso-star.com) - * Copyright (C) 2018, 2019 CNRS, Université Paul Sabatier - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -#define _POSIX_C_SOURCE 199309L /* nanosleep */ - -#include "htrdr.h" -#include "htrdr_c.h" -#include "htrdr_buffer.h" -#include "htrdr_camera.h" -#include "htrdr_solve.h" - -#include <high_tune/htsky.h> - -#include <rsys/clock_time.h> -#include <rsys/cstr.h> -#include <rsys/dynamic_array_u32.h> -#include <rsys/math.h> -#include <rsys/mutex.h> -#include <star/ssp.h> - -#include <omp.h> -#include <mpi.h> -#include <time.h> -#include <unistd.h> - -#define RNG_SEQUENCE_SIZE 10000 - -#define TILE_MCODE_NULL UINT32_MAX -#define TILE_SIZE 32 /* Definition in X & Y of a tile */ -STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2); - -/* Tile of row ordered image pixels */ -struct tile { - struct list_node node; - struct mem_allocator* allocator; - ref_T ref; - - struct tile_data { - uint16_t x, y; /* 2D coordinates of the tile in tile space */ - /* Simulate the flexible array member of the C99 standard. */ - struct htrdr_accum accums[1/*dummy element*/]; - } data; -}; - -/* List of tile to compute onto the MPI process. 
*/ -struct proc_work { - struct mutex* mutex; - struct darray_u32 tiles; /* #tiles to render */ - size_t itile; /* Next tile to render in the above list of tiles */ -}; - -/******************************************************************************* - * Helper functions - ******************************************************************************/ -static FINLINE uint16_t -morton2D_decode(const uint32_t u32) -{ - uint32_t x = u32 & 0x55555555; - x = (x | (x >> 1)) & 0x33333333; - x = (x | (x >> 2)) & 0x0F0F0F0F; - x = (x | (x >> 4)) & 0x00FF00FF; - x = (x | (x >> 8)) & 0x0000FFFF; - return (uint16_t)x; -} - -static FINLINE uint32_t -morton2D_encode(const uint16_t u16) -{ - uint32_t u32 = u16; - u32 = (u32 | (u32 << 8)) & 0x00FF00FF; - u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F; - u32 = (u32 | (u32 << 2)) & 0x33333333; - u32 = (u32 | (u32 << 1)) & 0x55555555; - return u32; -} - -static FINLINE struct tile* -tile_create(struct mem_allocator* allocator) -{ - struct tile* tile; - const size_t tile_sz = - sizeof(struct tile) - sizeof(struct htrdr_accum)/*rm dummy accum*/; - const size_t buf_sz = /* Flexiblbe array element */ - TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]); - ASSERT(allocator); - - tile = MEM_ALLOC(allocator, tile_sz+buf_sz); - if(!tile) return NULL; - - ref_init(&tile->ref); - list_init(&tile->node); - tile->allocator = allocator; - ASSERT(IS_ALIGNED(&tile->data.accums, ALIGNOF(struct htrdr_accum))); - - return tile; -} - -static INLINE void -tile_ref_get(struct tile* tile) -{ - ASSERT(tile); - tile_ref_get(tile); -} - -static INLINE void -release_tile(ref_T* ref) -{ - struct tile* tile = CONTAINER_OF(ref, struct tile, ref); - ASSERT(ref); - MEM_RM(tile->allocator, tile); -} - -static INLINE void -tile_ref_put(struct tile* tile) -{ - ASSERT(tile); - ref_put(&tile->ref, release_tile); -} - -static FINLINE struct htrdr_accum* -tile_at - (struct tile* tile, - const size_t x, /* In tile space */ - const size_t y) /* In tile space */ -{ - ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE); - return tile->data.accums + (y*TILE_SIZE + x) * HTRDR_ESTIMATES_COUNT__; -} - -static void -write_tile_data(struct htrdr_buffer* buf, const struct tile_data* tile_data) -{ - struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL; - size_t icol, irow; - size_t irow_tile; - size_t ncols_tile, nrows_tile; - char* buf_mem; - ASSERT(buf && tile_data); - - htrdr_buffer_get_layout(buf, &layout); - buf_mem = htrdr_buffer_get_data(buf); - ASSERT(layout.elmt_size == sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])); - - /* Compute the row/column of the tile origin into the buffer */ - icol = tile_data->x * (size_t)TILE_SIZE; - irow = tile_data->y * (size_t)TILE_SIZE; - - /* Define the number of tile row/columns to write into the buffer */ - ncols_tile = MMIN(icol + TILE_SIZE, layout.width) - icol; - nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow; - - /* Copy the tile data, row by row */ - FOR_EACH(irow_tile, 0, nrows_tile) { - char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch; - const struct htrdr_accum* tile_row = - tile_data->accums + irow_tile*TILE_SIZE*HTRDR_ESTIMATES_COUNT__; - memcpy(buf_row + icol*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]), - tile_row, ncols_tile*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])); - } -} - -static INLINE void -proc_work_init(struct mem_allocator* allocator, struct proc_work* work) -{ - ASSERT(work); - darray_u32_init(allocator, &work->tiles); - work->itile = 0; - CHK(work->mutex = mutex_create()); -} - 
-static INLINE void -proc_work_release(struct proc_work* work) -{ - darray_u32_release(&work->tiles); - mutex_destroy(work->mutex); -} - -static INLINE void -proc_work_reset(struct proc_work* work) -{ - ASSERT(work); - mutex_lock(work->mutex); - darray_u32_clear(&work->tiles); - work->itile = 0; - mutex_unlock(work->mutex); -} - -static INLINE void -proc_work_add_tile(struct proc_work* work, const uint32_t mcode) -{ - mutex_lock(work->mutex); - CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK); - mutex_unlock(work->mutex); -} - -static INLINE uint32_t -proc_work_get_tile(struct proc_work* work) -{ - uint32_t mcode; - ASSERT(work); - mutex_lock(work->mutex); - if(work->itile >= darray_u32_size_get(&work->tiles)) { - mcode = TILE_MCODE_NULL; - } else { - mcode = darray_u32_cdata_get(&work->tiles)[work->itile]; - ++work->itile; - } - mutex_unlock(work->mutex); - return mcode; -} - -static INLINE size_t -proc_work_get_ntiles(struct proc_work* work) -{ - size_t sz = 0; - ASSERT(work); - mutex_lock(work->mutex); - sz = darray_u32_size_get(&work->tiles); - mutex_unlock(work->mutex); - return sz; -} - -static void -mpi_wait_for_request(struct htrdr* htrdr, MPI_Request* req) -{ - ASSERT(htrdr && req); - - /* Wait for process synchronisation */ - for(;;) { - struct timespec t; - int complete; - t.tv_sec = 0; - t.tv_nsec = 10000000; /* 10ms */ - - mutex_lock(htrdr->mpi_mutex); - MPI(Test(req, &complete, MPI_STATUS_IGNORE)); - mutex_unlock(htrdr->mpi_mutex); - if(complete) break; - - nanosleep(&t, NULL); - } -} - -static void -mpi_probe_thieves - (struct htrdr* htrdr, - struct proc_work* work, - ATOMIC* probe_thieves) -{ - uint32_t tiles[UINT8_MAX]; - struct timespec t; - ASSERT(htrdr && work && probe_thieves); - - if(htrdr->mpi_nprocs == 1) /* The process is alone. No thief is possible */ - return; - - t.tv_sec = 0; - - /* Protect MPI calls of multiple invocations from concurrent threads */ - #define P_MPI(Func) { \ - mutex_lock(htrdr->mpi_mutex); \ - MPI(Func); \ - mutex_unlock(htrdr->mpi_mutex); \ - } (void)0 - - while(ATOMIC_GET(probe_thieves)) { - MPI_Status status; - size_t itile; - int msg; - - /* Probe if a steal request was submitted by any processes */ - P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg, - &status)); - - if(msg) { /* A steal request was posted */ - MPI_Request req; - uint8_t ntiles_to_steal; - - /* Asynchronously receive the steal request */ - P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE, - HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req)); - - /* Wait for the completion of the steal request */ - mpi_wait_for_request(htrdr, &req); - - /* Thief some tiles */ - FOR_EACH(itile, 0, ntiles_to_steal) { - tiles[itile] = proc_work_get_tile(work); - } - P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE, - HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD)); - } - t.tv_nsec = 500000000; /* 500ms */ - nanosleep(&t, NULL); - } - #undef P_MPI -} - -static int -mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng) -{ - int iproc, i; - int dst_rank; - ASSERT(htrdr && rng && htrdr->mpi_nworking_procs); - - /* Sample the index of the 1st active process */ - iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs); - - /* Find the rank of the sampled active process. Use a simple linear search - * since the overall number of processes should be quite low; at most few - * dozens. 
*/ - i = 0; - FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) { - if(htrdr->mpi_working_procs[dst_rank] == 0) continue; /* Inactive process */ - if(i == iproc) break; /* The rank of the sampled process is found */ - ++i; - } - ASSERT(dst_rank < htrdr->mpi_nprocs); - return dst_rank; -} - -/* Return the number of stolen tiles */ -static size_t -mpi_steal_work - (struct htrdr* htrdr, - struct ssp_rng* rng, - struct proc_work* work) -{ - MPI_Request req; - size_t itile; - size_t nthieves = 0; - uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */ - int proc_to_steal; /* Process to steal */ - uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16); - ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX); - - /* Protect MPI calls of multiple invocations from concurrent threads */ - #define P_MPI(Func) { \ - mutex_lock(htrdr->mpi_mutex); \ - MPI(Func); \ - mutex_unlock(htrdr->mpi_mutex); \ - } (void)0 - - /* No more working process => nohting to steal */ - if(!htrdr->mpi_nworking_procs) return 0; - - /* Sample a process to steal */ - proc_to_steal = mpi_sample_working_process(htrdr, rng); - - /* Send a steal request to the sampled process and wait for a response */ - P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal, - HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD)); - - /* Receive the stolen tile from the sampled process */ - P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal, - HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req)); - - mpi_wait_for_request(htrdr, &req); - - FOR_EACH(itile, 0, ntiles_to_steal) { - if(tiles[itile] == TILE_MCODE_NULL) { - ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0); - htrdr->mpi_working_procs[proc_to_steal] = 0; - htrdr->mpi_nworking_procs--; - break; - } - proc_work_add_tile(work, tiles[itile]); - ++nthieves; - } - #undef P_MPI - return nthieves; -} - -static res_T -mpi_gather_tiles - (struct htrdr* htrdr, - struct htrdr_buffer* buf, - const size_t ntiles, - struct list_node* tiles) -{ - /* Compute the size of the tile_data */ - const size_t msg_sz = - sizeof(struct tile_data) - sizeof(struct htrdr_accum)/*dummy*/ - + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]); - - struct list_node* node = NULL; - struct tile* tile = NULL; - res_T res = RES_OK; - ASSERT(htrdr && tiles); - ASSERT(htrdr->mpi_rank != 0 || buf); - (void)ntiles; - - if(htrdr->mpi_rank != 0) { /* Non master process */ - /* Send the computed tile to the master process */ - LIST_FOR_EACH(node, tiles) { - struct tile* t = CONTAINER_OF(node, struct tile, node); - MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0, - HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD)); - } - } else { /* Master process */ - size_t itile = 0; - - LIST_FOR_EACH(node, tiles) { - struct tile* t = CONTAINER_OF(node, struct tile, node); - write_tile_data(buf, &t->data); - ++itile; - } - - if(itile != ntiles) { - ASSERT(htrdr->mpi_nprocs > 1); - - /* Create a temporary tile to receive the tile data computed by the - * concurrent MPI processes */ - tile = tile_create(htrdr->allocator); - if(!tile) { - res = RES_MEM_ERR; - htrdr_log_err(htrdr, - "could not allocate the temporary tile used to gather the process " - "output data -- %s.\n", res_to_cstr(res)); - goto error; - } - - /* Receive the tile data of the concurret MPI processes */ - FOR_EACH(itile, itile, ntiles) { - MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE, - HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); - write_tile_data(buf, &tile->data); - } - } - } - -exit: - if(tile) tile_ref_put(tile); - return 
res; -error: - goto exit; -} - -static res_T -draw_tile - (struct htrdr* htrdr, - const size_t ithread, - const int64_t tile_mcode, /* For debug only */ - const size_t tile_org[2], /* Origin of the tile in pixel space */ - const size_t tile_sz[2], /* Definition of the tile */ - const double pix_sz[2], /* Size of a pixel in the normalized image plane */ - const struct htrdr_camera* cam, - const size_t spp, /* #samples per pixel */ - struct ssp_rng* rng, - struct tile* tile) -{ - size_t npixels; - size_t mcode; /* Morton code of tile pixel */ - ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && tile); - (void)tile_mcode; - /* Adjust the #pixels to process them wrt a morton order */ - npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1])); - npixels *= npixels; - - FOR_EACH(mcode, 0, npixels) { - struct htrdr_accum* pix_accums; - size_t ipix_tile[2]; /* Pixel coord in the tile */ - size_t ipix[2]; /* Pixel coord in the buffer */ - size_t ichannel; - - ipix_tile[0] = morton2D_decode((uint32_t)(mcode>>0)); - if(ipix_tile[0] >= tile_sz[0]) continue; /* Pixel is out of tile */ - ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1)); - if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */ - - /* Fetch and reset the pixel accumulator */ - pix_accums = tile_at(tile, ipix_tile[0], ipix_tile[1]); - pix_accums[HTRDR_ESTIMATE_TIME] = HTRDR_ACCUM_NULL; /* Reset time per radiative path */ - - /* Compute the pixel coordinate */ - ipix[0] = tile_org[0] + ipix_tile[0]; - ipix[1] = tile_org[1] + ipix_tile[1]; - - FOR_EACH(ichannel, 0, 3) { - /* Check that the X, Y and Z estimates are stored in accumulators 0, 1 et - * 2, respectively */ - STATIC_ASSERT - ( HTRDR_ESTIMATE_X == 0 - && HTRDR_ESTIMATE_Y == 1 - && HTRDR_ESTIMATE_Z == 2, - Unexpected_htrdr_estimate_enumerate); - size_t isamp; - - pix_accums[ichannel] = HTRDR_ACCUM_NULL; - FOR_EACH(isamp, 0, spp) { - struct time t0, t1; - double pix_samp[2]; - double ray_org[3]; - double ray_dir[3]; - double weight; - double r0, r1; - size_t iband; - size_t iquad; - double usec; - - /* Sample a position into the pixel, in the normalized image plane */ - pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0]; - pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1]; - - /* Generate a ray starting from the pinhole camera and passing through the - * pixel sample */ - htrdr_camera_ray(cam, pix_samp, ray_org, ray_dir); - - /* Sample a spectral band and a quadrature point */ - r0 = ssp_rng_canonical(rng); - r1 = ssp_rng_canonical(rng); - switch(ichannel) { - case 0: - htsky_sample_sw_spectral_data_CIE_1931_X - (htrdr->sky, r0, r1, &iband, &iquad); - break; - case 1: - htsky_sample_sw_spectral_data_CIE_1931_Y - (htrdr->sky, r0, r1, &iband, &iquad); - break; - case 2: - htsky_sample_sw_spectral_data_CIE_1931_Z - (htrdr->sky, r0, r1, &iband, &iquad); - break; - default: FATAL("Unreachable code.\n"); break; - } - - /* Compute the radiance that reach the pixel through the ray */ - time_current(&t0); - weight = htrdr_compute_radiance_sw - (htrdr, ithread, rng, ray_org, ray_dir, iband, iquad); - ASSERT(weight >= 0); - time_sub(&t0, time_current(&t1), &t0); - usec = (double)time_val(&t0, TIME_NSEC) * 0.001; - - /* Update the pixel accumulator of the current channel */ - pix_accums[ichannel].sum_weights += weight; - pix_accums[ichannel].sum_weights_sqr += weight*weight; - pix_accums[ichannel].nweights += 1; - - /* Update the pixel accumulator of per realisation time */ - pix_accums[HTRDR_ESTIMATE_TIME].sum_weights += usec; - 
pix_accums[HTRDR_ESTIMATE_TIME].sum_weights_sqr += usec*usec; - pix_accums[HTRDR_ESTIMATE_TIME].nweights += 1; - } - } - } - return RES_OK; -} - -static res_T -draw_image - (struct htrdr* htrdr, - const struct htrdr_camera* cam, - const size_t width, /* Image width */ - const size_t height, /* Image height */ - const size_t spp, - const size_t ntiles_x, - const size_t ntiles_y, - const size_t ntiles_adjusted, - const double pix_sz[2], /* Pixel size in the normalized image plane */ - struct proc_work* work, - struct list_node* tiles) -{ - struct ssp_rng* rng_proc = NULL; - size_t nthreads = 0; - size_t nthieves = 0; - size_t proc_ntiles = 0; - ATOMIC nsolved_tiles = 0; - ATOMIC res = RES_OK; - ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles); - ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0); - ASSERT(width && height); - (void)ntiles_x, (void)ntiles_y; - - res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc); - if(res != RES_OK) { - htrdr_log_err(htrdr, "could not create the RNG used to sample a process " - "to steal -- %s.\n", res_to_cstr((res_T)res)); - goto error; - } - - proc_ntiles = proc_work_get_ntiles(work); - nthreads = MMIN(htrdr->nthreads, proc_ntiles); - - /* The process is not considered as a working process for himself */ - htrdr->mpi_working_procs[htrdr->mpi_rank] = 0; - --htrdr->mpi_nworking_procs; - - omp_set_num_threads((int)nthreads); - #pragma omp parallel - for(;;) { - const int ithread = omp_get_thread_num(); - struct ssp_rng_proxy* rng_proxy = NULL; - struct ssp_rng* rng; - struct tile* tile; - uint32_t mcode = TILE_MCODE_NULL; - size_t tile_org[2]; - size_t tile_sz[2]; - size_t n; - res_T res_local = RES_OK; - int32_t pcent; - - /* Get a tile to draw */ - #pragma omp critical - { - mcode = proc_work_get_tile(work); - if(mcode == TILE_MCODE_NULL) { /* No more work on this process */ - /* Try to steal works to concurrent processes */ - proc_work_reset(work); - nthieves = mpi_steal_work(htrdr, rng_proc, work); - if(nthieves != 0) { - mcode = proc_work_get_tile(work); - } - } - } - if(mcode == TILE_MCODE_NULL) break; /* No more work */ - - /* Decode the morton code to retrieve the tile index */ - tile_org[0] = morton2D_decode((uint32_t)(mcode>>0)); - tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); - ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y); - - /* Create the tile */ - tile = tile_create(htrdr->allocator); - if(!tile) { - ATOMIC_SET(&res, RES_MEM_ERR); - htrdr_log_err(htrdr, - "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n", - (unsigned long)tile_org[0], (unsigned long)tile_org[1], - res_to_cstr((res_T)ATOMIC_GET(&res))); - break; - } - - /* Register the tile */ - #pragma omp critical - list_add_tail(tiles, &tile->node); - - tile->data.x = (uint16_t)tile_org[0]; - tile->data.y = (uint16_t)tile_org[1]; - - /* Define the tile origin in pixel space */ - tile_org[0] *= TILE_SIZE; - tile_org[1] *= TILE_SIZE; - - /* Compute the size of the tile clamped by the borders of the buffer */ - tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]); - tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]); - - /* Create a proxy RNG for the current tile. This proxy is used for the - * current thread only and thus it has to manage only one RNG. This proxy - * is initialised in order to ensure that a unique and predictable set of - * random numbers is used for the current tile. 
*/ - SSP(rng_proxy_create2 - (&htrdr->lifo_allocators[ithread], - &ssp_rng_threefry, - RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */ - RNG_SEQUENCE_SIZE, /* Size */ - RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */ - 1, &rng_proxy)); - SSP(rng_proxy_create_rng(rng_proxy, 0, &rng)); - - /* Launch the tile rendering */ - res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz, - pix_sz, cam, spp, rng, tile); - - SSP(rng_proxy_ref_put(rng_proxy)); - SSP(rng_ref_put(rng)); - - if(res_local != RES_OK) { - ATOMIC_SET(&res, res_local); - break; - } - - /* Update the progress status */ - n = (size_t)ATOMIC_INCR(&nsolved_tiles); - pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/); - - #pragma omp critical - if(pcent > htrdr->mpi_progress_render[0]) { - htrdr->mpi_progress_render[0] = pcent; - if(htrdr->mpi_rank == 0) { - update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); - } else { /* Send the progress percentage to the master process */ - send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent); - } - } - } - - if(ATOMIC_GET(&res) != RES_OK) goto error; - - /* Synchronize the process */ - mutex_lock(htrdr->mpi_mutex); - MPI(Barrier(MPI_COMM_WORLD)); - mutex_unlock(htrdr->mpi_mutex); - -exit: - if(rng_proc) SSP(rng_ref_put(rng_proc)); - return (res_T)res; -error: - goto exit; -} - -/******************************************************************************* - * Local functions - ******************************************************************************/ -res_T -htrdr_draw_radiance_sw - (struct htrdr* htrdr, - const struct htrdr_camera* cam, - const size_t width, - const size_t height, - const size_t spp, - struct htrdr_buffer* buf) -{ - char strbuf[128]; - struct time t0, t1; - struct list_node tiles; - size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted; - size_t itile; - struct proc_work work; - struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL; - size_t proc_ntiles_adjusted; - double pix_sz[2]; - ATOMIC probe_thieves = 1; - ATOMIC res = RES_OK; - ASSERT(htrdr && cam && width && height); - ASSERT(htrdr->mpi_rank != 0 || buf); - - list_init(&tiles); - proc_work_init(htrdr->allocator, &work); - - if(htrdr->mpi_rank == 0) { - htrdr_buffer_get_layout(buf, &layout); - ASSERT(layout.width || layout.height || layout.elmt_size); - ASSERT(layout.width == width && layout.height == height); - - if(layout.elmt_size != sizeof(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__]) - || layout.alignment < ALIGNOF(struct htrdr_accum[HTRDR_ESTIMATES_COUNT__])) { - htrdr_log_err(htrdr, - "%s: invalid buffer layout. 
" - "The pixel size must be the size of %lu accumulators.\n", - FUNC_NAME, (unsigned long)HTRDR_ESTIMATES_COUNT__); - res = RES_BAD_ARG; - goto error; - } - } - - /* Compute the overall number of tiles */ - ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; - ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; - ntiles = ntiles_x * ntiles_y; - - /* Compute the pixel size in the normalized image plane */ - pix_sz[0] = 1.0 / (double)width; - pix_sz[1] = 1.0 / (double)height; - - /* Adjust the #tiles for the morton-encoding procedure */ - ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y)); - ntiles_adjusted *= ntiles_adjusted; - - /* Define the initial number of tiles of the current process */ - proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs; - if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */ - proc_ntiles_adjusted += - ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs; - } - - /* Define the initial list of tiles of the process */ - FOR_EACH(itile, 0, proc_ntiles_adjusted) { - uint32_t mcode; - uint16_t tile_org[2]; - - mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs - + (uint32_t)htrdr->mpi_rank; - - tile_org[0] = morton2D_decode(mcode>>0); - if(tile_org[0] >= ntiles_x) continue; - tile_org[1] = morton2D_decode(mcode>>1); - if(tile_org[1] >= ntiles_y) continue; - proc_work_add_tile(&work, mcode); - } - - if(htrdr->mpi_rank == 0) { - fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); - print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); - } - - time_current(&t0); - - omp_set_nested(1); /* Enable nested threads for draw_image */ - #pragma omp parallel sections num_threads(2) - { - #pragma omp section - mpi_probe_thieves(htrdr, &work, &probe_thieves); - - #pragma omp section - { - draw_image(htrdr, cam, width, height, spp, ntiles_x, ntiles_y, - ntiles_adjusted, pix_sz, &work, &tiles); - /* The processes have no more work to do. 
Stop probing for thieves */ - ATOMIC_SET(&probe_thieves, 0); - } - } - - if(htrdr->mpi_rank == 0) { - update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); - fprintf(stderr, "\n"); /* Add a new line after the progress statuses */ - } - - time_sub(&t0, time_current(&t1), &t0); - time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf)); - htrdr_log(htrdr, "Rendering time: %s\n", strbuf); - - /* Gather accum buffers from the group of processes */ - time_current(&t0); - res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles); - if(res != RES_OK) goto error; - time_sub(&t0, time_current(&t1), &t0); - time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf)); - htrdr_log(htrdr, "Image gathering time: %s\n", strbuf); - -exit: - { /* Free allocated tiles */ - struct list_node* node; - struct list_node* tmp; - LIST_FOR_EACH_SAFE(node, tmp, &tiles) { - struct tile* tile = CONTAINER_OF(node, struct tile, node); - list_del(node); - tile_ref_put(tile); - } - } - proc_work_release(&work); - return (res_T)res; -error: - goto exit; -} - diff --git a/src/htrdr_solve.h b/src/htrdr_solve.h @@ -56,7 +56,7 @@ htrdr_compute_radiance_lw const size_t iquad); /* Index of the quadrature point into the band */ extern LOCAL_SYM res_T -htrdr_draw_radiance_sw +htrdr_draw_radiance (struct htrdr* htrdr, const struct htrdr_camera* cam, const size_t width, /* Image width */ diff --git a/src/htrdr_sun.c b/src/htrdr_sun.c @@ -45,14 +45,6 @@ struct htrdr_sun { /******************************************************************************* * Helper functions ******************************************************************************/ -static INLINE int -cmp_dbl(const void* a, const void* b) -{ - const double d0 = *((const double*)a); - const double d1 = *((const double*)b); - return d0 < d1 ? -1 : (d0 > d1 ? 1 : 0); -} - static void release_sun(ref_T* ref) {
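
Both the old and the new draw files walk tiles and pixels along a 2D Morton (Z-order) curve: x occupies the even bits of the code and y the odd bits, so the tile origin is recovered with morton2D_decode(mcode >> 0) and morton2D_decode(mcode >> 1). The standalone round-trip check below mirrors that bit-twiddling; the helper names part1by1/compact1by1 and the 16x16 grid are illustrative, not taken from the source.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Spread the 16 bits of u16 onto the even bits of a 32-bit word, as in
 * morton2D_encode() */
static uint32_t
part1by1(const uint16_t u16)
{
  uint32_t u32 = u16;
  u32 = (u32 | (u32 << 8)) & 0x00FF00FF;
  u32 = (u32 | (u32 << 4)) & 0x0F0F0F0F;
  u32 = (u32 | (u32 << 2)) & 0x33333333;
  u32 = (u32 | (u32 << 1)) & 0x55555555;
  return u32;
}

/* Gather the even bits back into 16 bits, as in morton2D_decode() */
static uint16_t
compact1by1(const uint32_t u32)
{
  uint32_t x = u32 & 0x55555555;
  x = (x | (x >> 1)) & 0x33333333;
  x = (x | (x >> 2)) & 0x0F0F0F0F;
  x = (x | (x >> 4)) & 0x00FF00FF;
  x = (x | (x >> 8)) & 0x0000FFFF;
  return (uint16_t)x;
}

int
main(void)
{
  uint16_t x, y;

  /* Round trip over a small grid: interleave (x, y) into a Morton code
   * and recover both coordinates, exactly as draw_tile() decodes them */
  for(y = 0; y < 16; y++) {
    for(x = 0; x < 16; x++) {
      const uint32_t mcode = part1by1(x) | (part1by1(y) << 1);
      assert(compact1by1(mcode >> 0) == x);
      assert(compact1by1(mcode >> 1) == y);
    }
  }
  printf("Morton round trip OK\n");
  return 0;
}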