htrdr

Solving radiative transfer in heterogeneous media
git clone git://git.meso-star.fr/htrdr.git
Log | Files | Refs | README | LICENSE

commit f5938d975152de903266c5c10a322ed6580c0508
parent b47ba0173b5236c65c13abb7aa9fe6588aa89371
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Mon,  5 Nov 2018 14:05:34 +0100

Alloc the image buffer only on the master process

Diffstat:
Msrc/htrdr.c | 34++++++++++++++++------------------
Msrc/htrdr.h | 2++
Msrc/htrdr_draw_radiance_sw.c | 141+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
Msrc/htrdr_solve.h | 5++++-
4 files changed, 113 insertions(+), 69 deletions(-)

diff --git a/src/htrdr.c b/src/htrdr.c @@ -25,7 +25,6 @@ #include "htrdr_sun.h" #include "htrdr_solve.h" -#include <rsys/clock_time.h> #include <rsys/cstr.h> #include <rsys/mem_allocator.h> #include <rsys/str.h> @@ -457,6 +456,8 @@ htrdr_init htrdr->verbose = args->verbose; htrdr->nthreads = MMIN(args->nthreads, (unsigned)omp_get_num_procs()); htrdr->spp = args->image.spp; + htrdr->width = args->image.definition[0]; + htrdr->height = args->image.definition[1]; res = init_mpi(htrdr); if(res != RES_OK) goto error; @@ -515,14 +516,18 @@ htrdr_init args->camera.up, proj_ratio, MDEG2RAD(args->camera.fov_y), &htrdr->cam); if(res != RES_OK) goto error; - res = htrdr_buffer_create(htrdr, - args->image.definition[0], /* Width */ - args->image.definition[1], /* Height */ - args->image.definition[0]*sizeof(struct htrdr_accum[3]), /* Pitch */ - sizeof(struct htrdr_accum[3]), - ALIGNOF(struct htrdr_accum[3]), /* Alignment */ - &htrdr->buf); - if(res != RES_OK) goto error; + /* Create the image buffer only on the master process; the image parts + * rendered by the processes are gathered onto the master process. */ + if(!htrdr->dump_vtk && htrdr->mpi_rank == 0) { + res = htrdr_buffer_create(htrdr, + args->image.definition[0], /* Width */ + args->image.definition[1], /* Height */ + args->image.definition[0]*sizeof(struct htrdr_accum[3]), /* Pitch */ + sizeof(struct htrdr_accum[3]), + ALIGNOF(struct htrdr_accum[3]), /* Alignment */ + &htrdr->buf); + if(res != RES_OK) goto error; + } res = htrdr_sun_create(htrdr, &htrdr->sun); if(res != RES_OK) goto error; @@ -609,16 +614,9 @@ htrdr_run(struct htrdr* htrdr) } } } else { - struct time t0, t1; - char buf[128]; - - time_current(&t0); - res = htrdr_draw_radiance_sw(htrdr, htrdr->cam, htrdr->spp, htrdr->buf); + res = htrdr_draw_radiance_sw(htrdr, htrdr->cam, htrdr->width, + htrdr->height, htrdr->spp, htrdr->buf); if(res != RES_OK) goto error; - time_sub(&t0, time_current(&t1), &t0); - time_dump(&t0, TIME_ALL, NULL, buf, sizeof(buf)); - htrdr_log(htrdr, "Rendering time: %s\n", buf); - if(htrdr->mpi_rank == 0) { res = dump_accum_buffer (htrdr, htrdr->buf, str_cget(&htrdr->output_name), htrdr->output); diff --git a/src/htrdr.h b/src/htrdr.h @@ -53,6 +53,8 @@ struct htrdr { struct htrdr_camera* cam; struct htrdr_buffer* buf; size_t spp; /* #samples per pixel */ + size_t width; /* Image width */ + size_t height; /* Image height */ FILE* output; struct str output_name; diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c @@ -20,6 +20,7 @@ #include "htrdr_sky.h" #include "htrdr_solve.h" +#include <rsys/clock_time.h> #include <rsys/cstr.h> #include <rsys/dynamic_array_u32.h> #include <rsys/math.h> @@ -31,12 +32,13 @@ #include <time.h> #include <unistd.h> -#define RNG_SEQUENCE_SIZE 100000 +#define RNG_SEQUENCE_SIZE 10000 #define TILE_MCODE_NULL UINT32_MAX #define TILE_SIZE 32 /* Definition in X & Y of a tile */ STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2); +/* Tile of row ordered image pixels */ struct tile { struct list_node node; struct mem_allocator* allocator; @@ -44,11 +46,12 @@ struct tile { struct tile_data { uint16_t x, y; /* 2D coordinates of the tile in tile space */ - struct htrdr_accum accums[1/*dummy element*/]; /* Row ordered */ + /* Simulate the flexible array member of the C99 standard. */ + struct htrdr_accum accums[1/*dummy element*/]; } data; }; -/* Overall work of a process */ +/* List of tile to compute onto the MPI process. */ struct proc_work { struct mutex* mutex; struct darray_u32 tiles; /* #tiles to render */ @@ -86,7 +89,7 @@ tile_create(struct mem_allocator* allocator) struct tile* tile; const size_t tile_sz = sizeof(struct tile) - sizeof(struct htrdr_accum)/*rm dummy accum*/; - const size_t buf_sz = + const size_t buf_sz = /* Flexiblbe array element */ TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum)*3/*#channels*/; ASSERT(allocator); @@ -146,20 +149,21 @@ write_tile_data(struct htrdr_buffer* buf, const struct tile_data* tile_data) htrdr_buffer_get_layout(buf, &layout); buf_mem = htrdr_buffer_get_data(buf); + /* Compute the row/column of the tile origin into the buffer */ icol = tile_data->x * (size_t)TILE_SIZE; irow = tile_data->y * (size_t)TILE_SIZE; + + /* Define the number of tile row/columns to write into the buffer */ ncols_tile = MMIN(icol + TILE_SIZE, layout.width) - icol; nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow; + /* Copy the tile data, row by row */ FOR_EACH(irow_tile, 0, nrows_tile) { char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch; const struct htrdr_accum* tile_row = tile_data->accums + irow_tile*TILE_SIZE*3/*#channels*/; - - memcpy - (buf_row + icol*sizeof(struct htrdr_accum)*3, - tile_row, - ncols_tile*sizeof(struct htrdr_accum)*3/*#channels*/); + memcpy(buf_row + icol*sizeof(struct htrdr_accum)*3, tile_row, + ncols_tile*sizeof(struct htrdr_accum)*3/*#channels*/); } } @@ -311,7 +315,7 @@ mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng) iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs); /* Find the rank of the sampled active process. Use a simple linear search - * since the overall number of process should be quite low; at most few + * since the overall number of processes should be quite low; at most few * dozens. */ i = 0; FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) { @@ -382,43 +386,54 @@ mpi_gather_tiles const size_t ntiles, struct list_node* tiles) { + /* Compute the size of the tile_data */ const size_t msg_sz = sizeof(struct tile_data) - sizeof(struct htrdr_accum)/*dummy*/ + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum)*3/*#channels*/; - struct list_node* node; + + struct list_node* node = NULL; struct tile* tile = NULL; res_T res = RES_OK; - ASSERT(htrdr && buf && tiles); + ASSERT(htrdr && tiles); + ASSERT(htrdr->mpi_rank != 0 || buf); (void)ntiles; - if(htrdr->mpi_rank != 0) { + if(htrdr->mpi_rank != 0) { /* Non master process */ + /* Send the computed tile to the master process */ LIST_FOR_EACH(node, tiles) { struct tile* t = CONTAINER_OF(node, struct tile, node); MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0, HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD)); } - } else { + } else { /* Master process */ size_t itile = 0; - tile = tile_create(htrdr->allocator); - if(!tile) { - res = RES_MEM_ERR; - htrdr_log_err(htrdr, - "could not allocate the temporary tile used to gather the process " - "output data -- %s.\n", res_to_cstr(res)); - goto error; - - } LIST_FOR_EACH(node, tiles) { struct tile* t = CONTAINER_OF(node, struct tile, node); write_tile_data(buf, &t->data); ++itile; } - FOR_EACH(itile, itile, ntiles) { - MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE, - HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); - write_tile_data(buf, &tile->data); + if(itile != ntiles) { + ASSERT(htrdr->mpi_nprocs > 1); + + /* Create a temporary tile to receive the tile data computed by the + * concurrent MPI processes */ + tile = tile_create(htrdr->allocator); + if(!tile) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "could not allocate the temporary tile used to gather the process " + "output data -- %s.\n", res_to_cstr(res)); + goto error; + } + + /* Receive the tile data of the concurret MPI processes */ + FOR_EACH(itile, itile, ntiles) { + MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE, + HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); + write_tile_data(buf, &tile->data); + } } } @@ -524,13 +539,14 @@ static res_T draw_image (struct htrdr* htrdr, const struct htrdr_camera* cam, + const size_t width, /* Image width */ + const size_t height, /* Image height */ const size_t spp, const size_t ntiles_x, const size_t ntiles_y, const size_t ntiles_adjusted, const double pix_sz[2], /* Pixel size in the normalized image plane */ struct proc_work* work, - const struct htrdr_buffer_layout* layout, struct list_node* tiles) { struct ssp_rng* rng_proc = NULL; @@ -542,6 +558,7 @@ draw_image ATOMIC res = RES_OK; ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles); ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0); + ASSERT(width && height); (void)ntiles_x, (void)ntiles_y; res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc); @@ -615,9 +632,13 @@ draw_image tile_org[1] *= TILE_SIZE; /* Compute the size of the tile clamped by the borders of the buffer */ - tile_sz[0] = MMIN(TILE_SIZE, layout->width - tile_org[0]); - tile_sz[1] = MMIN(TILE_SIZE, layout->height - tile_org[1]); + tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]); + tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]); + /* Create a proxy RNG for the current tile. This proxy is used for the + * current thread only and thus it has to manage only one RNG. This proxy + * is initialised in order to ensure that a unique and predictable set of + * random numbers is used for the current tile. */ SSP(rng_proxy_create2 (&htrdr->lifo_allocators[ithread], &ssp_rng_threefry, @@ -627,6 +648,7 @@ draw_image 1, &rng_proxy)); SSP(rng_proxy_create_rng(rng_proxy, 0, &rng)); + /* Launch the tile rendering */ res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz, pix_sz, cam, spp, rng, tile); @@ -638,6 +660,7 @@ draw_image break; } + /* Update the progress status */ n = (size_t)ATOMIC_INCR(&nsolved_tiles); pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/); @@ -659,7 +682,7 @@ draw_image MPI(Ibarrier(MPI_COMM_WORLD, &req)); mutex_unlock(htrdr->mpi_mutex); - /* Wait for synchronization */ + /* Wait for processes synchronization */ mpi_wait_for_request(htrdr, &req); exit: @@ -676,44 +699,52 @@ res_T htrdr_draw_radiance_sw (struct htrdr* htrdr, const struct htrdr_camera* cam, + const size_t width, + const size_t height, const size_t spp, struct htrdr_buffer* buf) { + char strbuf[128]; + struct time t0, t1; struct list_node tiles; size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted; size_t itile; struct proc_work work; - struct htrdr_buffer_layout layout; + struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL; size_t proc_ntiles_adjusted; double pix_sz[2]; ATOMIC probe_thieves = 1; ATOMIC res = RES_OK; - ASSERT(htrdr && cam && buf); + ASSERT(htrdr && cam && width && height); + ASSERT(htrdr->mpi_rank != 0 || buf); list_init(&tiles); proc_work_init(htrdr->allocator, &work); - htrdr_buffer_get_layout(buf, &layout); - ASSERT(layout.width || layout.height || layout.elmt_size); - - if(layout.elmt_size != sizeof(struct htrdr_accum[3])/*#channels*/ - || layout.alignment < ALIGNOF(struct htrdr_accum[3])) { - htrdr_log_err(htrdr, - "%s: invalid buffer layout. " - "The pixel size must be the size of 3 * accumulators.\n", - FUNC_NAME); - res = RES_BAD_ARG; - goto error; + if(htrdr->mpi_rank == 0) { + htrdr_buffer_get_layout(buf, &layout); + ASSERT(layout.width || layout.height || layout.elmt_size); + ASSERT(layout.width == width && layout.height == height); + + if(layout.elmt_size != sizeof(struct htrdr_accum[3])/*#channels*/ + || layout.alignment < ALIGNOF(struct htrdr_accum[3])) { + htrdr_log_err(htrdr, + "%s: invalid buffer layout. " + "The pixel size must be the size of 3 * accumulators.\n", + FUNC_NAME); + res = RES_BAD_ARG; + goto error; + } } /* Compute the overall number of tiles */ - ntiles_x = (layout.width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; - ntiles_y = (layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; + ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; + ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; ntiles = ntiles_x * ntiles_y; /* Compute the pixel size in the normalized image plane */ - pix_sz[0] = 1.0 / (double)layout.width; - pix_sz[1] = 1.0 / (double)layout.height; + pix_sz[0] = 1.0 / (double)width; + pix_sz[1] = 1.0 / (double)height; /* Adjust the #tiles for the morton-encoding procedure */ ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y)); @@ -746,6 +777,8 @@ htrdr_draw_radiance_sw print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); } + time_current(&t0); + omp_set_nested(1); /* Enable nested threads for draw_image */ #pragma omp parallel sections num_threads(2) { @@ -754,8 +787,8 @@ htrdr_draw_radiance_sw #pragma omp section { - draw_image(htrdr, cam, spp, ntiles_x, ntiles_y, ntiles_adjusted, pix_sz, - &work, &layout, &tiles); + draw_image(htrdr, cam, width, height, spp, ntiles_x, ntiles_y, + ntiles_adjusted, pix_sz, &work, &tiles); /* The processes have no more work to do. Stop probing for thieves */ ATOMIC_SET(&probe_thieves, 0); } @@ -766,9 +799,17 @@ htrdr_draw_radiance_sw fprintf(stderr, "\n"); /* Add a new line after the progress statuses */ } + time_sub(&t0, time_current(&t1), &t0); + time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf)); + htrdr_log(htrdr, "Rendering time: %s\n", strbuf); + /* Gather accum buffers from the group of processes */ + time_current(&t0); res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles); if(res != RES_OK) goto error; + time_sub(&t0, time_current(&t1), &t0); + time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf)); + htrdr_log(htrdr, "Image gathering time: %s\n", strbuf); exit: { /* Free allocated tiles */ diff --git a/src/htrdr_solve.h b/src/htrdr_solve.h @@ -48,8 +48,11 @@ extern LOCAL_SYM res_T htrdr_draw_radiance_sw (struct htrdr* htrdr, const struct htrdr_camera* cam, + const size_t width, /* Image width */ + const size_t height, /* Image height */ const size_t spp, /* #samples per pixel, i.e. #realisations */ - struct htrdr_buffer* buf); /* Buffer of struct htrdr_accum[3] */ + /* Buffer of struct htrdr_accum[3]. May be NULL on on non master processes */ + struct htrdr_buffer* buf); extern LOCAL_SYM int htrdr_ground_filter