htrdr

Solving radiative transfer in heterogeneous media
git clone git://git.meso-star.fr/htrdr.git
Log | Files | Refs | README | LICENSE

commit b47ba0173b5236c65c13abb7aa9fe6588aa89371
parent 9c3d5d0cec87f0ea316c2ec9247d3b149248cb3c
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Wed, 31 Oct 2018 17:47:13 +0100

Send only the computed tile to the master process

Do not gather the whole images of a process. Each process lists the set
of tiles that it computes and send them to the master process.

Diffstat:
Msrc/htrdr_c.h | 3++-
Msrc/htrdr_draw_radiance_sw.c | 265++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
2 files changed, 185 insertions(+), 83 deletions(-)

diff --git a/src/htrdr_c.h b/src/htrdr_c.h @@ -28,7 +28,8 @@ enum htrdr_mpi_message { HTRDR_MPI_PROGRESS_BUILD_OCTREE, HTRDR_MPI_PROGRESS_RENDERING, HTRDR_MPI_STEAL_REQUEST, - HTRDR_MPI_WORK_STEALING + HTRDR_MPI_WORK_STEALING, + HTRDR_MPI_TILE_DATA }; struct htrdr; diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c @@ -33,9 +33,21 @@ #define RNG_SEQUENCE_SIZE 100000 +#define TILE_MCODE_NULL UINT32_MAX #define TILE_SIZE 32 /* Definition in X & Y of a tile */ STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2); +struct tile { + struct list_node node; + struct mem_allocator* allocator; + ref_T ref; + + struct tile_data { + uint16_t x, y; /* 2D coordinates of the tile in tile space */ + struct htrdr_accum accums[1/*dummy element*/]; /* Row ordered */ + } data; +}; + /* Overall work of a process */ struct proc_work { struct mutex* mutex; @@ -43,8 +55,6 @@ struct proc_work { size_t itile; /* Next tile to render in the above list of tiles */ }; -#define TILE_MCODE_NULL UINT32_MAX - /******************************************************************************* * Helper functions ******************************************************************************/ @@ -70,6 +80,89 @@ morton2D_encode(const uint16_t u16) return u32; } +static FINLINE struct tile* +tile_create(struct mem_allocator* allocator) +{ + struct tile* tile; + const size_t tile_sz = + sizeof(struct tile) - sizeof(struct htrdr_accum)/*rm dummy accum*/; + const size_t buf_sz = + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum)*3/*#channels*/; + ASSERT(allocator); + + tile = MEM_ALLOC(allocator, tile_sz+buf_sz); + if(!tile) return NULL; + + ref_init(&tile->ref); + list_init(&tile->node); + tile->allocator = allocator; + ASSERT(IS_ALIGNED(&tile->data.accums, ALIGNOF(struct htrdr_accum))); + + return tile; +} + +static INLINE void +tile_ref_get(struct tile* tile) +{ + ASSERT(tile); + tile_ref_get(tile); +} + +static INLINE void +release_tile(ref_T* ref) +{ + struct tile* tile = CONTAINER_OF(ref, struct tile, ref); + ASSERT(ref); + MEM_RM(tile->allocator, tile); +} + +static INLINE void +tile_ref_put(struct tile* tile) +{ + ASSERT(tile); + ref_put(&tile->ref, release_tile); +} + +static FINLINE struct htrdr_accum* +tile_at + (struct tile* tile, + const size_t x, /* In tile space */ + const size_t y) /* In tile space */ +{ + ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE); + return tile->data.accums + (y*TILE_SIZE + x)*3/*#channels*/; +} + +static void +write_tile_data(struct htrdr_buffer* buf, const struct tile_data* tile_data) +{ + struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL; + size_t icol, irow; + size_t irow_tile; + size_t ncols_tile, nrows_tile; + char* buf_mem; + ASSERT(buf && tile_data); + + htrdr_buffer_get_layout(buf, &layout); + buf_mem = htrdr_buffer_get_data(buf); + + icol = tile_data->x * (size_t)TILE_SIZE; + irow = tile_data->y * (size_t)TILE_SIZE; + ncols_tile = MMIN(icol + TILE_SIZE, layout.width) - icol; + nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow; + + FOR_EACH(irow_tile, 0, nrows_tile) { + char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch; + const struct htrdr_accum* tile_row = + tile_data->accums + irow_tile*TILE_SIZE*3/*#channels*/; + + memcpy + (buf_row + icol*sizeof(struct htrdr_accum)*3, + tile_row, + ncols_tile*sizeof(struct htrdr_accum)*3/*#channels*/); + } +} + static INLINE void proc_work_init(struct mem_allocator* allocator, struct proc_work* work) { @@ -283,78 +376,54 @@ mpi_steal_work } static res_T -mpi_gather_buffer +mpi_gather_tiles (struct htrdr* htrdr, - struct htrdr_buffer* buf) + struct htrdr_buffer* buf, + const size_t ntiles, + struct list_node* tiles) { - struct htrdr_buffer_layout layout; - struct htrdr_accum* gathered_accums = NULL; - size_t x, y; - int iproc; + const size_t msg_sz = + sizeof(struct tile_data) - sizeof(struct htrdr_accum)/*dummy*/ + + TILE_SIZE*TILE_SIZE*sizeof(struct htrdr_accum)*3/*#channels*/; + struct list_node* node; + struct tile* tile = NULL; res_T res = RES_OK; - ASSERT(htrdr && buf); - - /* The process is alone. There is nothing to gather since all work was done - * by it */ - if(htrdr->mpi_nprocs == 1) - goto exit; - - /* Fetch the memory layout of the submitted buffer */ - htrdr_buffer_get_layout(buf, &layout); - ASSERT(layout.elmt_size == sizeof(struct htrdr_accum) * 3/*#channels*/); - ASSERT(layout.alignment <= ALIGNOF(struct htrdr_accum)); + ASSERT(htrdr && buf && tiles); + (void)ntiles; + + if(htrdr->mpi_rank != 0) { + LIST_FOR_EACH(node, tiles) { + struct tile* t = CONTAINER_OF(node, struct tile, node); + MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0, + HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD)); + } + } else { + size_t itile = 0; - /* The process 0 allocates the memory used to store the gathered buffer lines - * of the MPI processes */ - if(htrdr->mpi_rank == 0) { - gathered_accums = MEM_ALLOC - (htrdr->allocator, layout.pitch * (size_t)htrdr->mpi_nprocs); - if(!gathered_accums) { + tile = tile_create(htrdr->allocator); + if(!tile) { res = RES_MEM_ERR; htrdr_log_err(htrdr, - "could not allocate the temporary memory for MPI gathering -- %s.\n", - res_to_cstr(res)); + "could not allocate the temporary tile used to gather the process " + "output data -- %s.\n", res_to_cstr(res)); goto error; - } - } - FOR_EACH(y, 0, layout.height) { - struct htrdr_accum* buf_row_accums = (struct htrdr_accum*) - ((char*)htrdr_buffer_get_data(buf) + y * layout.pitch); - int err; /* MPI error */ - - /* Gather the buffer lines */ - mutex_lock(htrdr->mpi_mutex); - err = MPI_Gather(buf_row_accums, (int)layout.pitch, MPI_CHAR, gathered_accums, - (int)layout.pitch, MPI_CHAR, 0, MPI_COMM_WORLD); - mutex_unlock(htrdr->mpi_mutex); - if(err != MPI_SUCCESS) { - htrdr_log_err(htrdr, - "could not gather the buffer line `%lu' from the group of processes -- " - "%s.\n", - (unsigned long)y, htrdr_mpi_error_string(htrdr, err)); - res = RES_UNKNOWN_ERR; - goto error; + } + LIST_FOR_EACH(node, tiles) { + struct tile* t = CONTAINER_OF(node, struct tile, node); + write_tile_data(buf, &t->data); + ++itile; } - /* Accumulates the gathered lines into the buffer of the process 0 */ - if(htrdr->mpi_rank == 0) { - memset(buf_row_accums, 0, layout.pitch); - FOR_EACH(iproc, 0, htrdr->mpi_nprocs) { - struct htrdr_accum* proc_accums = (struct htrdr_accum*) - ((char*)gathered_accums + (size_t)iproc * layout.pitch); - FOR_EACH(x, 0, layout.width * 3/*#channels*/) { - buf_row_accums[x].sum_weights += proc_accums[x].sum_weights; - buf_row_accums[x].sum_weights_sqr += proc_accums[x].sum_weights_sqr; - buf_row_accums[x].nweights += proc_accums[x].nweights; - buf_row_accums[x].nfailures += proc_accums[x].nfailures; - } - } + FOR_EACH(itile, itile, ntiles) { + MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE, + HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); + write_tile_data(buf, &tile->data); } } exit: - if(gathered_accums) MEM_RM(htrdr->allocator, gathered_accums); + if(tile) tile_ref_put(tile); return res; error: goto exit; @@ -371,11 +440,11 @@ draw_tile const struct htrdr_camera* cam, const size_t spp, /* #samples per pixel */ struct ssp_rng* rng, - struct htrdr_buffer* buf) + struct tile* tile) { size_t npixels; size_t mcode; /* Morton code of tile pixel */ - ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && buf); + ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && tile); (void)tile_mcode; /* Adjust the #pixels to process them wrt a morton order */ npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1])); @@ -392,13 +461,13 @@ draw_tile ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1)); if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */ + /* Fetch and reset the pixel accumulator */ + pix_accums = tile_at(tile, ipix_tile[0], ipix_tile[1]); + /* Compute the pixel coordinate */ ipix[0] = tile_org[0] + ipix_tile[0]; ipix[1] = tile_org[1] + ipix_tile[1]; - /* Fetch and reset the pixel accumulator */ - pix_accums = htrdr_buffer_at(buf, ipix[0], ipix[1]); - FOR_EACH(ichannel, 0, 3) { size_t isamp; pix_accums[ichannel] = HTRDR_ACCUM_NULL; @@ -459,19 +528,20 @@ draw_image const size_t ntiles_x, const size_t ntiles_y, const size_t ntiles_adjusted, + const double pix_sz[2], /* Pixel size in the normalized image plane */ struct proc_work* work, - struct htrdr_buffer* buf) + const struct htrdr_buffer_layout* layout, + struct list_node* tiles) { - struct htrdr_buffer_layout layout; struct ssp_rng* rng_proc = NULL; MPI_Request req; - double pix_sz[2]; size_t nthreads = 0; size_t nthieves = 0; size_t proc_ntiles = 0; ATOMIC nsolved_tiles = 0; ATOMIC res = RES_OK; - ASSERT(htrdr && cam && spp && ntiles_adjusted && work && buf); + ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles); + ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0); (void)ntiles_x, (void)ntiles_y; res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc); @@ -481,11 +551,6 @@ draw_image goto error; } - /* Compute the size of a pixel in the normalized image plane */ - htrdr_buffer_get_layout(buf, &layout); - pix_sz[0] = 1.0 / (double)layout.width; - pix_sz[1] = 1.0 / (double)layout.height; - proc_ntiles = proc_work_get_ntiles(work); nthreads = MMIN(htrdr->nthreads, proc_ntiles); @@ -499,7 +564,8 @@ draw_image const int ithread = omp_get_thread_num(); struct ssp_rng_proxy* rng_proxy = NULL; struct ssp_rng* rng; - uint32_t mcode; + struct tile* tile; + uint32_t mcode = TILE_MCODE_NULL; size_t tile_org[2]; size_t tile_sz[2]; size_t n; @@ -526,13 +592,31 @@ draw_image tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y); + /* Create the tile */ + tile = tile_create(htrdr->allocator); + if(!tile) { + ATOMIC_SET(&res, RES_MEM_ERR); + htrdr_log_err(htrdr, + "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n", + (unsigned long)tile_org[0], (unsigned long)tile_org[1], + res_to_cstr((res_T)ATOMIC_GET(&res))); + break; + } + + /* Register the tile */ + #pragma omp critical + list_add_tail(tiles, &tile->node); + + tile->data.x = (uint16_t)tile_org[0]; + tile->data.y = (uint16_t)tile_org[1]; + /* Define the tile origin in pixel space */ tile_org[0] *= TILE_SIZE; tile_org[1] *= TILE_SIZE; /* Compute the size of the tile clamped by the borders of the buffer */ - tile_sz[0] = MMIN(TILE_SIZE, layout.width - tile_org[0]); - tile_sz[1] = MMIN(TILE_SIZE, layout.height - tile_org[1]); + tile_sz[0] = MMIN(TILE_SIZE, layout->width - tile_org[0]); + tile_sz[1] = MMIN(TILE_SIZE, layout->height - tile_org[1]); SSP(rng_proxy_create2 (&htrdr->lifo_allocators[ithread], @@ -544,7 +628,7 @@ draw_image SSP(rng_proxy_create_rng(rng_proxy, 0, &rng)); res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz, - pix_sz, cam, spp, rng, buf); + pix_sz, cam, spp, rng, tile); SSP(rng_proxy_ref_put(rng_proxy)); SSP(rng_ref_put(rng)); @@ -595,15 +679,18 @@ htrdr_draw_radiance_sw const size_t spp, struct htrdr_buffer* buf) { - size_t ntiles_x, ntiles_y, ntiles_adjusted; + struct list_node tiles; + size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted; size_t itile; struct proc_work work; struct htrdr_buffer_layout layout; size_t proc_ntiles_adjusted; + double pix_sz[2]; ATOMIC probe_thieves = 1; ATOMIC res = RES_OK; ASSERT(htrdr && cam && buf); + list_init(&tiles); proc_work_init(htrdr->allocator, &work); htrdr_buffer_get_layout(buf, &layout); @@ -622,6 +709,11 @@ htrdr_draw_radiance_sw /* Compute the overall number of tiles */ ntiles_x = (layout.width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; ntiles_y = (layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; + ntiles = ntiles_x * ntiles_y; + + /* Compute the pixel size in the normalized image plane */ + pix_sz[0] = 1.0 / (double)layout.width; + pix_sz[1] = 1.0 / (double)layout.height; /* Adjust the #tiles for the morton-encoding procedure */ ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y)); @@ -662,8 +754,8 @@ htrdr_draw_radiance_sw #pragma omp section { - draw_image(htrdr, cam, spp, ntiles_x, ntiles_y, ntiles_adjusted, &work, - buf); + draw_image(htrdr, cam, spp, ntiles_x, ntiles_y, ntiles_adjusted, pix_sz, + &work, &layout, &tiles); /* The processes have no more work to do. Stop probing for thieves */ ATOMIC_SET(&probe_thieves, 0); } @@ -675,10 +767,19 @@ htrdr_draw_radiance_sw } /* Gather accum buffers from the group of processes */ - res = mpi_gather_buffer(htrdr, buf); + res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles); if(res != RES_OK) goto error; exit: + { /* Free allocated tiles */ + struct list_node* node; + struct list_node* tmp; + LIST_FOR_EACH_SAFE(node, tmp, &tiles) { + struct tile* tile = CONTAINER_OF(node, struct tile, node); + list_del(node); + tile_ref_put(tile); + } + } proc_work_release(&work); return (res_T)res; error: