htrdr

Solving radiative transfer in heterogeneous media
git clone git://git.meso-star.fr/htrdr.git
Log | Files | Refs | README | LICENSE

commit f319647ac65b2c79f20c06726a422e3dc2ca9306
parent 2b49f2b63e8635f5334d77415c4279f308663243
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Fri, 26 Oct 2018 12:27:11 +0200

Update the work distribution in MPI

Distribute the image tiles rather than the tile realisations. Note that,
currently, even though only a subset of the tiles is computed on a given
process, we still gather the whole tiled image from each process toward
the master process.

Diffstat:
Msrc/htrdr.c | 59++++++++++++++++++++++++++++++++++++++---------------------
Msrc/htrdr_c.h | 14++++++++++++++
Msrc/htrdr_draw_radiance_sw.c | 73+++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Msrc/htrdr_sky.c | 12++++--------
4 files changed, 105 insertions(+), 53 deletions(-)

diff --git a/src/htrdr.c b/src/htrdr.c @@ -355,17 +355,6 @@ htrdr_init htrdr->cache_grids = 0; } - if((size_t)htrdr->mpi_nprocs > htrdr->spp) { - htrdr_log_err(htrdr, - "%s: insufficient number samples per pixel `%lu': it must be greater or " - "equal to the number of running processes, i.e. `%lu'.\n", - FUNC_NAME, - (unsigned long)htrdr->spp, - (unsigned long)htrdr->mpi_nprocs); - res = RES_BAD_ARG; - goto error; - } - if(!args->output) { htrdr->output = stdout; output_name = "<stdout>"; @@ -792,7 +781,7 @@ fetch_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress tag) { int8_t* progress = NULL; int iproc; - ASSERT(htrdr && progress && htrdr->mpi_rank == 0); + ASSERT(htrdr && htrdr->mpi_rank == 0); switch(tag) { case HTRDR_MPI_PROGRESS_BUILD_OCTREE: @@ -823,22 +812,50 @@ fetch_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress tag) void print_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress tag) { - int iproc; ASSERT(htrdr && htrdr->mpi_rank == 0); - FOR_EACH(iproc, 0, htrdr->mpi_nprocs) { + + if(htrdr->mpi_nprocs == 1) { switch(tag) { case HTRDR_MPI_PROGRESS_BUILD_OCTREE: - htrdr_fprintf(htrdr, stderr, - "\033[2K\rProcess %d -- building octree: %3d%%\n", - iproc, htrdr->mpi_progress_octree[iproc]); + htrdr_fprintf(htrdr, stderr, "\033[2K\rBuilding octree: %3d%%", + htrdr->mpi_progress_octree[0]); break; case HTRDR_MPI_PROGRESS_RENDERING: - htrdr_fprintf(htrdr, stderr, - "\033[2K\rProcess %d -- rendering: %3d%%\n", - iproc, htrdr->mpi_progress_render[iproc]); + htrdr_fprintf(htrdr, stderr, "\033[2K\rRendering: %3d%%", + htrdr->mpi_progress_render[0]); break; default: FATAL("Unreachable code.\n"); break; } + htrdr_fflush(htrdr, stderr); + } else { + int iproc; + FOR_EACH(iproc, 0, htrdr->mpi_nprocs) { + switch(tag) { + case HTRDR_MPI_PROGRESS_BUILD_OCTREE: + htrdr_fprintf(htrdr, stderr, + "\033[2K\rProcess %d -- building octree: %3d%%%c", + iproc, htrdr->mpi_progress_octree[iproc], + iproc == htrdr->mpi_nprocs - 1 ? 
'\r' : '\n'); + break; + case HTRDR_MPI_PROGRESS_RENDERING: + htrdr_fprintf(htrdr, stderr, + "\033[2K\rProcess %d -- rendering: %3d%%%c", + iproc, htrdr->mpi_progress_render[iproc], + iproc == htrdr->mpi_nprocs - 1 ? '\r' : '\n'); + break; + default: FATAL("Unreachable code.\n"); break; + } + } + } +} + +void +clear_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress tag) +{ + ASSERT(htrdr); + (void)tag; + if(htrdr->mpi_nprocs > 1) { + htrdr_fprintf(htrdr, stderr, "\033[%dA", htrdr->mpi_nprocs-1); } } @@ -848,7 +865,7 @@ total_mpi_progress(const struct htrdr* htrdr, const enum htrdr_mpi_progress tag) const int8_t* progress = NULL; int total = 0; int iproc; - ASSERT(htrdr && progress && htrdr->mpi_rank == 0); + ASSERT(htrdr && htrdr->mpi_rank == 0); switch(tag) { case HTRDR_MPI_PROGRESS_BUILD_OCTREE: diff --git a/src/htrdr_c.h b/src/htrdr_c.h @@ -119,10 +119,24 @@ print_mpi_progress (struct htrdr* htrdr, const enum htrdr_mpi_progress progress); +extern LOCAL_SYM void +clear_mpi_progress + (struct htrdr* htrdr, + const enum htrdr_mpi_progress progress); + extern int8_t total_mpi_progress (const struct htrdr* htrdr, const enum htrdr_mpi_progress progress); +static INLINE void +update_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress progress) +{ + ASSERT(htrdr); + fetch_mpi_progress(htrdr, progress); + clear_mpi_progress(htrdr, progress); + print_mpi_progress(htrdr, progress); +} + #endif /* HTRDR_C_H */ diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c @@ -215,17 +215,19 @@ res_T htrdr_draw_radiance_sw (struct htrdr* htrdr, const struct htrdr_camera* cam, - const size_t total_spp, + const size_t spp, struct htrdr_buffer* buf) { struct ssp_rng_proxy* rng_proxy = NULL; struct ssp_rng** rngs = NULL; - size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted; + size_t ntiles_x, ntiles_y, ntiles_adjusted; size_t i; - int32_t mcode; /* Morton code of the tile */ + int64_t* proc_tiles = NULL; + int64_t itile; struct 
htrdr_buffer_layout layout; double pix_sz[2]; /* Pixel size in the normalized image plane */ - size_t spp; + size_t proc_ntiles; + size_t proc_ntiles_adjusted; ATOMIC nsolved_tiles = 0; ATOMIC res = RES_OK; ASSERT(htrdr && cam && buf); @@ -233,13 +235,6 @@ htrdr_draw_radiance_sw htrdr_buffer_get_layout(buf, &layout); ASSERT(layout.width || layout.height || layout.elmt_size); - spp = total_spp / (size_t)htrdr->mpi_nprocs; - - /* Add the remaining realisations to the 1st process */ - if(htrdr->mpi_rank == 0) { - spp += total_spp - (spp*(size_t)htrdr->mpi_nprocs); - } - if(layout.elmt_size != sizeof(struct htrdr_accum[3])/*#channels*/ || layout.alignment < ALIGNOF(struct htrdr_accum[3])) { htrdr_log_err(htrdr, @@ -283,11 +278,43 @@ htrdr_draw_radiance_sw ntiles_y = (layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE; ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y)); ntiles_adjusted *= ntiles_adjusted; - ntiles = ntiles_x * ntiles_y; pix_sz[0] = 1.0 / (double)layout.width; pix_sz[1] = 1.0 / (double)layout.height; + /* Define the initial number of tiles of the current process */ + proc_ntiles = ntiles_adjusted / (size_t)htrdr->mpi_nprocs; + if(htrdr->mpi_rank == 0) {/* Affect the remaining tiles to the master proc */ + ASSERT(ntiles_adjusted >= proc_ntiles * (size_t)htrdr->mpi_nprocs); + proc_ntiles += ntiles_adjusted - proc_ntiles*(size_t)htrdr->mpi_nprocs; + } + + /* Allocate the per process list of tiles */ + proc_tiles = MEM_CALLOC(htrdr->allocator, proc_ntiles, sizeof(*proc_tiles)); + if(!proc_tiles) { + res = RES_MEM_ERR; + htrdr_log_err(htrdr, + "%s: could not allocate the per process list of tiles -- %s.\n", + FUNC_NAME, res_to_cstr((res_T)res)); + goto error; + } + + /* Define the initial list of tiles of the process */ + proc_ntiles_adjusted = 0; + FOR_EACH(itile, 0, proc_ntiles) { + size_t tile_org[2]; + int64_t mcode = htrdr->mpi_rank + (itile*htrdr->mpi_nprocs); + + /* Decode the morton code to retrieve the tile index */ + tile_org[0] = 
morton2D_decode((uint32_t)(mcode>>0)); + if(tile_org[0] >= ntiles_x) continue; /* Skip border tile */ + tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); + if(tile_org[1] >= ntiles_y) continue; /* Skip border tile */ + + proc_tiles[proc_ntiles_adjusted] = mcode; + proc_ntiles_adjusted++; + } + if(htrdr->mpi_rank == 0) { fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); @@ -295,9 +322,10 @@ htrdr_draw_radiance_sw omp_set_num_threads((int)htrdr->nthreads); #pragma omp parallel for schedule(static, 1/*chunck size*/) - for(mcode=0; mcode<(int64_t)ntiles_adjusted; ++mcode) { + for(itile=0; itile<(int64_t)proc_ntiles_adjusted; ++itile) { const int ithread = omp_get_thread_num(); struct ssp_rng* rng = rngs[ithread]; + int64_t mcode = proc_tiles[itile]; size_t tile_org[2]; size_t tile_sz[2]; size_t n; @@ -306,9 +334,8 @@ htrdr_draw_radiance_sw /* Decode the morton code to retrieve the tile index */ tile_org[0] = morton2D_decode((uint32_t)(mcode>>0)); - if(tile_org[0] >= ntiles_x) continue; /* Skip border tile */ tile_org[1] = morton2D_decode((uint32_t)(mcode>>1)); - if(tile_org[1] >= ntiles_y) continue; /* Skip border tile */ + ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y); /* Define the tile origin in pixel space */ tile_org[0] *= TILE_SIZE; @@ -326,19 +353,17 @@ htrdr_draw_radiance_sw } n = (size_t)ATOMIC_INCR(&nsolved_tiles); - pcent = (int8_t)(n * 100 / ntiles); + pcent = (int8_t)(n * 100 / proc_ntiles_adjusted); #pragma omp critical if(pcent > htrdr->mpi_progress_render[0]) { htrdr->mpi_progress_render[0] = pcent; - if(htrdr->mpi_rank != 0) { + if(htrdr->mpi_rank == 0) { + update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + } else { /* Send the progress percentage of the process to the master process */ CHK(MPI_Send(&pcent, sizeof(pcent), MPI_CHAR, 0/*dst*/, HTRDR_MPI_PROGRESS_RENDERING/*tag*/, MPI_COMM_WORLD) == MPI_SUCCESS); - } else { - fetch_mpi_progress(htrdr, 
HTRDR_MPI_PROGRESS_RENDERING); - htrdr_fprintf(htrdr, stderr, "\033[%dA", htrdr->mpi_nprocs); - print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); } } @@ -347,11 +372,10 @@ htrdr_draw_radiance_sw if(htrdr->mpi_rank == 0) { while(total_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING) != 100) { - fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); - htrdr_fprintf(htrdr, stderr, "\033[%dA", htrdr->mpi_nprocs); - print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); + update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING); sleep(1); } + fprintf(stderr, "\n"); } /* Gather accum buffers from the group of processes */ @@ -360,6 +384,7 @@ htrdr_draw_radiance_sw exit: if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy)); + if(proc_tiles) MEM_RM(htrdr->allocator, proc_tiles); if(rngs) { FOR_EACH(i, 0, htrdr->nthreads) { if(rngs[i]) SSP(rng_ref_put(rngs[i])); diff --git a/src/htrdr_sky.c b/src/htrdr_sky.c @@ -1273,14 +1273,12 @@ setup_clouds #pragma omp critical if(pcent > sky->htrdr->mpi_progress_octree[0]) { sky->htrdr->mpi_progress_octree[0] = pcent; - if(sky->htrdr->mpi_rank != 0) { + if(sky->htrdr->mpi_rank == 0) { + update_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE); + } else { /* Send the progress percentage of the process to the master process */ CHK(MPI_Send(&pcent, sizeof(pcent), MPI_CHAR, 0/*dst*/, HTRDR_MPI_PROGRESS_BUILD_OCTREE, MPI_COMM_WORLD) == MPI_SUCCESS); - } else { - fetch_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE); - htrdr_fprintf(sky->htrdr, stderr, "\033[%dA", sky->htrdr->mpi_nprocs); - print_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE); } } } @@ -1288,9 +1286,7 @@ setup_clouds if(!sky->htrdr->cache_grids && sky->htrdr->mpi_rank == 0) { while(total_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE) != 100) { - fetch_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE); - htrdr_fprintf(sky->htrdr, stderr, "\033[%dA", sky->htrdr->mpi_nprocs); - print_mpi_progress(sky->htrdr, 
HTRDR_MPI_PROGRESS_BUILD_OCTREE); + update_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE); sleep(1); } }