Upd the work distribution in MPI - htrdr - Solving radiative transfer in heterogeneous media

commit f319647ac65b2c79f20c06726a422e3dc2ca9306
parent 2b49f2b63e8635f5334d77415c4279f308663243
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Fri, 26 Oct 2018 12:27:11 +0200

Upd the work distribution in MPI

Distribute the image tiles rather than the tile realisations. Note that
currently even though only a sub set of tiles are computed on a given
process, we still gather the whole tiled image of the processes toward
the master process.

Diffstat:
M src/htrdr.c  | 59 ++++++++++++++++++++++++++++++++++++++---------------------
M src/htrdr_c.h  | 14 ++++++++++++++
M src/htrdr_draw_radiance_sw.c  | 73 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M src/htrdr_sky.c  | 12 ++++--------

4 files changed, 105 insertions(+), 53 deletions(-)
diff --git a/src/htrdr.c b/src/htrdr.c
@@ -355,17 +355,6 @@ htrdr_init
     htrdr->cache_grids = 0;
   }
 
-  if((size_t)htrdr->mpi_nprocs > htrdr->spp) {
-    htrdr_log_err(htrdr,
-      "%s: insufficient number samples per pixel `%lu': it must be greater or "
-      "equal to the number of running processes, i.e. `%lu'.\n",
-      FUNC_NAME,
-      (unsigned long)htrdr->spp,
-      (unsigned long)htrdr->mpi_nprocs);
-    res = RES_BAD_ARG;
-    goto error;
-  }
-
   if(!args->output) {
     htrdr->output = stdout;
     output_name = "<stdout>";
@@ -792,7 +781,7 @@ fetch_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress tag)
 {
   int8_t* progress = NULL;
   int iproc;
-  ASSERT(htrdr && progress && htrdr->mpi_rank == 0);
+  ASSERT(htrdr && htrdr->mpi_rank == 0);
 
   switch(tag) {
     case HTRDR_MPI_PROGRESS_BUILD_OCTREE: 
@@ -823,22 +812,50 @@ fetch_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress tag)
 void
 print_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress tag)
 {
-  int iproc;
   ASSERT(htrdr && htrdr->mpi_rank == 0);
-  FOR_EACH(iproc, 0, htrdr->mpi_nprocs) {
+
+  if(htrdr->mpi_nprocs == 1) {
     switch(tag) {
       case HTRDR_MPI_PROGRESS_BUILD_OCTREE:
-        htrdr_fprintf(htrdr, stderr,
-          "\033[2K\rProcess %d -- building octree: %3d%%\n",
-          iproc, htrdr->mpi_progress_octree[iproc]);
+        htrdr_fprintf(htrdr, stderr, "\033[2K\rBuilding octree: %3d%%",
+          htrdr->mpi_progress_octree[0]);
         break;
       case HTRDR_MPI_PROGRESS_RENDERING:
-        htrdr_fprintf(htrdr, stderr,
-          "\033[2K\rProcess %d -- rendering: %3d%%\n",
-          iproc, htrdr->mpi_progress_render[iproc]);
+        htrdr_fprintf(htrdr, stderr, "\033[2K\rRendering: %3d%%",
+          htrdr->mpi_progress_render[0]);
         break;
       default: FATAL("Unreachable code.\n"); break;
     }
+    htrdr_fflush(htrdr, stderr);
+  } else {
+    int iproc;
+    FOR_EACH(iproc, 0, htrdr->mpi_nprocs) {
+      switch(tag) {
+        case HTRDR_MPI_PROGRESS_BUILD_OCTREE:
+          htrdr_fprintf(htrdr, stderr,
+            "\033[2K\rProcess %d -- building octree: %3d%%%c",
+            iproc, htrdr->mpi_progress_octree[iproc],
+            iproc == htrdr->mpi_nprocs - 1 ? '\r' : '\n');
+          break;
+        case HTRDR_MPI_PROGRESS_RENDERING:
+          htrdr_fprintf(htrdr, stderr,
+            "\033[2K\rProcess %d -- rendering: %3d%%%c",
+            iproc, htrdr->mpi_progress_render[iproc],
+            iproc == htrdr->mpi_nprocs - 1 ? '\r' : '\n');
+          break;
+        default: FATAL("Unreachable code.\n"); break;
+      }
+    }
+  }
+}
+
+void
+clear_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress tag)
+{
+  ASSERT(htrdr);
+  (void)tag;
+  if(htrdr->mpi_nprocs > 1) {
+    htrdr_fprintf(htrdr, stderr, "\033[%dA", htrdr->mpi_nprocs-1);
   }
 }
 
@@ -848,7 +865,7 @@ total_mpi_progress(const struct htrdr* htrdr, const enum htrdr_mpi_progress tag)
   const int8_t* progress = NULL;
   int total = 0;
   int iproc;
-  ASSERT(htrdr && progress && htrdr->mpi_rank == 0);
+  ASSERT(htrdr && htrdr->mpi_rank == 0);
 
   switch(tag) {
     case HTRDR_MPI_PROGRESS_BUILD_OCTREE: 
diff --git a/src/htrdr_c.h b/src/htrdr_c.h
@@ -119,10 +119,24 @@ print_mpi_progress
   (struct htrdr* htrdr,
    const enum htrdr_mpi_progress progress);
 
+extern LOCAL_SYM void
+clear_mpi_progress
+  (struct htrdr* htrdr,
+   const enum htrdr_mpi_progress progress);
+
 extern int8_t
 total_mpi_progress
   (const struct htrdr* htrdr,
    const enum htrdr_mpi_progress progress);
 
+static INLINE void
+update_mpi_progress(struct htrdr* htrdr, const enum htrdr_mpi_progress progress)
+{
+  ASSERT(htrdr);
+  fetch_mpi_progress(htrdr, progress);
+  clear_mpi_progress(htrdr, progress);
+  print_mpi_progress(htrdr, progress);
+}
+
 #endif /* HTRDR_C_H */
 
diff --git a/src/htrdr_draw_radiance_sw.c b/src/htrdr_draw_radiance_sw.c
@@ -215,17 +215,19 @@ res_T
 htrdr_draw_radiance_sw
   (struct htrdr* htrdr,
    const struct htrdr_camera* cam,
-   const size_t total_spp,
+   const size_t spp,
    struct htrdr_buffer* buf)
 {
   struct ssp_rng_proxy* rng_proxy = NULL;
   struct ssp_rng** rngs = NULL;
-  size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
+  size_t ntiles_x, ntiles_y, ntiles_adjusted;
   size_t i;
-  int32_t mcode; /* Morton code of the tile */
+  int64_t* proc_tiles = NULL;
+  int64_t itile;
   struct htrdr_buffer_layout layout;
   double pix_sz[2]; /* Pixel size in the normalized image plane */
-  size_t spp;
+  size_t proc_ntiles;
+  size_t proc_ntiles_adjusted;
   ATOMIC nsolved_tiles = 0;
   ATOMIC res = RES_OK;
   ASSERT(htrdr && cam && buf);
@@ -233,13 +235,6 @@ htrdr_draw_radiance_sw
   htrdr_buffer_get_layout(buf, &layout);
   ASSERT(layout.width || layout.height || layout.elmt_size);
 
-  spp = total_spp / (size_t)htrdr->mpi_nprocs;
-
-  /* Add the remaining realisations to the 1st process */
-  if(htrdr->mpi_rank == 0) {
-    spp += total_spp - (spp*(size_t)htrdr->mpi_nprocs);
-  }
-
   if(layout.elmt_size != sizeof(struct htrdr_accum[3])/*#channels*/
   || layout.alignment < ALIGNOF(struct htrdr_accum[3])) {
     htrdr_log_err(htrdr,
@@ -283,11 +278,43 @@ htrdr_draw_radiance_sw
   ntiles_y = (layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
   ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
   ntiles_adjusted *= ntiles_adjusted;
-  ntiles = ntiles_x * ntiles_y;
 
   pix_sz[0] = 1.0 / (double)layout.width;
   pix_sz[1] = 1.0 / (double)layout.height;
 
+  /* Define the initial number of tiles of the current process */
+  proc_ntiles = ntiles_adjusted / (size_t)htrdr->mpi_nprocs;
+  if(htrdr->mpi_rank == 0) {/* Affect the remaining tiles to the master proc */
+    ASSERT(ntiles_adjusted >= proc_ntiles * (size_t)htrdr->mpi_nprocs);
+    proc_ntiles += ntiles_adjusted - proc_ntiles*(size_t)htrdr->mpi_nprocs;
+  }
+
+  /* Allocate the per process list of tiles */
+  proc_tiles = MEM_CALLOC(htrdr->allocator, proc_ntiles, sizeof(*proc_tiles));
+  if(!proc_tiles) {
+    res = RES_MEM_ERR;
+    htrdr_log_err(htrdr, 
+      "%s: could not allocate the per process list of tiles -- %s.\n",
+      FUNC_NAME, res_to_cstr((res_T)res));
+    goto error;
+  }
+
+    /* Define the initial list of tiles of the process */
+  proc_ntiles_adjusted = 0;
+  FOR_EACH(itile, 0, proc_ntiles) {
+    size_t tile_org[2];
+    int64_t mcode = htrdr->mpi_rank + (itile*htrdr->mpi_nprocs);
+
+    /* Decode the morton code to retrieve the tile index  */
+    tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
+    if(tile_org[0] >= ntiles_x) continue; /* Skip border tile */
+    tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
+    if(tile_org[1] >= ntiles_y) continue; /* Skip border tile */
+
+    proc_tiles[proc_ntiles_adjusted] = mcode;
+    proc_ntiles_adjusted++;
+  }
+
   if(htrdr->mpi_rank == 0) {
     fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
     print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
@@ -295,9 +322,10 @@ htrdr_draw_radiance_sw
 
   omp_set_num_threads((int)htrdr->nthreads);
   #pragma omp parallel for schedule(static, 1/*chunck size*/)
-  for(mcode=0; mcode<(int64_t)ntiles_adjusted; ++mcode) {
+  for(itile=0; itile<(int64_t)proc_ntiles_adjusted; ++itile) {
     const int ithread = omp_get_thread_num();
     struct ssp_rng* rng = rngs[ithread];
+    int64_t mcode = proc_tiles[itile];
     size_t tile_org[2];
     size_t tile_sz[2];
     size_t n;
@@ -306,9 +334,8 @@ htrdr_draw_radiance_sw
 
     /* Decode the morton code to retrieve the tile index  */
     tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
-    if(tile_org[0] >= ntiles_x) continue; /* Skip border tile */
     tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
-    if(tile_org[1] >= ntiles_y) continue; /* Skip border tile */
+    ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
 
     /* Define the tile origin in pixel space */
     tile_org[0] *= TILE_SIZE;
@@ -326,19 +353,17 @@ htrdr_draw_radiance_sw
     }
 
     n = (size_t)ATOMIC_INCR(&nsolved_tiles);
-    pcent = (int8_t)(n * 100 / ntiles);
+    pcent = (int8_t)(n * 100 / proc_ntiles_adjusted);
 
     #pragma omp critical
     if(pcent > htrdr->mpi_progress_render[0]) {
       htrdr->mpi_progress_render[0] = pcent;
-      if(htrdr->mpi_rank != 0) {
+      if(htrdr->mpi_rank == 0) {
+        update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+      } else {
         /* Send the progress percentage of the process to the master process */
         CHK(MPI_Send(&pcent, sizeof(pcent), MPI_CHAR, 0/*dst*/,
           HTRDR_MPI_PROGRESS_RENDERING/*tag*/, MPI_COMM_WORLD) == MPI_SUCCESS);
-      } else {
-        fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
-        htrdr_fprintf(htrdr, stderr, "\033[%dA", htrdr->mpi_nprocs);
-        print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
       }
     }
 
@@ -347,11 +372,10 @@ htrdr_draw_radiance_sw
 
   if(htrdr->mpi_rank == 0) {
     while(total_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING) != 100) {
-      fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
-      htrdr_fprintf(htrdr, stderr, "\033[%dA", htrdr->mpi_nprocs);
-      print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+      update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
       sleep(1);
     }
+    fprintf(stderr, "\n");
   }
 
   /* Gather accum buffers from the group of processes */
@@ -360,6 +384,7 @@ htrdr_draw_radiance_sw
 
 exit:
   if(rng_proxy) SSP(rng_proxy_ref_put(rng_proxy));
+  if(proc_tiles) MEM_RM(htrdr->allocator, proc_tiles);
   if(rngs) {
     FOR_EACH(i, 0, htrdr->nthreads) {
       if(rngs[i]) SSP(rng_ref_put(rngs[i]));
diff --git a/src/htrdr_sky.c b/src/htrdr_sky.c
@@ -1273,14 +1273,12 @@ setup_clouds
       #pragma omp critical
       if(pcent > sky->htrdr->mpi_progress_octree[0]) {
         sky->htrdr->mpi_progress_octree[0] = pcent;
-        if(sky->htrdr->mpi_rank != 0) {
+        if(sky->htrdr->mpi_rank == 0) {
+          update_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+        } else {
           /* Send the progress percentage of the process to the master process */
           CHK(MPI_Send(&pcent, sizeof(pcent), MPI_CHAR, 0/*dst*/,
             HTRDR_MPI_PROGRESS_BUILD_OCTREE, MPI_COMM_WORLD) == MPI_SUCCESS);
-        } else {
-          fetch_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
-          htrdr_fprintf(sky->htrdr, stderr, "\033[%dA", sky->htrdr->mpi_nprocs);
-          print_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
         }
       }
     }
@@ -1288,9 +1286,7 @@ setup_clouds
 
   if(!sky->htrdr->cache_grids && sky->htrdr->mpi_rank == 0) {
     while(total_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE) != 100) {
-      fetch_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
-      htrdr_fprintf(sky->htrdr, stderr, "\033[%dA", sky->htrdr->mpi_nprocs);
-      print_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
+      update_mpi_progress(sky->htrdr, HTRDR_MPI_PROGRESS_BUILD_OCTREE);
       sleep(1);
     }
   }

	htrdr Solving radiative transfer in heterogeneous media
	git clone git://git.meso-star.fr/htrdr.git
	Log \| Files \| Refs \| README \| LICENSE

M	src/htrdr.c	\|	59	++++++++++++++++++++++++++++++++++++++---------------------
M	src/htrdr_c.h	\|	14	++++++++++++++
M	src/htrdr_draw_radiance_sw.c	\|	73	+++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M	src/htrdr_sky.c	\|	12	++++--------