Handle the generic sensor in the draw function - htrdr - Solving radiative transfer in heterogeneous media

commit 82dc870b3514ca1675d99bec881dfbe6071efc85
parent d2c338c4ff5ee08ad98e741697a83d6aef409bae
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Tue, 15 Sep 2020 12:20:32 +0200

Handle the generic sensor in the draw function

Implement the htrdr_sensor_sample_primary_ray function. Rename the
htrdr_draw_radiance function in htrdr_draw_map and refactor it to handle
the generic sensor.

Diffstat:
M cmake/CMakeLists.txt  | 3 ++-
M src/htrdr.c  | 9 +++++----
A src/htrdr_draw_map.c  | 1079 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/htrdr_draw_radiance.c  | 1081 -------------------------------------------------------------------------------
M src/htrdr_rectangle.c  | 9 +++++++++
M src/htrdr_rectangle.h  | 5 +++++
A src/htrdr_sensor.c  | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/htrdr_sensor.h  | 16 ++++++++++++++++
M src/htrdr_solve.h  | 4 ++--

9 files changed, 1233 insertions(+), 1088 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -94,7 +94,7 @@ set(HTRDR_FILES_SRC
   htrdr_cie_xyz.c
   htrdr_compute_radiance_sw.c
   htrdr_compute_radiance_lw.c
-  htrdr_draw_radiance.c
+  htrdr_draw_map.c
   htrdr_grid.c
   htrdr_ground.c
   htrdr_interface.c
@@ -102,6 +102,7 @@ set(HTRDR_FILES_SRC
   htrdr_mtl.c
   htrdr_ran_wlen.c
   htrdr_rectangle.c
+  htrdr_sensor.c
   htrdr_slab.c
   htrdr_spectral.c
   htrdr_sun.c)
diff --git a/src/htrdr.c b/src/htrdr.c
@@ -25,6 +25,7 @@
 #include "htrdr_ground.h"
 #include "htrdr_mtl.h"
 #include "htrdr_ran_wlen.h"
+#include "htrdr_rectangle.h"
 #include "htrdr_sun.h"
 #include "htrdr_solve.h"
 
@@ -427,7 +428,6 @@ htrdr_init
    struct htrdr* htrdr)
 {
   struct htsky_args htsky_args = HTSKY_ARGS_DEFAULT;
-  double proj_ratio;
   double sun_dir[3];
   double spectral_range[2];
   const char* output_name = NULL;
@@ -619,7 +619,8 @@ htrdr_release(struct htrdr* htrdr)
   if(htrdr->ground) htrdr_ground_ref_put(htrdr->ground);
   if(htrdr->sky) HTSKY(ref_put(htrdr->sky));
   if(htrdr->sun) htrdr_sun_ref_put(htrdr->sun);
-  if(htrdr->cam) htrdr_camera_ref_put(htrdr->cam);
+  if(htrdr->sensor.camera) htrdr_camera_ref_put(htrdr->sensor.camera);
+  if(htrdr->sensor.rectangle) htrdr_rectangle_ref_put(htrdr->sensor.rectangle);
   if(htrdr->buf) htrdr_buffer_ref_put(htrdr->buf);
   if(htrdr->mtl) htrdr_mtl_ref_put(htrdr->mtl);
   if(htrdr->cie) htrdr_cie_xyz_ref_put(htrdr->cie);
@@ -659,8 +660,8 @@ htrdr_run(struct htrdr* htrdr)
       }
     }
   } else {
-    res = htrdr_draw_radiance(htrdr, htrdr->cam, htrdr->width,
-      htrdr->height, htrdr->spp, htrdr->buf);
+    res = htrdr_draw_map(htrdr, &htrdr->sensor, htrdr->width, htrdr->height,
+      htrdr->spp, htrdr->buf);
     if(res != RES_OK) goto error;
     if(htrdr->mpi_rank == 0) {
       struct htrdr_accum path_time_acc = HTRDR_ACCUM_NULL;
diff --git a/src/htrdr_draw_map.c b/src/htrdr_draw_map.c
@@ -0,0 +1,1079 @@
+/* Copyright (C) 2018, 2019, 2020 |Meso|Star> (contact@meso-star.com)
+ * Copyright (C) 2018, 2019 CNRS, Université Paul Sabatier
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L /* nanosleep && nextafter */
+
+#include "htrdr.h"
+#include "htrdr_c.h"
+#include "htrdr_buffer.h"
+#include "htrdr_camera.h"
+#include "htrdr_cie_xyz.h"
+#include "htrdr_ran_wlen.h"
+#include "htrdr_solve.h"
+
+#include <high_tune/htsky.h>
+
+#include <rsys/algorithm.h>
+#include <rsys/clock_time.h>
+#include <rsys/cstr.h>
+#include <rsys/dynamic_array_u32.h>
+#include <rsys/math.h>
+#include <rsys/mutex.h>
+#include <star/ssp.h>
+
+#include <omp.h>
+#include <mpi.h>
+#include <time.h>
+#include <unistd.h>
+
+#define RNG_SEQUENCE_SIZE 10000
+
+#define TILE_MCODE_NULL UINT32_MAX
+#define TILE_SIZE 32 /* Definition in X & Y of a tile */
+STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2);
+
+enum pixel_format {
+  PIXEL_XWAVE,
+  PIXEL_IMAGE
+};
+
+union pixel {
+  struct htrdr_pixel_xwave xwave;
+  struct htrdr_pixel_image image;
+};
+
+/* Tile of row ordered image pixels */
+struct tile {
+  struct list_node node;
+  struct mem_allocator* allocator;
+  ref_T ref;
+
+  struct tile_data {
+    uint16_t x, y; /* 2D coordinates of the tile in tile space */
+    enum pixel_format format;
+    /* Simulate the flexible array member of the C99 standard. */
+    union pixel pixels[1/*Dummy element*/];
+  } data;
+};
+
+/* List of tile to compute onto the MPI process. */
+struct proc_work {
+  struct mutex* mutex;
+  struct darray_u32 tiles; /* #tiles to render */
+  size_t itile; /* Next tile to render in the above list of tiles */
+};
+
+/*******************************************************************************
+ * Helper functions
+ ******************************************************************************/
+static FINLINE uint16_t
+morton2D_decode(const uint32_t u32)
+{
+  uint32_t x = u32 & 0x55555555;
+  x = (x | (x >> 1)) & 0x33333333;
+  x = (x | (x >> 2)) & 0x0F0F0F0F;
+  x = (x | (x >> 4)) & 0x00FF00FF;
+  x = (x | (x >> 8)) & 0x0000FFFF;
+  return (uint16_t)x;
+}
+
+static FINLINE uint32_t
+morton2D_encode(const uint16_t u16)
+{
+  uint32_t u32 = u16;
+  u32 = (u32 | (u32 << 8)) & 0x00FF00FF;
+  u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F;
+  u32 = (u32 | (u32 << 2)) & 0x33333333;
+  u32 = (u32 | (u32 << 1)) & 0x55555555;
+  return u32;
+}
+
+static INLINE enum pixel_format
+spectral_type_to_pixfmt(const enum htrdr_spectral_type spectral_type)
+{
+  enum pixel_format pixfmt;
+  switch(spectral_type) {
+    case HTRDR_SPECTRAL_LW: pixfmt = PIXEL_XWAVE; break;
+    case HTRDR_SPECTRAL_SW: pixfmt = PIXEL_XWAVE; break;
+    case HTRDR_SPECTRAL_SW_CIE_XYZ: pixfmt = PIXEL_IMAGE; break;
+    default: FATAL("Unreachable code.\n"); break;
+  }
+  return pixfmt;
+}
+
+static FINLINE struct tile*
+tile_create(struct mem_allocator* allocator, const enum pixel_format fmt)
+{
+  struct tile* tile;
+  const size_t tile_sz =
+    sizeof(struct tile) - sizeof(union pixel)/*rm dummy pixel*/;
+  const size_t buf_sz = /* Flexiblbe array element */
+    TILE_SIZE*TILE_SIZE*sizeof(union pixel);
+  ASSERT(allocator);
+
+  tile = MEM_ALLOC(allocator, tile_sz+buf_sz);
+  if(!tile) return NULL;
+
+  tile->data.format = fmt;
+
+  ref_init(&tile->ref);
+  list_init(&tile->node);
+  tile->allocator = allocator;
+  ASSERT(IS_ALIGNED(&tile->data.pixels, ALIGNOF(union pixel)));
+
+  return tile;
+}
+
+static INLINE void
+tile_ref_get(struct tile* tile)
+{
+  ASSERT(tile);
+  tile_ref_get(tile);
+}
+
+static INLINE void
+release_tile(ref_T* ref)
+{
+  struct tile* tile = CONTAINER_OF(ref, struct tile, ref);
+  ASSERT(ref);
+  MEM_RM(tile->allocator, tile);
+}
+
+static INLINE void
+tile_ref_put(struct tile* tile)
+{
+  ASSERT(tile);
+  ref_put(&tile->ref, release_tile);
+}
+
+static FINLINE union pixel*
+tile_at
+  (struct tile* tile,
+   const size_t x, /* In tile space */
+   const size_t y) /* In tile space */
+{
+  ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE);
+  return tile->data.pixels + (y*TILE_SIZE + x);
+}
+
+static void
+write_tile_data
+  (struct htrdr* htrdr,
+   struct htrdr_buffer* buf,
+   const struct tile_data* tile_data)
+{
+  struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
+  size_t icol, irow;
+  size_t irow_tile;
+  size_t ncols_tile, nrows_tile;
+  char* buf_mem;
+  ASSERT(htrdr && buf && tile_data);
+  (void)htrdr;
+
+  htrdr_buffer_get_layout(buf, &layout);
+  buf_mem = htrdr_buffer_get_data(buf);
+  ASSERT(layout.elmt_size==htrdr_spectral_type_get_pixsz(htrdr->spectral_type));
+
+  /* Compute the row/column of the tile origin into the buffer */
+  icol = tile_data->x * (size_t)TILE_SIZE;
+  irow = tile_data->y * (size_t)TILE_SIZE;
+
+  /* Define the number of tile row/columns to write into the buffer */
+  ncols_tile = MMIN(icol + TILE_SIZE, layout.width)  - icol;
+  nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow;
+
+  /* Copy the row ordered tile data */
+  FOR_EACH(irow_tile, 0, nrows_tile) {
+    char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch;
+    char* buf_col = buf_row + icol * layout.elmt_size;
+    const union pixel* tile_row = tile_data->pixels + irow_tile*TILE_SIZE;
+    size_t x;
+
+    FOR_EACH(x, 0, ncols_tile) {
+      switch(tile_data->format) {
+        case PIXEL_XWAVE:
+          ((struct htrdr_pixel_xwave*)buf_col)[x] = tile_row[x].xwave;
+          break;
+        case PIXEL_IMAGE:
+          ((struct htrdr_pixel_image*)buf_col)[x] = tile_row[x].image;
+          break;
+        default: FATAL("Unreachable code.\n"); break;
+      }
+    }
+  }
+}
+
+static INLINE void
+proc_work_init(struct mem_allocator* allocator, struct proc_work* work)
+{
+  ASSERT(work);
+  darray_u32_init(allocator, &work->tiles);
+  work->itile = 0;
+  CHK(work->mutex = mutex_create());
+}
+
+static INLINE void
+proc_work_release(struct proc_work* work)
+{
+  darray_u32_release(&work->tiles);
+  mutex_destroy(work->mutex);
+}
+
+static INLINE void
+proc_work_reset(struct proc_work* work)
+{
+  ASSERT(work);
+  mutex_lock(work->mutex);
+  darray_u32_clear(&work->tiles);
+  work->itile = 0;
+  mutex_unlock(work->mutex);
+}
+
+static INLINE void
+proc_work_add_tile(struct proc_work* work, const uint32_t mcode)
+{
+  mutex_lock(work->mutex);
+  CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK);
+  mutex_unlock(work->mutex);
+}
+
+static INLINE uint32_t
+proc_work_get_tile(struct proc_work* work)
+{
+  uint32_t mcode;
+  ASSERT(work);
+  mutex_lock(work->mutex);
+  if(work->itile >= darray_u32_size_get(&work->tiles)) {
+    mcode = TILE_MCODE_NULL;
+  } else {
+    mcode = darray_u32_cdata_get(&work->tiles)[work->itile];
+    ++work->itile;
+  }
+  mutex_unlock(work->mutex);
+  return mcode;
+}
+
+static INLINE size_t
+proc_work_get_ntiles(struct proc_work* work)
+{
+  size_t sz = 0;
+  ASSERT(work);
+  mutex_lock(work->mutex);
+  sz = darray_u32_size_get(&work->tiles);
+  mutex_unlock(work->mutex);
+  return sz;
+}
+
+static void
+mpi_wait_for_request(struct htrdr* htrdr, MPI_Request* req)
+{
+  ASSERT(htrdr && req);
+
+  /* Wait for process synchronisation */
+  for(;;) {
+    struct timespec t;
+    int complete;
+    t.tv_sec = 0;
+    t.tv_nsec = 10000000; /* 10ms */
+
+    mutex_lock(htrdr->mpi_mutex);
+    MPI(Test(req, &complete, MPI_STATUS_IGNORE));
+    mutex_unlock(htrdr->mpi_mutex);
+    if(complete) break;
+
+    nanosleep(&t, NULL);
+  }
+}
+
+static void
+mpi_probe_thieves
+  (struct htrdr* htrdr,
+   struct proc_work* work,
+   ATOMIC* probe_thieves)
+{
+  uint32_t tiles[UINT8_MAX];
+  struct timespec t;
+  ASSERT(htrdr && work && probe_thieves);
+
+  if(htrdr->mpi_nprocs == 1) /* The process is alone. No thief is possible */
+    return;
+
+  t.tv_sec = 0;
+
+  /* Protect MPI calls of multiple invocations from concurrent threads */
+  #define P_MPI(Func) {                                                        \
+    mutex_lock(htrdr->mpi_mutex);                                              \
+    MPI(Func);                                                                 \
+    mutex_unlock(htrdr->mpi_mutex);                                            \
+  } (void)0
+
+  while(ATOMIC_GET(probe_thieves)) {
+    MPI_Status status;
+    size_t itile;
+    int msg;
+
+    /* Probe if a steal request was submitted by any processes */
+    P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg,
+      &status));
+
+    if(msg) { /* A steal request was posted */
+      MPI_Request req;
+      uint8_t ntiles_to_steal;
+
+      /* Asynchronously receive the steal request */
+      P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE,
+        HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req));
+
+      /* Wait for the completion of the steal request */
+      mpi_wait_for_request(htrdr, &req);
+
+      /* Thief some tiles */
+      FOR_EACH(itile, 0, ntiles_to_steal) {
+        tiles[itile] = proc_work_get_tile(work);
+      }
+      P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE,
+        HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD));
+    }
+    t.tv_nsec = 500000000; /* 500ms */
+    nanosleep(&t, NULL);
+  }
+  #undef P_MPI
+}
+
+static int
+mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng)
+{
+  int iproc, i;
+  int dst_rank;
+  ASSERT(htrdr && rng && htrdr->mpi_nworking_procs);
+
+  /* Sample the index of the 1st active process */
+  iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs);
+
+  /* Find the rank of the sampled active process. Use a simple linear search
+   * since the overall number of processes should be quite low; at most few
+   * dozens.  */
+  i = 0;
+  FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) {
+    if(htrdr->mpi_working_procs[dst_rank] == 0) continue; /* Inactive process */
+    if(i == iproc) break; /* The rank of the sampled process is found */
+    ++i;
+  }
+  ASSERT(dst_rank < htrdr->mpi_nprocs);
+  return dst_rank;
+}
+
+/* Return the number of stolen tiles */
+static size_t
+mpi_steal_work
+  (struct htrdr* htrdr,
+   struct ssp_rng* rng,
+   struct proc_work* work)
+{
+  MPI_Request req;
+  size_t itile;
+  size_t nthieves = 0;
+  uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */
+  int proc_to_steal; /* Process to steal */
+  uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16);
+  ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX);
+
+  /* Protect MPI calls of multiple invocations from concurrent threads */
+  #define P_MPI(Func) {                                                        \
+    mutex_lock(htrdr->mpi_mutex);                                              \
+    MPI(Func);                                                                 \
+    mutex_unlock(htrdr->mpi_mutex);                                            \
+  } (void)0
+
+  /* No more working process => nohting to steal */
+  if(!htrdr->mpi_nworking_procs) return 0;
+
+  /* Sample a process to steal */
+  proc_to_steal = mpi_sample_working_process(htrdr, rng);
+
+  /* Send a steal request to the sampled process and wait for a response */
+  P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal,
+    HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD));
+
+  /* Receive the stolen tile from the sampled process */
+  P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal,
+    HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req));
+
+  mpi_wait_for_request(htrdr, &req);
+
+  FOR_EACH(itile, 0, ntiles_to_steal) {
+    if(tiles[itile] == TILE_MCODE_NULL) {
+      ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0);
+      htrdr->mpi_working_procs[proc_to_steal] = 0;
+      htrdr->mpi_nworking_procs--;
+      break;
+    }
+    proc_work_add_tile(work, tiles[itile]);
+    ++nthieves;
+  }
+  #undef P_MPI
+  return nthieves;
+}
+
+static res_T
+mpi_gather_tiles
+  (struct htrdr* htrdr,
+   struct htrdr_buffer* buf,
+   const size_t ntiles,
+   struct list_node* tiles)
+{
+  /* Compute the size of the tile_data */
+  const size_t msg_sz =
+    sizeof(struct tile_data) - sizeof(union pixel)/*dummy*/
+  + TILE_SIZE*TILE_SIZE*sizeof(union pixel);
+
+  struct list_node* node = NULL;
+  struct tile* tile = NULL;
+  res_T res = RES_OK;
+  ASSERT(htrdr && tiles);
+  ASSERT(htrdr->mpi_rank != 0 || buf);
+  (void)ntiles;
+
+  if(htrdr->mpi_rank != 0) { /* Non master process */
+    /* Send the computed tile to the master process */
+    LIST_FOR_EACH(node, tiles) {
+      struct tile* t = CONTAINER_OF(node, struct tile, node);
+      MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0,
+        HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD));
+    }
+  } else { /* Master process */
+    size_t itile = 0;
+
+    LIST_FOR_EACH(node, tiles) {
+      struct tile* t = CONTAINER_OF(node, struct tile, node);
+      write_tile_data(htrdr, buf, &t->data);
+      ++itile;
+    }
+
+    if(itile != ntiles) {
+      enum pixel_format pixfmt;
+      ASSERT(htrdr->mpi_nprocs > 1);
+
+      /* Create a temporary tile to receive the tile data computed by the
+       * concurrent MPI processes */
+      pixfmt = spectral_type_to_pixfmt(htrdr->spectral_type);
+      tile = tile_create(htrdr->allocator, pixfmt);
+      if(!tile) {
+        res = RES_MEM_ERR;
+        htrdr_log_err(htrdr,
+          "could not allocate the temporary tile used to gather the process "
+          "output data -- %s.\n", res_to_cstr(res));
+        goto error;
+      }
+
+      /* Receive the tile data of the concurrent MPI processes */
+      FOR_EACH(itile, itile, ntiles) {
+        MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE,
+          HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE));
+        write_tile_data(htrdr, buf, &tile->data);
+      }
+    }
+  }
+
+exit:
+  if(tile) tile_ref_put(tile);
+  return res;
+error:
+  goto exit;
+}
+
+static void
+draw_pixel_image
+  (struct htrdr* htrdr,
+   const size_t ithread,
+   const size_t ipix[2],
+   const double pix_sz[2], /* Size of a pixel in the normalized image plane */
+   const struct htrdr_camera* cam,
+   const size_t spp,
+   struct ssp_rng* rng,
+   struct htrdr_pixel_image* pixel)
+{
+  struct htrdr_accum XYZ[3]; /* X, Y, and Z */
+  struct htrdr_accum time;
+  size_t ichannel;
+  ASSERT(ipix && ipix && pix_sz && cam && rng && pixel);
+  ASSERT(htrdr->spectral_type == HTRDR_SPECTRAL_SW_CIE_XYZ);
+
+  /* Reset accumulators */
+  XYZ[0] = HTRDR_ACCUM_NULL;
+  XYZ[1] = HTRDR_ACCUM_NULL;
+  XYZ[2] = HTRDR_ACCUM_NULL;
+  time = HTRDR_ACCUM_NULL;
+
+  FOR_EACH(ichannel, 0, 3) {
+    size_t isamp;
+
+    FOR_EACH(isamp, 0, spp) {
+      struct time t0, t1;
+      double pix_samp[2];
+      double ray_org[3];
+      double ray_dir[3];
+      double weight;
+      double r0, r1, r2;
+      double wlen; /* Sampled wavelength into the spectral band */
+      double pdf;
+      size_t iband; /* Sampled spectral band */
+      size_t iquad; /* Sampled quadrature point into the spectral band */
+      double usec;
+
+      /* Begin the registration of the time spent to in the realisation */
+      time_current(&t0);
+
+      /* Sample a position into the pixel, in the normalized image plane */
+      pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0];
+      pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1];
+
+      /* Generate a ray starting from the pinhole camera and passing through the
+       * pixel sample */
+      htrdr_camera_ray(cam, pix_samp, ray_org, ray_dir);
+
+      r0 = ssp_rng_canonical(rng);
+      r1 = ssp_rng_canonical(rng);
+      r2 = ssp_rng_canonical(rng);
+
+      /* Sample a spectral band and a quadrature point */
+      switch(ichannel) {
+        case 0: wlen = htrdr_cie_xyz_sample_X(htrdr->cie, r0, r1, &pdf); break;
+        case 1: wlen = htrdr_cie_xyz_sample_Y(htrdr->cie, r0, r1, &pdf); break;
+        case 2: wlen = htrdr_cie_xyz_sample_Z(htrdr->cie, r0, r1, &pdf); break;
+        default: FATAL("Unreachable code.\n"); break;
+      }
+
+      iband = htsky_find_spectral_band(htrdr->sky, wlen);
+      iquad = htsky_spectral_band_sample_quadrature(htrdr->sky, r2, iband);
+
+      /* Compute the radiance in W/m^2/sr/m */
+      weight = htrdr_compute_radiance_sw
+        (htrdr, ithread, rng, ray_org, ray_dir, wlen, iband, iquad);
+      ASSERT(weight >= 0);
+
+      pdf *= 1.e9; /* Transform the pdf from nm^-1 to m^-1 */
+      weight /= pdf; /* In W/m^2/sr */
+
+      /* End the registration of the per realisation time */
+      time_sub(&t0, time_current(&t1), &t0);
+      usec = (double)time_val(&t0, TIME_NSEC) * 0.001;
+
+      /* Update the pixel accumulator of the current channel */
+      XYZ[ichannel].sum_weights += weight;
+      XYZ[ichannel].sum_weights_sqr += weight*weight;
+      XYZ[ichannel].nweights += 1;
+
+      /* Update the pixel accumulator of per realisation time */
+      time.sum_weights += usec;
+      time.sum_weights_sqr += usec*usec;
+      time.nweights += 1;
+    }
+  }
+
+  /* Flush pixel data */
+  htrdr_accum_get_estimation(XYZ+0, &pixel->X);
+  htrdr_accum_get_estimation(XYZ+1, &pixel->Y);
+  htrdr_accum_get_estimation(XYZ+2, &pixel->Z);
+  pixel->time = time;
+}
+
+
+static INLINE double
+radiance_temperature
+  (struct htrdr* htrdr,
+   const double radiance) /* In W/m^2/sr */
+{
+  double temperature = 0;
+  double radiance_avg = radiance;
+  res_T res = RES_OK;
+  ASSERT(htrdr && radiance >= 0);
+
+  /* From integrated radiance to average radiance in W/m^2/sr/m */
+  if(htrdr->wlen_range_m[0] != htrdr->wlen_range_m[1]) { /* !monochromatic */
+    radiance_avg /= (htrdr->wlen_range_m[1] - htrdr->wlen_range_m[0]);
+  }
+
+  res = brightness_temperature
+    (htrdr,
+     htrdr->wlen_range_m[0],
+     htrdr->wlen_range_m[1],
+     radiance_avg,
+     &temperature);
+  if(res != RES_OK) {
+    htrdr_log_warn(htrdr,
+      "Could not compute the brightness temperature for the radiance %g.\n",
+       radiance_avg);
+    temperature = 0;
+  }
+  return temperature;
+}
+
+static void
+draw_pixel_xwave
+  (struct htrdr* htrdr,
+   const size_t ithread,
+   const size_t ipix[2],
+   const double pix_sz[2], /* Size of a pixel in the normalized image plane */
+   const struct htrdr_sensor* sensor,
+   const size_t spp,
+   struct ssp_rng* rng,
+   struct htrdr_pixel_xwave* pixel)
+{
+  struct htrdr_accum radiance;
+  struct htrdr_accum time;
+  size_t isamp;
+  double temp_min, temp_max;
+  ASSERT(ipix && ipix && pix_sz && sensor && rng && pixel);
+  ASSERT(htrdr->spectral_type == HTRDR_SPECTRAL_LW 
+      || htrdr->spectral_type == HTRDR_SPECTRAL_SW);
+
+  /* Reset the pixel accumulators */
+  radiance = HTRDR_ACCUM_NULL;
+  time = HTRDR_ACCUM_NULL;
+
+  FOR_EACH(isamp, 0, spp) {
+    struct time t0, t1;
+    double ray_org[3];
+    double ray_dir[3];
+    double weight;
+    double r0, r1, r2;
+    double wlen;
+    size_t iband;
+    size_t iquad;
+    double usec;
+    double band_pdf;
+    res_T res = RES_OK;
+
+    /* Begin the registration of the time spent in the realisation */
+    time_current(&t0);
+
+    res = htrdr_sensor_sample_primary_ray(&htrdr->sensor, htrdr->ground, ipix,
+      pix_sz, rng, ray_org, ray_dir);
+    if(res != RES_OK) continue; /* Reject the current sample */
+
+    r0 = ssp_rng_canonical(rng);
+    r1 = ssp_rng_canonical(rng);
+    r2 = ssp_rng_canonical(rng);
+
+    /* Sample a wavelength */
+    wlen = htrdr_ran_wlen_sample(htrdr->ran_wlen, r0, r1, &band_pdf);
+
+    /* Select the associated band and sample a quadrature point */
+    iband = htsky_find_spectral_band(htrdr->sky, wlen);
+    iquad = htsky_spectral_band_sample_quadrature(htrdr->sky, r2, iband);
+
+    /* Compute the spectral radiance in W/m^2/sr/m */
+    switch(htrdr->spectral_type) {
+      case HTRDR_SPECTRAL_LW:
+        weight = htrdr_compute_radiance_lw(htrdr, ithread, rng, ray_org,
+          ray_dir, wlen, iband, iquad);
+        break;
+      case HTRDR_SPECTRAL_SW:
+        weight = htrdr_compute_radiance_sw(htrdr, ithread, rng, ray_org,
+          ray_dir, wlen, iband, iquad);
+        break;
+      default: FATAL("Unreachable code.\n"); break;
+    }
+    ASSERT(weight >= 0);
+
+    /* Importance sampling: correct weight with pdf */
+    weight /= band_pdf; /* In W/m^2/sr */
+
+    /* End the registration of the per realisation time */
+    time_sub(&t0, time_current(&t1), &t0);
+    usec = (double)time_val(&t0, TIME_NSEC) * 0.001;
+
+    /* Update the pixel accumulator of the current channel */
+    radiance.sum_weights += weight;
+    radiance.sum_weights_sqr += weight*weight;
+    radiance.nweights += 1;
+
+    /* Update the pixel accumulator of per realisation time */
+    time.sum_weights += usec;
+    time.sum_weights_sqr += usec*usec;
+    time.nweights += 1;
+  }
+
+  /* Compute the estimation of the pixel radiance */
+  htrdr_accum_get_estimation(&radiance, &pixel->radiance);
+
+  /* Save the per realisation integration time */
+  pixel->time = time;
+
+  /* Compute the brightness_temperature of the pixel and estimate its standard
+   * error if the sources were in the medium (<=> longwave) */
+  if(htrdr->spectral_type == HTRDR_SPECTRAL_LW) {
+    pixel->radiance_temperature.E = radiance_temperature(htrdr, pixel->radiance.E);
+    temp_min = radiance_temperature(htrdr, pixel->radiance.E - pixel->radiance.SE);
+    temp_max = radiance_temperature(htrdr, pixel->radiance.E + pixel->radiance.SE);
+    pixel->radiance_temperature.SE = temp_max - temp_min;
+  }
+}
+
+static res_T
+draw_tile
+  (struct htrdr* htrdr,
+   const size_t ithread,
+   const int64_t tile_mcode, /* For debug only */
+   const size_t tile_org[2], /* Origin of the tile in pixel space */
+   const size_t tile_sz[2], /* Definition of the tile */
+   const double pix_sz[2], /* Size of a pixel in the normalized image plane */
+   const struct htrdr_sensor* sensor,
+   const size_t spp, /* #samples per pixel */
+   struct ssp_rng* rng,
+   struct tile* tile)
+{
+  size_t npixels;
+  size_t mcode; /* Morton code of tile pixel */
+  ASSERT(htrdr && tile_org && tile_sz && pix_sz && sensor && spp && tile);
+  (void)tile_mcode;
+  /* Adjust the #pixels to process them wrt a morton order */
+  npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1]));
+  npixels *= npixels;
+
+  FOR_EACH(mcode, 0, npixels) {
+    union pixel* pixel;
+    size_t ipix_tile[2]; /* Pixel coord in the tile */
+    size_t ipix[2]; /* Pixel coord in the buffer */
+
+    ipix_tile[0] = morton2D_decode((uint32_t)(mcode>>0));
+    if(ipix_tile[0] >= tile_sz[0]) continue; /* Pixel is out of tile */
+    ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1));
+    if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */
+
+    /* Fetch and reset the pixel accumulator */
+    pixel = tile_at(tile, ipix_tile[0], ipix_tile[1]);
+
+    /* Compute the pixel coordinate */
+    ipix[0] = tile_org[0] + ipix_tile[0];
+    ipix[1] = tile_org[1] + ipix_tile[1];
+
+    /* Draw the pixel */
+    switch(htrdr->spectral_type) {
+      case HTRDR_SPECTRAL_LW:
+      case HTRDR_SPECTRAL_SW:
+        draw_pixel_xwave(htrdr, ithread, ipix, pix_sz, sensor, spp, rng, &pixel->xwave);
+        break;
+      case HTRDR_SPECTRAL_SW_CIE_XYZ:
+        ASSERT(sensor->type == HTRDR_SENSOR_CAMERA); /* TODO handle this */
+        draw_pixel_image(htrdr, ithread, ipix, pix_sz, sensor->camera, spp,
+          rng, &pixel->image);
+        break;
+      default: FATAL("Unreachable code.\n"); break;
+    }
+  }
+  return RES_OK;
+}
+
+static res_T
+draw_image
+  (struct htrdr* htrdr,
+   const struct htrdr_sensor* sensor,
+   const size_t width, /* Image width */
+   const size_t height, /* Image height */
+   const size_t spp,
+   const size_t ntiles_x,
+   const size_t ntiles_y,
+   const size_t ntiles_adjusted,
+   const double pix_sz[2], /* Pixel size in the normalized image plane */
+   struct proc_work* work,
+   struct list_node* tiles)
+{
+  struct ssp_rng* rng_proc = NULL;
+  size_t nthreads = 0;
+  size_t nthieves = 0;
+  size_t proc_ntiles = 0;
+  enum pixel_format pixfmt;
+  ATOMIC nsolved_tiles = 0;
+  ATOMIC res = RES_OK;
+  ASSERT(htrdr && sensor && spp && ntiles_adjusted && work && tiles);
+  ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0);
+  ASSERT(width && height);
+  (void)ntiles_x, (void)ntiles_y;
+
+  res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc);
+  if(res != RES_OK) {
+    htrdr_log_err(htrdr, "could not create the RNG used to sample a process "
+      "to steal -- %s.\n", res_to_cstr((res_T)res));
+    goto error;
+  }
+
+  proc_ntiles = proc_work_get_ntiles(work);
+  nthreads = MMIN(htrdr->nthreads, proc_ntiles);
+
+  /* The process is not considered as a working process for himself */
+  htrdr->mpi_working_procs[htrdr->mpi_rank] = 0;
+  --htrdr->mpi_nworking_procs;
+
+  pixfmt = spectral_type_to_pixfmt(htrdr->spectral_type);
+
+  omp_set_num_threads((int)nthreads);
+  #pragma omp parallel
+  for(;;) {
+    const int ithread = omp_get_thread_num();
+    struct ssp_rng_proxy* rng_proxy = NULL;
+    struct ssp_rng* rng;
+    struct tile* tile;
+    uint32_t mcode = TILE_MCODE_NULL;
+    size_t tile_org[2];
+    size_t tile_sz[2];
+    size_t n;
+    res_T res_local = RES_OK;
+    int32_t pcent;
+
+    /* Get a tile to draw */
+    #pragma omp critical
+    {
+      mcode = proc_work_get_tile(work);
+      if(mcode == TILE_MCODE_NULL) { /* No more work on this process */
+        /* Try to steal works to concurrent processes */
+        proc_work_reset(work);
+        nthieves = mpi_steal_work(htrdr, rng_proc, work);
+        if(nthieves != 0) {
+          mcode = proc_work_get_tile(work);
+        }
+      }
+    }
+    if(mcode == TILE_MCODE_NULL) break; /* No more work */
+
+    /* Decode the morton code to retrieve the tile index  */
+    tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
+    tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
+    ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
+
+    /* Create the tile */
+    tile = tile_create(htrdr->allocator, pixfmt);
+    if(!tile) {
+      ATOMIC_SET(&res, RES_MEM_ERR);
+      htrdr_log_err(htrdr,
+        "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n",
+        (unsigned long)tile_org[0], (unsigned long)tile_org[1],
+         res_to_cstr((res_T)ATOMIC_GET(&res)));
+      break;
+    }
+
+    /* Register the tile */
+    #pragma omp critical
+    list_add_tail(tiles, &tile->node);
+
+    tile->data.x = (uint16_t)tile_org[0];
+    tile->data.y = (uint16_t)tile_org[1];
+
+    /* Define the tile origin in pixel space */
+    tile_org[0] *= TILE_SIZE;
+    tile_org[1] *= TILE_SIZE;
+
+    /* Compute the size of the tile clamped by the borders of the buffer */
+    tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]);
+    tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]);
+
+    /* Create a proxy RNG for the current tile. This proxy is used for the
+     * current thread only and thus it has to manage only one RNG. This proxy
+     * is initialised in order to ensure that an unique and predictable set of
+     * random numbers is used for the current tile. */
+    SSP(rng_proxy_create2
+      (&htrdr->lifo_allocators[ithread],
+       &ssp_rng_threefry,
+       RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */
+       RNG_SEQUENCE_SIZE, /* Size */
+       RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */
+       1, &rng_proxy));
+    SSP(rng_proxy_create_rng(rng_proxy, 0, &rng));
+
+    /* Launch the tile rendering */
+    res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz,
+      pix_sz, sensor, spp, rng, tile);
+
+    SSP(rng_proxy_ref_put(rng_proxy));
+    SSP(rng_ref_put(rng));
+
+    if(res_local != RES_OK) {
+      ATOMIC_SET(&res, res_local);
+      break;
+    }
+
+    /* Update the progress status */
+    n = (size_t)ATOMIC_INCR(&nsolved_tiles);
+    pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/);
+
+    #pragma omp critical
+    if(pcent > htrdr->mpi_progress_render[0]) {
+      htrdr->mpi_progress_render[0] = pcent;
+      if(htrdr->mpi_rank == 0) {
+        update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+      } else { /* Send the progress percentage to the master process */
+        send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent);
+      }
+    }
+  }
+
+  if(ATOMIC_GET(&res) != RES_OK) goto error;
+
+  /* Asynchronously wait for processes completion. Use an asynchronous barrier to
+   * avoid a dead lock with the `mpi_probe_thieves' thread that requires also
+   * the `mpi_mutex'. */
+  {
+    MPI_Request req;
+
+    mutex_lock(htrdr->mpi_mutex);
+    MPI(Ibarrier(MPI_COMM_WORLD, &req));
+    mutex_unlock(htrdr->mpi_mutex);
+
+    mpi_wait_for_request(htrdr, &req);
+  }
+
+exit:
+  if(rng_proc) SSP(rng_ref_put(rng_proc));
+  return (res_T)res;
+error:
+  goto exit;
+}
+
+/*******************************************************************************
+ * Local functions
+ ******************************************************************************/
+res_T
+htrdr_draw_map
+  (struct htrdr* htrdr,
+   const struct htrdr_sensor* sensor,
+   const size_t width,
+   const size_t height,
+   const size_t spp,
+   struct htrdr_buffer* buf)
+{
+  char strbuf[128];
+  struct time t0, t1;
+  struct list_node tiles;
+  size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
+  size_t itile;
+  struct proc_work work;
+  struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
+  size_t proc_ntiles_adjusted;
+  double pix_sz[2];
+  ATOMIC probe_thieves = 1;
+  ATOMIC res = RES_OK;
+  ASSERT(htrdr && sensor && width && height);
+  ASSERT(htrdr->mpi_rank != 0 || buf);
+
+  list_init(&tiles);
+  proc_work_init(htrdr->allocator, &work);
+
+  if(htrdr->mpi_rank == 0) {
+    const size_t pixsz = htrdr_spectral_type_get_pixsz(htrdr->spectral_type);
+    const size_t pixal = htrdr_spectral_type_get_pixal(htrdr->spectral_type);
+
+    htrdr_buffer_get_layout(buf, &layout);
+    ASSERT(layout.width || layout.height || layout.elmt_size);
+    ASSERT(layout.width == width && layout.height == height);
+
+    if(layout.elmt_size != pixsz || layout.alignment < pixal) {
+      htrdr_log_err(htrdr, "%s: invalid buffer layout.\n", FUNC_NAME);
+      res = RES_BAD_ARG;
+      goto error;
+    }
+  }
+
+  /* Compute the overall number of tiles */
+  ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
+  ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
+  ntiles = ntiles_x * ntiles_y;
+
+  /* Compute the pixel size in the normalized image plane */
+  pix_sz[0] = 1.0 / (double)width;
+  pix_sz[1] = 1.0 / (double)height;
+
+  /* Adjust the #tiles for the morton-encoding procedure */
+  ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
+  ntiles_adjusted *= ntiles_adjusted;
+
+  /* Define the initial number of tiles of the current process */
+  proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs;
+  if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */
+    proc_ntiles_adjusted +=
+      ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs;
+  }
+
+  /* Define the initial list of tiles of the process */
+  FOR_EACH(itile, 0, proc_ntiles_adjusted) {
+    uint32_t mcode;
+    uint16_t tile_org[2];
+
+    mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs
+          + (uint32_t)htrdr->mpi_rank;
+
+    tile_org[0] = morton2D_decode(mcode>>0);
+    if(tile_org[0] >= ntiles_x) continue;
+    tile_org[1] = morton2D_decode(mcode>>1);
+    if(tile_org[1] >= ntiles_y) continue;
+    proc_work_add_tile(&work, mcode);
+  }
+
+  if(htrdr->mpi_rank == 0) {
+    fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+    print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+  }
+
+  time_current(&t0);
+
+  omp_set_nested(1); /* Enable nested threads for draw_image */
+  #pragma omp parallel sections num_threads(2)
+  {
+    #pragma omp section
+    mpi_probe_thieves(htrdr, &work, &probe_thieves);
+
+    #pragma omp section
+    {
+      draw_image(htrdr, sensor, width, height, spp, ntiles_x, ntiles_y,
+        ntiles_adjusted, pix_sz, &work, &tiles);
+      /* The processes have no more work to do. Stop probing for thieves */
+      ATOMIC_SET(&probe_thieves, 0);
+    }
+  }
+
+  if(htrdr->mpi_rank == 0) {
+    update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
+    fprintf(stderr, "\n"); /* Add a new line after the progress statuses */
+  }
+
+  time_sub(&t0, time_current(&t1), &t0);
+  time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
+  htrdr_log(htrdr, "Rendering time: %s\n", strbuf);
+
+  /* Gather accum buffers from the group of processes */
+  time_current(&t0);
+  res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles);
+  if(res != RES_OK) goto error;
+  time_sub(&t0, time_current(&t1), &t0);
+  time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
+  htrdr_log(htrdr, "Image gathering time: %s\n", strbuf);
+
+exit:
+  { /* Free allocated tiles */
+    struct list_node* node;
+    struct list_node* tmp;
+    LIST_FOR_EACH_SAFE(node, tmp, &tiles) {
+      struct tile* tile = CONTAINER_OF(node, struct tile, node);
+      list_del(node);
+      tile_ref_put(tile);
+    }
+  }
+  proc_work_release(&work);
+  return (res_T)res;
+error:
+  goto exit;
+}
+
diff --git a/src/htrdr_draw_radiance.c b/src/htrdr_draw_radiance.c
@@ -1,1081 +0,0 @@
-/* Copyright (C) 2018, 2019, 2020 |Meso|Star> (contact@meso-star.com)
- * Copyright (C) 2018, 2019 CNRS, Université Paul Sabatier
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#define _POSIX_C_SOURCE 200112L /* nanosleep && nextafter */
-
-#include "htrdr.h"
-#include "htrdr_c.h"
-#include "htrdr_buffer.h"
-#include "htrdr_camera.h"
-#include "htrdr_cie_xyz.h"
-#include "htrdr_ran_wlen.h"
-#include "htrdr_solve.h"
-
-#include <high_tune/htsky.h>
-
-#include <rsys/algorithm.h>
-#include <rsys/clock_time.h>
-#include <rsys/cstr.h>
-#include <rsys/dynamic_array_u32.h>
-#include <rsys/math.h>
-#include <rsys/mutex.h>
-#include <star/ssp.h>
-
-#include <omp.h>
-#include <mpi.h>
-#include <time.h>
-#include <unistd.h>
-
-#define RNG_SEQUENCE_SIZE 10000
-
-#define TILE_MCODE_NULL UINT32_MAX
-#define TILE_SIZE 32 /* Definition in X & Y of a tile */
-STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2);
-
-enum pixel_format {
-  PIXEL_XWAVE,
-  PIXEL_IMAGE
-};
-
-union pixel {
-  struct htrdr_pixel_xwave xwave;
-  struct htrdr_pixel_image image;
-};
-
-/* Tile of row ordered image pixels */
-struct tile {
-  struct list_node node;
-  struct mem_allocator* allocator;
-  ref_T ref;
-
-  struct tile_data {
-    uint16_t x, y; /* 2D coordinates of the tile in tile space */
-    enum pixel_format format;
-    /* Simulate the flexible array member of the C99 standard. */
-    union pixel pixels[1/*Dummy element*/];
-  } data;
-};
-
-/* List of tile to compute onto the MPI process. */
-struct proc_work {
-  struct mutex* mutex;
-  struct darray_u32 tiles; /* #tiles to render */
-  size_t itile; /* Next tile to render in the above list of tiles */
-};
-
-/*******************************************************************************
- * Helper functions
- ******************************************************************************/
-static FINLINE uint16_t
-morton2D_decode(const uint32_t u32)
-{
-  uint32_t x = u32 & 0x55555555;
-  x = (x | (x >> 1)) & 0x33333333;
-  x = (x | (x >> 2)) & 0x0F0F0F0F;
-  x = (x | (x >> 4)) & 0x00FF00FF;
-  x = (x | (x >> 8)) & 0x0000FFFF;
-  return (uint16_t)x;
-}
-
-static FINLINE uint32_t
-morton2D_encode(const uint16_t u16)
-{
-  uint32_t u32 = u16;
-  u32 = (u32 | (u32 << 8)) & 0x00FF00FF;
-  u32 = (u32 | (u32 << 4)) & 0X0F0F0F0F;
-  u32 = (u32 | (u32 << 2)) & 0x33333333;
-  u32 = (u32 | (u32 << 1)) & 0x55555555;
-  return u32;
-}
-
-static INLINE enum pixel_format
-spectral_type_to_pixfmt(const enum htrdr_spectral_type spectral_type)
-{
-  enum pixel_format pixfmt;
-  switch(spectral_type) {
-    case HTRDR_SPECTRAL_LW: pixfmt = PIXEL_XWAVE; break;
-    case HTRDR_SPECTRAL_SW: pixfmt = PIXEL_XWAVE; break;
-    case HTRDR_SPECTRAL_SW_CIE_XYZ: pixfmt = PIXEL_IMAGE; break;
-    default: FATAL("Unreachable code.\n"); break;
-  }
-  return pixfmt;
-}
-
-static FINLINE struct tile*
-tile_create(struct mem_allocator* allocator, const enum pixel_format fmt)
-{
-  struct tile* tile;
-  const size_t tile_sz =
-    sizeof(struct tile) - sizeof(union pixel)/*rm dummy pixel*/;
-  const size_t buf_sz = /* Flexiblbe array element */
-    TILE_SIZE*TILE_SIZE*sizeof(union pixel);
-  ASSERT(allocator);
-
-  tile = MEM_ALLOC(allocator, tile_sz+buf_sz);
-  if(!tile) return NULL;
-
-  tile->data.format = fmt;
-
-  ref_init(&tile->ref);
-  list_init(&tile->node);
-  tile->allocator = allocator;
-  ASSERT(IS_ALIGNED(&tile->data.pixels, ALIGNOF(union pixel)));
-
-  return tile;
-}
-
-static INLINE void
-tile_ref_get(struct tile* tile)
-{
-  ASSERT(tile);
-  tile_ref_get(tile);
-}
-
-static INLINE void
-release_tile(ref_T* ref)
-{
-  struct tile* tile = CONTAINER_OF(ref, struct tile, ref);
-  ASSERT(ref);
-  MEM_RM(tile->allocator, tile);
-}
-
-static INLINE void
-tile_ref_put(struct tile* tile)
-{
-  ASSERT(tile);
-  ref_put(&tile->ref, release_tile);
-}
-
-static FINLINE union pixel*
-tile_at
-  (struct tile* tile,
-   const size_t x, /* In tile space */
-   const size_t y) /* In tile space */
-{
-  ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE);
-  return tile->data.pixels + (y*TILE_SIZE + x);
-}
-
-static void
-write_tile_data
-  (struct htrdr* htrdr,
-   struct htrdr_buffer* buf,
-   const struct tile_data* tile_data)
-{
-  struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
-  size_t icol, irow;
-  size_t irow_tile;
-  size_t ncols_tile, nrows_tile;
-  char* buf_mem;
-  ASSERT(htrdr && buf && tile_data);
-  (void)htrdr;
-
-  htrdr_buffer_get_layout(buf, &layout);
-  buf_mem = htrdr_buffer_get_data(buf);
-  ASSERT(layout.elmt_size==htrdr_spectral_type_get_pixsz(htrdr->spectral_type));
-
-  /* Compute the row/column of the tile origin into the buffer */
-  icol = tile_data->x * (size_t)TILE_SIZE;
-  irow = tile_data->y * (size_t)TILE_SIZE;
-
-  /* Define the number of tile row/columns to write into the buffer */
-  ncols_tile = MMIN(icol + TILE_SIZE, layout.width)  - icol;
-  nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow;
-
-  /* Copy the row ordered tile data */
-  FOR_EACH(irow_tile, 0, nrows_tile) {
-    char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch;
-    char* buf_col = buf_row + icol * layout.elmt_size;
-    const union pixel* tile_row = tile_data->pixels + irow_tile*TILE_SIZE;
-    size_t x;
-
-    FOR_EACH(x, 0, ncols_tile) {
-      switch(tile_data->format) {
-        case PIXEL_XWAVE:
-          ((struct htrdr_pixel_xwave*)buf_col)[x] = tile_row[x].xwave;
-          break;
-        case PIXEL_IMAGE:
-          ((struct htrdr_pixel_image*)buf_col)[x] = tile_row[x].image;
-          break;
-        default: FATAL("Unreachable code.\n"); break;
-      }
-    }
-  }
-}
-
-static INLINE void
-proc_work_init(struct mem_allocator* allocator, struct proc_work* work)
-{
-  ASSERT(work);
-  darray_u32_init(allocator, &work->tiles);
-  work->itile = 0;
-  CHK(work->mutex = mutex_create());
-}
-
-static INLINE void
-proc_work_release(struct proc_work* work)
-{
-  darray_u32_release(&work->tiles);
-  mutex_destroy(work->mutex);
-}
-
-static INLINE void
-proc_work_reset(struct proc_work* work)
-{
-  ASSERT(work);
-  mutex_lock(work->mutex);
-  darray_u32_clear(&work->tiles);
-  work->itile = 0;
-  mutex_unlock(work->mutex);
-}
-
-static INLINE void
-proc_work_add_tile(struct proc_work* work, const uint32_t mcode)
-{
-  mutex_lock(work->mutex);
-  CHK(darray_u32_push_back(&work->tiles, &mcode) == RES_OK);
-  mutex_unlock(work->mutex);
-}
-
-static INLINE uint32_t
-proc_work_get_tile(struct proc_work* work)
-{
-  uint32_t mcode;
-  ASSERT(work);
-  mutex_lock(work->mutex);
-  if(work->itile >= darray_u32_size_get(&work->tiles)) {
-    mcode = TILE_MCODE_NULL;
-  } else {
-    mcode = darray_u32_cdata_get(&work->tiles)[work->itile];
-    ++work->itile;
-  }
-  mutex_unlock(work->mutex);
-  return mcode;
-}
-
-static INLINE size_t
-proc_work_get_ntiles(struct proc_work* work)
-{
-  size_t sz = 0;
-  ASSERT(work);
-  mutex_lock(work->mutex);
-  sz = darray_u32_size_get(&work->tiles);
-  mutex_unlock(work->mutex);
-  return sz;
-}
-
-static void
-mpi_wait_for_request(struct htrdr* htrdr, MPI_Request* req)
-{
-  ASSERT(htrdr && req);
-
-  /* Wait for process synchronisation */
-  for(;;) {
-    struct timespec t;
-    int complete;
-    t.tv_sec = 0;
-    t.tv_nsec = 10000000; /* 10ms */
-
-    mutex_lock(htrdr->mpi_mutex);
-    MPI(Test(req, &complete, MPI_STATUS_IGNORE));
-    mutex_unlock(htrdr->mpi_mutex);
-    if(complete) break;
-
-    nanosleep(&t, NULL);
-  }
-}
-
-static void
-mpi_probe_thieves
-  (struct htrdr* htrdr,
-   struct proc_work* work,
-   ATOMIC* probe_thieves)
-{
-  uint32_t tiles[UINT8_MAX];
-  struct timespec t;
-  ASSERT(htrdr && work && probe_thieves);
-
-  if(htrdr->mpi_nprocs == 1) /* The process is alone. No thief is possible */
-    return;
-
-  t.tv_sec = 0;
-
-  /* Protect MPI calls of multiple invocations from concurrent threads */
-  #define P_MPI(Func) {                                                        \
-    mutex_lock(htrdr->mpi_mutex);                                              \
-    MPI(Func);                                                                 \
-    mutex_unlock(htrdr->mpi_mutex);                                            \
-  } (void)0
-
-  while(ATOMIC_GET(probe_thieves)) {
-    MPI_Status status;
-    size_t itile;
-    int msg;
-
-    /* Probe if a steal request was submitted by any processes */
-    P_MPI(Iprobe(MPI_ANY_SOURCE, HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &msg,
-      &status));
-
-    if(msg) { /* A steal request was posted */
-      MPI_Request req;
-      uint8_t ntiles_to_steal;
-
-      /* Asynchronously receive the steal request */
-      P_MPI(Irecv(&ntiles_to_steal, 1, MPI_UINT8_T, status.MPI_SOURCE,
-        HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD, &req));
-
-      /* Wait for the completion of the steal request */
-      mpi_wait_for_request(htrdr, &req);
-
-      /* Thief some tiles */
-      FOR_EACH(itile, 0, ntiles_to_steal) {
-        tiles[itile] = proc_work_get_tile(work);
-      }
-      P_MPI(Send(&tiles, ntiles_to_steal, MPI_UINT32_T, status.MPI_SOURCE,
-        HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD));
-    }
-    t.tv_nsec = 500000000; /* 500ms */
-    nanosleep(&t, NULL);
-  }
-  #undef P_MPI
-}
-
-static int
-mpi_sample_working_process(struct htrdr* htrdr, struct ssp_rng* rng)
-{
-  int iproc, i;
-  int dst_rank;
-  ASSERT(htrdr && rng && htrdr->mpi_nworking_procs);
-
-  /* Sample the index of the 1st active process */
-  iproc = (int)(ssp_rng_canonical(rng) * (double)htrdr->mpi_nworking_procs);
-
-  /* Find the rank of the sampled active process. Use a simple linear search
-   * since the overall number of processes should be quite low; at most few
-   * dozens.  */
-  i = 0;
-  FOR_EACH(dst_rank, 0, htrdr->mpi_nprocs) {
-    if(htrdr->mpi_working_procs[dst_rank] == 0) continue; /* Inactive process */
-    if(i == iproc) break; /* The rank of the sampled process is found */
-    ++i;
-  }
-  ASSERT(dst_rank < htrdr->mpi_nprocs);
-  return dst_rank;
-}
-
-/* Return the number of stolen tiles */
-static size_t
-mpi_steal_work
-  (struct htrdr* htrdr,
-   struct ssp_rng* rng,
-   struct proc_work* work)
-{
-  MPI_Request req;
-  size_t itile;
-  size_t nthieves = 0;
-  uint32_t tiles[UINT8_MAX]; /* Morton code of the stolen tile */
-  int proc_to_steal; /* Process to steal */
-  uint8_t ntiles_to_steal = MMIN((uint8_t)(htrdr->nthreads*2), 16);
-  ASSERT(htrdr && rng && work && htrdr->nthreads < UINT8_MAX);
-
-  /* Protect MPI calls of multiple invocations from concurrent threads */
-  #define P_MPI(Func) {                                                        \
-    mutex_lock(htrdr->mpi_mutex);                                              \
-    MPI(Func);                                                                 \
-    mutex_unlock(htrdr->mpi_mutex);                                            \
-  } (void)0
-
-  /* No more working process => nohting to steal */
-  if(!htrdr->mpi_nworking_procs) return 0;
-
-  /* Sample a process to steal */
-  proc_to_steal = mpi_sample_working_process(htrdr, rng);
-
-  /* Send a steal request to the sampled process and wait for a response */
-  P_MPI(Send(&ntiles_to_steal, 1, MPI_UINT8_T, proc_to_steal,
-    HTRDR_MPI_STEAL_REQUEST, MPI_COMM_WORLD));
-
-  /* Receive the stolen tile from the sampled process */
-  P_MPI(Irecv(tiles, ntiles_to_steal, MPI_UINT32_T, proc_to_steal,
-    HTRDR_MPI_WORK_STEALING, MPI_COMM_WORLD, &req));
-
-  mpi_wait_for_request(htrdr, &req);
-
-  FOR_EACH(itile, 0, ntiles_to_steal) {
-    if(tiles[itile] == TILE_MCODE_NULL) {
-      ASSERT(htrdr->mpi_working_procs[proc_to_steal] != 0);
-      htrdr->mpi_working_procs[proc_to_steal] = 0;
-      htrdr->mpi_nworking_procs--;
-      break;
-    }
-    proc_work_add_tile(work, tiles[itile]);
-    ++nthieves;
-  }
-  #undef P_MPI
-  return nthieves;
-}
-
-static res_T
-mpi_gather_tiles
-  (struct htrdr* htrdr,
-   struct htrdr_buffer* buf,
-   const size_t ntiles,
-   struct list_node* tiles)
-{
-  /* Compute the size of the tile_data */
-  const size_t msg_sz =
-    sizeof(struct tile_data) - sizeof(union pixel)/*dummy*/
-  + TILE_SIZE*TILE_SIZE*sizeof(union pixel);
-
-  struct list_node* node = NULL;
-  struct tile* tile = NULL;
-  res_T res = RES_OK;
-  ASSERT(htrdr && tiles);
-  ASSERT(htrdr->mpi_rank != 0 || buf);
-  (void)ntiles;
-
-  if(htrdr->mpi_rank != 0) { /* Non master process */
-    /* Send the computed tile to the master process */
-    LIST_FOR_EACH(node, tiles) {
-      struct tile* t = CONTAINER_OF(node, struct tile, node);
-      MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0,
-        HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD));
-    }
-  } else { /* Master process */
-    size_t itile = 0;
-
-    LIST_FOR_EACH(node, tiles) {
-      struct tile* t = CONTAINER_OF(node, struct tile, node);
-      write_tile_data(htrdr, buf, &t->data);
-      ++itile;
-    }
-
-    if(itile != ntiles) {
-      enum pixel_format pixfmt;
-      ASSERT(htrdr->mpi_nprocs > 1);
-
-      /* Create a temporary tile to receive the tile data computed by the
-       * concurrent MPI processes */
-      pixfmt = spectral_type_to_pixfmt(htrdr->spectral_type);
-      tile = tile_create(htrdr->allocator, pixfmt);
-      if(!tile) {
-        res = RES_MEM_ERR;
-        htrdr_log_err(htrdr,
-          "could not allocate the temporary tile used to gather the process "
-          "output data -- %s.\n", res_to_cstr(res));
-        goto error;
-      }
-
-      /* Receive the tile data of the concurrent MPI processes */
-      FOR_EACH(itile, itile, ntiles) {
-        MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE,
-          HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE));
-        write_tile_data(htrdr, buf, &tile->data);
-      }
-    }
-  }
-
-exit:
-  if(tile) tile_ref_put(tile);
-  return res;
-error:
-  goto exit;
-}
-
-static void
-draw_pixel_image
-  (struct htrdr* htrdr,
-   const size_t ithread,
-   const size_t ipix[2],
-   const double pix_sz[2], /* Size of a pixel in the normalized image plane */
-   const struct htrdr_camera* cam,
-   const size_t spp,
-   struct ssp_rng* rng,
-   struct htrdr_pixel_image* pixel)
-{
-  struct htrdr_accum XYZ[3]; /* X, Y, and Z */
-  struct htrdr_accum time;
-  size_t ichannel;
-  ASSERT(ipix && ipix && pix_sz && cam && rng && pixel);
-  ASSERT(htrdr->spectral_type == HTRDR_SPECTRAL_SW_CIE_XYZ);
-
-  /* Reset accumulators */
-  XYZ[0] = HTRDR_ACCUM_NULL;
-  XYZ[1] = HTRDR_ACCUM_NULL;
-  XYZ[2] = HTRDR_ACCUM_NULL;
-  time = HTRDR_ACCUM_NULL;
-
-  FOR_EACH(ichannel, 0, 3) {
-    size_t isamp;
-
-    FOR_EACH(isamp, 0, spp) {
-      struct time t0, t1;
-      double pix_samp[2];
-      double ray_org[3];
-      double ray_dir[3];
-      double weight;
-      double r0, r1, r2;
-      double wlen; /* Sampled wavelength into the spectral band */
-      double pdf;
-      size_t iband; /* Sampled spectral band */
-      size_t iquad; /* Sampled quadrature point into the spectral band */
-      double usec;
-
-      /* Begin the registration of the time spent to in the realisation */
-      time_current(&t0);
-
-      /* Sample a position into the pixel, in the normalized image plane */
-      pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0];
-      pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1];
-
-      /* Generate a ray starting from the pinhole camera and passing through the
-       * pixel sample */
-      htrdr_camera_ray(cam, pix_samp, ray_org, ray_dir);
-
-      r0 = ssp_rng_canonical(rng);
-      r1 = ssp_rng_canonical(rng);
-      r2 = ssp_rng_canonical(rng);
-
-      /* Sample a spectral band and a quadrature point */
-      switch(ichannel) {
-        case 0: wlen = htrdr_cie_xyz_sample_X(htrdr->cie, r0, r1, &pdf); break;
-        case 1: wlen = htrdr_cie_xyz_sample_Y(htrdr->cie, r0, r1, &pdf); break;
-        case 2: wlen = htrdr_cie_xyz_sample_Z(htrdr->cie, r0, r1, &pdf); break;
-        default: FATAL("Unreachable code.\n"); break;
-      }
-
-      iband = htsky_find_spectral_band(htrdr->sky, wlen);
-      iquad = htsky_spectral_band_sample_quadrature(htrdr->sky, r2, iband);
-
-      /* Compute the radiance in W/m^2/sr/m */
-      weight = htrdr_compute_radiance_sw
-        (htrdr, ithread, rng, ray_org, ray_dir, wlen, iband, iquad);
-      ASSERT(weight >= 0);
-
-      pdf *= 1.e9; /* Transform the pdf from nm^-1 to m^-1 */
-      weight /= pdf; /* In W/m^2/sr */
-
-      /* End the registration of the per realisation time */
-      time_sub(&t0, time_current(&t1), &t0);
-      usec = (double)time_val(&t0, TIME_NSEC) * 0.001;
-
-      /* Update the pixel accumulator of the current channel */
-      XYZ[ichannel].sum_weights += weight;
-      XYZ[ichannel].sum_weights_sqr += weight*weight;
-      XYZ[ichannel].nweights += 1;
-
-      /* Update the pixel accumulator of per realisation time */
-      time.sum_weights += usec;
-      time.sum_weights_sqr += usec*usec;
-      time.nweights += 1;
-    }
-  }
-
-  /* Flush pixel data */
-  htrdr_accum_get_estimation(XYZ+0, &pixel->X);
-  htrdr_accum_get_estimation(XYZ+1, &pixel->Y);
-  htrdr_accum_get_estimation(XYZ+2, &pixel->Z);
-  pixel->time = time;
-}
-
-
-static INLINE double
-radiance_temperature
-  (struct htrdr* htrdr,
-   const double radiance) /* In W/m^2/sr */
-{
-  double temperature = 0;
-  double radiance_avg = radiance;
-  res_T res = RES_OK;
-  ASSERT(htrdr && radiance >= 0);
-
-  /* From integrated radiance to average radiance in W/m^2/sr/m */
-  if(htrdr->wlen_range_m[0] != htrdr->wlen_range_m[1]) { /* !monochromatic */
-    radiance_avg /= (htrdr->wlen_range_m[1] - htrdr->wlen_range_m[0]);
-  }
-
-  res = brightness_temperature
-    (htrdr,
-     htrdr->wlen_range_m[0],
-     htrdr->wlen_range_m[1],
-     radiance_avg,
-     &temperature);
-  if(res != RES_OK) {
-    htrdr_log_warn(htrdr,
-      "Could not compute the brightness temperature for the radiance %g.\n",
-       radiance_avg);
-    temperature = 0;
-  }
-  return temperature;
-}
-
-static void
-draw_pixel_xwave
-  (struct htrdr* htrdr,
-   const size_t ithread,
-   const size_t ipix[2],
-   const double pix_sz[2], /* Size of a pixel in the normalized image plane */
-   const struct htrdr_camera* cam,
-   const size_t spp,
-   struct ssp_rng* rng,
-   struct htrdr_pixel_xwave* pixel)
-{
-  struct htrdr_accum radiance;
-  struct htrdr_accum time;
-  size_t isamp;
-  double temp_min, temp_max;
-  ASSERT(ipix && ipix && pix_sz && cam && rng && pixel);
-  ASSERT(htrdr->spectral_type == HTRDR_SPECTRAL_LW 
-      || htrdr->spectral_type == HTRDR_SPECTRAL_SW);
-
-  /* Reset the pixel accumulators */
-  radiance = HTRDR_ACCUM_NULL;
-  time = HTRDR_ACCUM_NULL;
-
-  FOR_EACH(isamp, 0, spp) {
-    struct time t0, t1;
-    double pix_samp[2];
-    double ray_org[3];
-    double ray_dir[3];
-    double weight;
-    double r0, r1, r2;
-    double wlen;
-    size_t iband;
-    size_t iquad;
-    double usec;
-    double band_pdf;
-
-    /* Begin the registration of the time spent in the realisation */
-    time_current(&t0);
-
-    /* Sample a position into the pixel, in the normalized image plane */
-    pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0];
-    pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1];
-
-    /* Generate a ray starting from the pinhole camera and passing through the
-     * pixel sample */
-    htrdr_camera_ray(cam, pix_samp, ray_org, ray_dir);
-
-    r0 = ssp_rng_canonical(rng);
-    r1 = ssp_rng_canonical(rng);
-    r2 = ssp_rng_canonical(rng);
-
-    /* Sample a wavelength */
-    wlen = htrdr_ran_wlen_sample(htrdr->ran_wlen, r0, r1, &band_pdf);
-
-    /* Select the associated band and sample a quadrature point */
-    iband = htsky_find_spectral_band(htrdr->sky, wlen);
-    iquad = htsky_spectral_band_sample_quadrature(htrdr->sky, r2, iband);
-
-    /* Compute the spectral radiance in W/m^2/sr/m */
-    switch(htrdr->spectral_type) {
-      case HTRDR_SPECTRAL_LW:
-        weight = htrdr_compute_radiance_lw(htrdr, ithread, rng, ray_org,
-          ray_dir, wlen, iband, iquad);
-        break;
-      case HTRDR_SPECTRAL_SW:
-        weight = htrdr_compute_radiance_sw(htrdr, ithread, rng, ray_org,
-          ray_dir, wlen, iband, iquad);
-        break;
-      default: FATAL("Unreachable code.\n"); break;
-    }
-    ASSERT(weight >= 0);
-
-    /* Importance sampling: correct weight with pdf */
-    weight /= band_pdf; /* In W/m^2/sr */
-
-    /* End the registration of the per realisation time */
-    time_sub(&t0, time_current(&t1), &t0);
-    usec = (double)time_val(&t0, TIME_NSEC) * 0.001;
-
-    /* Update the pixel accumulator of the current channel */
-    radiance.sum_weights += weight;
-    radiance.sum_weights_sqr += weight*weight;
-    radiance.nweights += 1;
-
-    /* Update the pixel accumulator of per realisation time */
-    time.sum_weights += usec;
-    time.sum_weights_sqr += usec*usec;
-    time.nweights += 1;
-  }
-
-  /* Compute the estimation of the pixel radiance */
-  htrdr_accum_get_estimation(&radiance, &pixel->radiance);
-
-  /* Save the per realisation integration time */
-  pixel->time = time;
-
-  /* Compute the brightness_temperature of the pixel and estimate its standard
-   * error if the sources were in the medium (<=> longwave) */
-  if(htrdr->spectral_type == HTRDR_SPECTRAL_LW) {
-    pixel->radiance_temperature.E = radiance_temperature(htrdr, pixel->radiance.E);
-    temp_min = radiance_temperature(htrdr, pixel->radiance.E - pixel->radiance.SE);
-    temp_max = radiance_temperature(htrdr, pixel->radiance.E + pixel->radiance.SE);
-    pixel->radiance_temperature.SE = temp_max - temp_min;
-  }
-}
-
-static res_T
-draw_tile
-  (struct htrdr* htrdr,
-   const size_t ithread,
-   const int64_t tile_mcode, /* For debug only */
-   const size_t tile_org[2], /* Origin of the tile in pixel space */
-   const size_t tile_sz[2], /* Definition of the tile */
-   const double pix_sz[2], /* Size of a pixel in the normalized image plane */
-   const struct htrdr_camera* cam,
-   const size_t spp, /* #samples per pixel */
-   struct ssp_rng* rng,
-   struct tile* tile)
-{
-  size_t npixels;
-  size_t mcode; /* Morton code of tile pixel */
-  ASSERT(htrdr && tile_org && tile_sz && pix_sz && cam && spp && tile);
-  (void)tile_mcode;
-  /* Adjust the #pixels to process them wrt a morton order */
-  npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1]));
-  npixels *= npixels;
-
-  FOR_EACH(mcode, 0, npixels) {
-    union pixel* pixel;
-    size_t ipix_tile[2]; /* Pixel coord in the tile */
-    size_t ipix[2]; /* Pixel coord in the buffer */
-
-    ipix_tile[0] = morton2D_decode((uint32_t)(mcode>>0));
-    if(ipix_tile[0] >= tile_sz[0]) continue; /* Pixel is out of tile */
-    ipix_tile[1] = morton2D_decode((uint32_t)(mcode>>1));
-    if(ipix_tile[1] >= tile_sz[1]) continue; /* Pixel is out of tile */
-
-    /* Fetch and reset the pixel accumulator */
-    pixel = tile_at(tile, ipix_tile[0], ipix_tile[1]);
-
-    /* Compute the pixel coordinate */
-    ipix[0] = tile_org[0] + ipix_tile[0];
-    ipix[1] = tile_org[1] + ipix_tile[1];
-
-    /* Draw the pixel */
-    switch(htrdr->spectral_type) {
-      case HTRDR_SPECTRAL_LW:
-      case HTRDR_SPECTRAL_SW:
-        draw_pixel_xwave(htrdr, ithread, ipix, pix_sz, cam, spp, rng, &pixel->xwave);
-        break;
-      case HTRDR_SPECTRAL_SW_CIE_XYZ:
-        draw_pixel_image(htrdr, ithread, ipix, pix_sz, cam, spp, rng, &pixel->image);
-        break;
-      default: FATAL("Unreachable code.\n"); break;
-    }
-  }
-  return RES_OK;
-}
-
-static res_T
-draw_image
-  (struct htrdr* htrdr,
-   const struct htrdr_camera* cam,
-   const size_t width, /* Image width */
-   const size_t height, /* Image height */
-   const size_t spp,
-   const size_t ntiles_x,
-   const size_t ntiles_y,
-   const size_t ntiles_adjusted,
-   const double pix_sz[2], /* Pixel size in the normalized image plane */
-   struct proc_work* work,
-   struct list_node* tiles)
-{
-  struct ssp_rng* rng_proc = NULL;
-  size_t nthreads = 0;
-  size_t nthieves = 0;
-  size_t proc_ntiles = 0;
-  enum pixel_format pixfmt;
-  ATOMIC nsolved_tiles = 0;
-  ATOMIC res = RES_OK;
-  ASSERT(htrdr && cam && spp && ntiles_adjusted && work && tiles);
-  ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0);
-  ASSERT(width && height);
-  (void)ntiles_x, (void)ntiles_y;
-
-  res = ssp_rng_create(htrdr->allocator, &ssp_rng_mt19937_64, &rng_proc);
-  if(res != RES_OK) {
-    htrdr_log_err(htrdr, "could not create the RNG used to sample a process "
-      "to steal -- %s.\n", res_to_cstr((res_T)res));
-    goto error;
-  }
-
-  proc_ntiles = proc_work_get_ntiles(work);
-  nthreads = MMIN(htrdr->nthreads, proc_ntiles);
-
-  /* The process is not considered as a working process for himself */
-  htrdr->mpi_working_procs[htrdr->mpi_rank] = 0;
-  --htrdr->mpi_nworking_procs;
-
-  pixfmt = spectral_type_to_pixfmt(htrdr->spectral_type);
-
-  omp_set_num_threads((int)nthreads);
-  #pragma omp parallel
-  for(;;) {
-    const int ithread = omp_get_thread_num();
-    struct ssp_rng_proxy* rng_proxy = NULL;
-    struct ssp_rng* rng;
-    struct tile* tile;
-    uint32_t mcode = TILE_MCODE_NULL;
-    size_t tile_org[2];
-    size_t tile_sz[2];
-    size_t n;
-    res_T res_local = RES_OK;
-    int32_t pcent;
-
-    /* Get a tile to draw */
-    #pragma omp critical
-    {
-      mcode = proc_work_get_tile(work);
-      if(mcode == TILE_MCODE_NULL) { /* No more work on this process */
-        /* Try to steal works to concurrent processes */
-        proc_work_reset(work);
-        nthieves = mpi_steal_work(htrdr, rng_proc, work);
-        if(nthieves != 0) {
-          mcode = proc_work_get_tile(work);
-        }
-      }
-    }
-    if(mcode == TILE_MCODE_NULL) break; /* No more work */
-
-    /* Decode the morton code to retrieve the tile index  */
-    tile_org[0] = morton2D_decode((uint32_t)(mcode>>0));
-    tile_org[1] = morton2D_decode((uint32_t)(mcode>>1));
-    ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
-
-    /* Create the tile */
-    tile = tile_create(htrdr->allocator, pixfmt);
-    if(!tile) {
-      ATOMIC_SET(&res, RES_MEM_ERR);
-      htrdr_log_err(htrdr,
-        "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n",
-        (unsigned long)tile_org[0], (unsigned long)tile_org[1],
-         res_to_cstr((res_T)ATOMIC_GET(&res)));
-      break;
-    }
-
-    /* Register the tile */
-    #pragma omp critical
-    list_add_tail(tiles, &tile->node);
-
-    tile->data.x = (uint16_t)tile_org[0];
-    tile->data.y = (uint16_t)tile_org[1];
-
-    /* Define the tile origin in pixel space */
-    tile_org[0] *= TILE_SIZE;
-    tile_org[1] *= TILE_SIZE;
-
-    /* Compute the size of the tile clamped by the borders of the buffer */
-    tile_sz[0] = MMIN(TILE_SIZE, width - tile_org[0]);
-    tile_sz[1] = MMIN(TILE_SIZE, height - tile_org[1]);
-
-    /* Create a proxy RNG for the current tile. This proxy is used for the
-     * current thread only and thus it has to manage only one RNG. This proxy
-     * is initialised in order to ensure that an unique and predictable set of
-     * random numbers is used for the current tile. */
-    SSP(rng_proxy_create2
-      (&htrdr->lifo_allocators[ithread],
-       &ssp_rng_threefry,
-       RNG_SEQUENCE_SIZE * (size_t)mcode, /* Offset */
-       RNG_SEQUENCE_SIZE, /* Size */
-       RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted, /* Pitch */
-       1, &rng_proxy));
-    SSP(rng_proxy_create_rng(rng_proxy, 0, &rng));
-
-    /* Launch the tile rendering */
-    res_local = draw_tile(htrdr, (size_t)ithread, mcode, tile_org, tile_sz,
-      pix_sz, cam, spp, rng, tile);
-
-    SSP(rng_proxy_ref_put(rng_proxy));
-    SSP(rng_ref_put(rng));
-
-    if(res_local != RES_OK) {
-      ATOMIC_SET(&res, res_local);
-      break;
-    }
-
-    /* Update the progress status */
-    n = (size_t)ATOMIC_INCR(&nsolved_tiles);
-    pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/);
-
-    #pragma omp critical
-    if(pcent > htrdr->mpi_progress_render[0]) {
-      htrdr->mpi_progress_render[0] = pcent;
-      if(htrdr->mpi_rank == 0) {
-        update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
-      } else { /* Send the progress percentage to the master process */
-        send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent);
-      }
-    }
-  }
-
-  if(ATOMIC_GET(&res) != RES_OK) goto error;
-
-  /* Asynchronously wait for processes completion. Use an asynchronous barrier to
-   * avoid a dead lock with the `mpi_probe_thieves' thread that requires also
-   * the `mpi_mutex'. */
-  {
-    MPI_Request req;
-
-    mutex_lock(htrdr->mpi_mutex);
-    MPI(Ibarrier(MPI_COMM_WORLD, &req));
-    mutex_unlock(htrdr->mpi_mutex);
-
-    mpi_wait_for_request(htrdr, &req);
-  }
-
-exit:
-  if(rng_proc) SSP(rng_ref_put(rng_proc));
-  return (res_T)res;
-error:
-  goto exit;
-}
-
-/*******************************************************************************
- * Local functions
- ******************************************************************************/
-res_T
-htrdr_draw_radiance
-  (struct htrdr* htrdr,
-   const struct htrdr_camera* cam,
-   const size_t width,
-   const size_t height,
-   const size_t spp,
-   struct htrdr_buffer* buf)
-{
-  char strbuf[128];
-  struct time t0, t1;
-  struct list_node tiles;
-  size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
-  size_t itile;
-  struct proc_work work;
-  struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
-  size_t proc_ntiles_adjusted;
-  double pix_sz[2];
-  ATOMIC probe_thieves = 1;
-  ATOMIC res = RES_OK;
-  ASSERT(htrdr && cam && width && height);
-  ASSERT(htrdr->mpi_rank != 0 || buf);
-
-  list_init(&tiles);
-  proc_work_init(htrdr->allocator, &work);
-
-  if(htrdr->mpi_rank == 0) {
-    const size_t pixsz = htrdr_spectral_type_get_pixsz(htrdr->spectral_type);
-    const size_t pixal = htrdr_spectral_type_get_pixal(htrdr->spectral_type);
-
-    htrdr_buffer_get_layout(buf, &layout);
-    ASSERT(layout.width || layout.height || layout.elmt_size);
-    ASSERT(layout.width == width && layout.height == height);
-
-    if(layout.elmt_size != pixsz || layout.alignment < pixal) {
-      htrdr_log_err(htrdr, "%s: invalid buffer layout.\n", FUNC_NAME);
-      res = RES_BAD_ARG;
-      goto error;
-    }
-  }
-
-  /* Compute the overall number of tiles */
-  ntiles_x = (width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
-  ntiles_y = (height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
-  ntiles = ntiles_x * ntiles_y;
-
-  /* Compute the pixel size in the normalized image plane */
-  pix_sz[0] = 1.0 / (double)width;
-  pix_sz[1] = 1.0 / (double)height;
-
-  /* Adjust the #tiles for the morton-encoding procedure */
-  ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
-  ntiles_adjusted *= ntiles_adjusted;
-
-  /* Define the initial number of tiles of the current process */
-  proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs;
-  if(htrdr->mpi_rank == 0) { /* Affect the remaining tiles to the master proc */
-    proc_ntiles_adjusted +=
-      ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs;
-  }
-
-  /* Define the initial list of tiles of the process */
-  FOR_EACH(itile, 0, proc_ntiles_adjusted) {
-    uint32_t mcode;
-    uint16_t tile_org[2];
-
-    mcode = (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs
-          + (uint32_t)htrdr->mpi_rank;
-
-    tile_org[0] = morton2D_decode(mcode>>0);
-    if(tile_org[0] >= ntiles_x) continue;
-    tile_org[1] = morton2D_decode(mcode>>1);
-    if(tile_org[1] >= ntiles_y) continue;
-    proc_work_add_tile(&work, mcode);
-  }
-
-  if(htrdr->mpi_rank == 0) {
-    fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
-    print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
-  }
-
-  time_current(&t0);
-
-  omp_set_nested(1); /* Enable nested threads for draw_image */
-  #pragma omp parallel sections num_threads(2)
-  {
-    #pragma omp section
-    mpi_probe_thieves(htrdr, &work, &probe_thieves);
-
-    #pragma omp section
-    {
-      draw_image(htrdr, cam, width, height, spp, ntiles_x, ntiles_y,
-        ntiles_adjusted, pix_sz, &work, &tiles);
-      /* The processes have no more work to do. Stop probing for thieves */
-      ATOMIC_SET(&probe_thieves, 0);
-    }
-  }
-
-  if(htrdr->mpi_rank == 0) {
-    update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
-    fprintf(stderr, "\n"); /* Add a new line after the progress statuses */
-  }
-
-  time_sub(&t0, time_current(&t1), &t0);
-  time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
-  htrdr_log(htrdr, "Rendering time: %s\n", strbuf);
-
-  /* Gather accum buffers from the group of processes */
-  time_current(&t0);
-  res = mpi_gather_tiles(htrdr, buf, ntiles, &tiles);
-  if(res != RES_OK) goto error;
-  time_sub(&t0, time_current(&t1), &t0);
-  time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
-  htrdr_log(htrdr, "Image gathering time: %s\n", strbuf);
-
-exit:
-  { /* Free allocated tiles */
-    struct list_node* node;
-    struct list_node* tmp;
-    LIST_FOR_EACH_SAFE(node, tmp, &tiles) {
-      struct tile* tile = CONTAINER_OF(node, struct tile, node);
-      list_del(node);
-      tile_ref_put(tile);
-    }
-  }
-  proc_work_release(&work);
-  return (res_T)res;
-error:
-  goto exit;
-}
-
diff --git a/src/htrdr_rectangle.c b/src/htrdr_rectangle.c
@@ -26,6 +26,7 @@ struct htrdr_rectangle {
   double axis_x[3];
   double axis_y[3];
 
+  double normal[3];
   double position[3]; /* Center of the rectangle */
   struct htrdr* htrdr;
   ref_T ref;
@@ -91,6 +92,7 @@ htrdr_rectangle_create
 
   d3_muld(rect->axis_x, x, sz[0]*0.5);
   d3_muld(rect->axis_y, y, sz[1]*0.5);
+  d3_set(rect->normal, z);
   d3_set(rect->position, pos);
 
 exit:
@@ -131,3 +133,10 @@ htrdr_rectangle_ref_put(struct htrdr_rectangle* rect)
   ref_put(&rect->ref, rectangle_release);
 }
 
+void
+htrdr_rectangle_get_normal(const struct htrdr_rectangle* rect, double normal[3])
+{
+  ASSERT(rect && normal);
+  d3_set(normal, rect->normal);
+}
+
diff --git a/src/htrdr_rectangle.h b/src/htrdr_rectangle.h
@@ -45,5 +45,10 @@ htrdr_rectangle_sample_pos
    const double sample[2], /* In [0, 1[ */
    double pos[3]);
 
+extern LOCAL_SYM void
+htrdr_rectangle_get_normal
+  (const struct htrdr_rectangle* rect,
+   double normal[3]);
+
 #endif /* HTRDR_RECTANGLE_H */
 
diff --git a/src/htrdr_sensor.c b/src/htrdr_sensor.c
@@ -0,0 +1,115 @@
+/* Copyright (C) 2018, 2019, 2020 |Meso|Star> (contact@meso-star.com)
+ * Copyright (C) 2018, 2019 CNRS, Université Paul Sabatier
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "htrdr.h"
+#include "htrdr_camera.h"
+#include "htrdr_ground.h"
+#include "htrdr_rectangle.h"
+#include "htrdr_sensor.h"
+
+#include <star/s3d.h>
+#include <star/ssp.h>
+
+/*******************************************************************************
+ * Helper functions
+ ******************************************************************************/
+static res_T
+sample_camera_ray
+  (struct htrdr_camera* cam,
+   const size_t ipix[2],
+   const double pix_sz[2],
+   struct ssp_rng* rng,
+   double ray_org[3],
+   double ray_dir[3])
+{
+  double pix_samp[2];
+  ASSERT(cam && ipix && pix_sz && rng && ray_org && ray_dir);
+
+  /* Sample a position into the pixel, in the normalized image plane */
+  pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0];
+  pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1];
+
+  /* Generate a ray starting from the pinhole camera and passing through the
+   * pixel sample */
+  htrdr_camera_ray(cam, pix_samp, ray_org, ray_dir);
+
+  return RES_OK;
+}
+
+static res_T
+sample_rectangle_ray
+  (struct htrdr_rectangle* rect,
+   struct htrdr_ground* ground,
+   const size_t ipix[2],
+   const double pix_sz[2],
+   struct ssp_rng* rng,
+   double ray_org[3],
+   double ray_dir[3])
+{
+  struct s3d_hit hit = S3D_HIT_NULL;
+  double pix_samp[2];
+  double up_dir[3] = {0, 0, 1};
+  const double range[2] = {0, INF};
+  double normal[3];
+  ASSERT(rect && ground && ipix && pix_sz && rng && ray_org && ray_dir);
+
+  /* Sample a position into the pixel, in the normalized image plane */
+  pix_samp[0] = ((double)ipix[0] + ssp_rng_canonical(rng)) * pix_sz[0];
+  pix_samp[1] = ((double)ipix[1] + ssp_rng_canonical(rng)) * pix_sz[1];
+
+  /* Retrieve the world space position of pix_samp */
+  htrdr_rectangle_sample_pos(rect, pix_samp, ray_org);
+
+  /* Check that `ray_org' is not included into a geometry. Right now, use a
+   * naïve "up" intersection. */
+  HTRDR(ground_trace_ray(ground, ray_org, up_dir, range, NULL, &hit));
+
+  /* Reject the ray origin */
+  if(!S3D_HIT_NONE(&hit)) return RES_BAD_OP;
+
+  /* Sample a ray direction */
+  htrdr_rectangle_get_normal(rect, normal);
+  ssp_ran_hemisphere_cos(rng, normal, ray_dir, NULL);
+  return RES_OK;
+}
+
+/*******************************************************************************
+ * Local functions
+ ******************************************************************************/
+res_T
+htrdr_sensor_sample_primary_ray
+  (struct htrdr_sensor* sensor,
+   struct htrdr_ground* ground,
+   const size_t ipix[2],
+   const double pix_sz[2],
+   struct ssp_rng* rng,
+   double ray_org[3],
+   double ray_dir[3])
+{
+  res_T res = RES_OK;
+  switch(sensor->type) {
+    case HTRDR_SENSOR_CAMERA:
+      res = sample_camera_ray(sensor->camera, ipix, pix_sz, rng, ray_org, ray_dir);
+      break;
+    case HTRDR_SENSOR_RECTANGLE:
+      res = sample_rectangle_ray(sensor->rectangle, ground, ipix, pix_sz, rng,
+        ray_org, ray_dir);
+      break;
+    default: FATAL("Unreachable code.\n"); break;
+  }
+  return res;
+}
+
diff --git a/src/htrdr_sensor.h b/src/htrdr_sensor.h
@@ -17,6 +17,12 @@
 #ifndef HTRDR_SENSOR_H
 #define HTRDR_SENSOR_H
 
+#include <rsys/rsys.h>
+
+/* Forward declarations */
+struct htrdr_ground;
+struct ssp_rng;
+
 enum htrdr_sensor_type {
   HTRDR_SENSOR_CAMERA,
   HTRDR_SENSOR_RECTANGLE
@@ -28,5 +34,15 @@ struct htrdr_sensor {
   enum htrdr_sensor_type type;
 };
 
+extern LOCAL_SYM res_T
+htrdr_sensor_sample_primary_ray
+  (struct htrdr_sensor* sensor,
+   struct htrdr_ground* ground,
+   const size_t ipix[2],
+   const double pix_sz[2],
+   struct ssp_rng* rng,
+   double ray_org[3],
+   double ray_dir[3]);
+
 #endif /* HTRDR_SENSOR_H */
 
diff --git a/src/htrdr_solve.h b/src/htrdr_solve.h
@@ -81,9 +81,9 @@ htrdr_compute_radiance_lw
    const size_t iquad); /* Index of the quadrature point into the band */
 
 extern LOCAL_SYM res_T
-htrdr_draw_radiance
+htrdr_draw_map
   (struct htrdr* htrdr,
-   const struct htrdr_camera* cam,
+   const struct htrdr_sensor* sensor,
    const size_t width, /* Image width */
    const size_t height, /* Image height */
    const size_t spp, /* #samples per pixel, i.e. #realisations */

	htrdr Solving radiative transfer in heterogeneous media
	git clone git://git.meso-star.fr/htrdr.git
	Log \| Files \| Refs \| README \| LICENSE

M	cmake/CMakeLists.txt	\|	3	++-
M	src/htrdr.c	\|	9	+++++----
A	src/htrdr_draw_map.c	\|	1079	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	src/htrdr_draw_radiance.c	\|	1081	-------------------------------------------------------------------------------
M	src/htrdr_rectangle.c	\|	9	+++++++++
M	src/htrdr_rectangle.h	\|	5	+++++
A	src/htrdr_sensor.c	\|	115	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/htrdr_sensor.h	\|	16	++++++++++++++++
M	src/htrdr_solve.h	\|	4	++--