htrdr_draw_map.c - htrdr - Solving radiative transfer in heterogeneous media

htrdr_draw_map.c (19346B)
      1 /* Copyright (C) 2018-2019, 2022-2025 Centre National de la Recherche Scientifique
      2  * Copyright (C) 2020-2022 Institut Mines Télécom Albi-Carmaux
      3  * Copyright (C) 2022-2025 Institut Pierre-Simon Laplace
      4  * Copyright (C) 2022-2025 Institut de Physique du Globe de Paris
      5  * Copyright (C) 2018-2025 |Méso|Star> (contact@meso-star.com)
      6  * Copyright (C) 2022-2025 Observatoire de Paris
      7  * Copyright (C) 2022-2025 Université de Reims Champagne-Ardenne
      8  * Copyright (C) 2022-2025 Université de Versaille Saint-Quentin
      9  * Copyright (C) 2018-2019, 2022-2025 Université Paul Sabatier
     10  *
     11  * This program is free software: you can redistribute it and/or modify
     12  * it under the terms of the GNU General Public License as published by
     13  * the Free Software Foundation, either version 3 of the License, or
     14  * (at your option) any later version.
     15  *
     16  * This program is distributed in the hope that it will be useful,
     17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     19  * GNU General Public License for more details.
     20  *
     21  * You should have received a copy of the GNU General Public License
     22  * along with this program. If not, see <http://www.gnu.org/licenses/>. */
     23 
     24 #include "core/htrdr.h"
     25 #include "core/htrdr_c.h"
     26 #include "core/htrdr_buffer.h"
     27 #include "core/htrdr_draw_map.h"
     28 #include "core/htrdr_log.h"
     29 #include "core/htrdr_proc_work.h"
     30 
     31 #include <rsys/clock_time.h>
     32 #include <rsys/cstr.h>
     33 #include <rsys/list.h>
     34 #include <rsys/math.h>
     35 #include <rsys/morton.h>
     36 #include <rsys/mutex.h>
     37 #include <rsys/ref_count.h>
     38 
     39 #include <star/ssp.h>
     40 
     41 #include <omp.h>
     42 #include <mpi.h>
     43 #include <unistd.h>
     44 
     45 #define TILE_SIZE 8 /* Definition in X & Y of a tile */
     46 STATIC_ASSERT(IS_POW2(TILE_SIZE), TILE_SIZE_must_be_a_power_of_2);
     47 
     48 /* Tile of row ordered image pixels */
     49 struct tile {
     50   struct list_node node;
     51   struct mem_allocator* allocator;
     52   ref_T ref;
     53 
     54   struct tile_data {
     55     size_t pixsz; /* Size of a pixel */
     56     size_t pixal; /* Pixel alignment */
     57     uint16_t x, y; /* 2D coordinates of the tile in tile space */
     58     /* Simulate the flexible array member of the C99 standard */
     59     char ALIGN(16) pixels[1/*Dummy element*/];
     60   } data;
     61 };
     62 
     63 /*******************************************************************************
     64  * Helper functions
     65  ******************************************************************************/
     66 static INLINE res_T
     67 check_draw_map_args(const struct htrdr_draw_map_args* args)
     68 {
     69   if(!args) return RES_BAD_ARG;
     70 
     71   /* A functor must be defined */
     72   if(!args->draw_pixel) return RES_BAD_ARG;
     73 
     74   /* The number of realisations cannot be null */
     75   if(!args->spp) return RES_BAD_ARG;
     76 
     77   /* Check buffer layout consistency */
     78   return htrdr_buffer_layout_check(&args->buffer_layout);
     79 }
     80 
     81 static INLINE void
     82 tile_ref_get(struct tile* tile)
     83 {
     84   ASSERT(tile);
     85   ref_get(&tile->ref);
     86 }
     87 
     88 static INLINE void
     89 release_tile(ref_T* ref)
     90 {
     91   struct tile* tile = CONTAINER_OF(ref, struct tile, ref);
     92   ASSERT(ref);
     93   MEM_RM(tile->allocator, tile);
     94 }
     95 
     96 static INLINE void
     97 tile_ref_put(struct tile* tile)
     98 {
     99   ASSERT(tile);
    100   ref_put(&tile->ref, release_tile);
    101 }
    102 
    103 static FINLINE struct tile*
    104 tile_create
    105   (struct mem_allocator* allocator,
    106    const size_t pixel_size,
    107    const size_t pixel_alignment)
    108 {
    109   struct tile* tile = NULL;
    110   const size_t tile_sz = sizeof(*tile) - 1/* rm dummy octet in flexible array */;
    111   const size_t buf_sz = TILE_SIZE*TILE_SIZE*pixel_size;
    112   ASSERT(allocator);
    113   ASSERT(IS_ALIGNED(pixel_size, pixel_alignment));
    114   ASSERT(IS_POW2(pixel_alignment));
    115 
    116   tile = MEM_ALLOC_ALIGNED(allocator, tile_sz + buf_sz, 16);
    117   if(!tile) goto error;
    118   ref_init(&tile->ref);
    119   list_init(&tile->node);
    120   tile->allocator = allocator;
    121   tile->data.pixsz = pixel_size;
    122   tile->data.pixal = pixel_alignment;
    123   tile->data.x = 0;
    124   tile->data.y = 0;
    125   CHK(IS_ALIGNED(tile->data.pixels, pixel_alignment));
    126 
    127 exit:
    128   return tile;
    129 error:
    130   if(tile) {
    131     tile_ref_put(tile);
    132     tile = NULL;
    133   }
    134   goto exit;
    135 }
    136 
    137 static FINLINE void*
    138 tile_at
    139   (struct tile* tile,
    140    const size_t x, /* In tile space */
    141    const size_t y) /* In tile space */
    142 {
    143   ASSERT(tile && x < TILE_SIZE && y < TILE_SIZE);
    144   return (void*)(tile->data.pixels + (y*TILE_SIZE + x)*tile->data.pixsz);
    145 }
    146 
    147 static void
    148 write_tile_data
    149   (struct htrdr* htrdr,
    150    struct htrdr_buffer* buf,
    151    const struct tile_data* tile_data)
    152 {
    153   struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
    154   size_t icol, irow;
    155   size_t irow_tile;
    156   size_t ncols_tile, nrows_tile;
    157   size_t tile_pitch;
    158   char* buf_mem;
    159   ASSERT(htrdr && buf && tile_data);
    160   (void)htrdr;
    161 
    162   htrdr_buffer_get_layout(buf, &layout);
    163   buf_mem = htrdr_buffer_get_data(buf);
    164   ASSERT(layout.elmt_size == tile_data->pixsz);
    165 
    166   /* Compute the row/column of the tile origin into the buffer */
    167   icol = tile_data->x * (size_t)TILE_SIZE;
    168   irow = tile_data->y * (size_t)TILE_SIZE;
    169 
    170   /* Define the number of tile row/columns to write into the buffer */
    171   ncols_tile = MMIN(icol + TILE_SIZE, layout.width)  - icol;
    172   nrows_tile = MMIN(irow + TILE_SIZE, layout.height) - irow;
    173 
    174   tile_pitch = TILE_SIZE * tile_data->pixsz;
    175 
    176   /* Copy the row ordered tile data */
    177   FOR_EACH(irow_tile, 0, nrows_tile) {
    178     char* buf_row = buf_mem + (irow + irow_tile) * layout.pitch;
    179     char* dst_tile_row = buf_row + icol * layout.elmt_size;
    180     const char* src_tile_row = tile_data->pixels + irow_tile*tile_pitch;
    181 
    182     memcpy(dst_tile_row, src_tile_row, ncols_tile*tile_data->pixsz);
    183   }
    184 }
    185 
    186 static res_T
    187 mpi_gather_tiles
    188   (struct htrdr* htrdr,
    189    const struct htrdr_buffer_layout* buf_layout,
    190    struct htrdr_buffer* buf,
    191    const size_t ntiles,
    192    struct list_node* tiles)
    193 {
    194   /* Compute the size of the tile_data */
    195   size_t msg_sz = 0;
    196   struct list_node* node = NULL;
    197   struct tile* tile = NULL;
    198   res_T res = RES_OK;
    199   ASSERT(htrdr && tiles);
    200   ASSERT(htrdr_buffer_layout_check(buf_layout) == RES_OK);
    201   ASSERT(htrdr->mpi_rank != 0 || buf);
    202   (void)ntiles;
    203 
    204   /* Compute the size of the tile data */
    205   msg_sz =
    206     sizeof(struct tile_data)
    207   + TILE_SIZE*TILE_SIZE*buf_layout->elmt_size
    208   - 1/* Dummy octet of the flexible array */;
    209   ASSERT(msg_sz <= INT_MAX);
    210 
    211   if(htrdr->mpi_rank != 0) { /* Non master process */
    212     /* Send the computed tile to the master process */
    213     LIST_FOR_EACH(node, tiles) {
    214       struct tile* t = CONTAINER_OF(node, struct tile, node);
    215       MPI(Send(&t->data, (int)msg_sz, MPI_CHAR, 0,
    216         HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD));
    217     }
    218   } else { /* Master process */
    219     size_t itile = 0;
    220     ASSERT(buf);
    221 
    222 #ifndef NDEBUG
    223     {
    224       /* Check data consistency */
    225       struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
    226       htrdr_buffer_get_layout(buf, &layout);
    227       ASSERT(htrdr_buffer_layout_eq(&layout, buf_layout));
    228     }
    229 #endif
    230 
    231     LIST_FOR_EACH(node, tiles) {
    232       struct tile* t = CONTAINER_OF(node, struct tile, node);
    233       write_tile_data(htrdr, buf, &t->data);
    234       ++itile;
    235     }
    236 
    237     if(itile != ntiles) {
    238       ASSERT(htrdr->mpi_nprocs > 1);
    239 
    240       /* Create a temporary tile to receive the tile data computed by the
    241        * concurrent MPI processes */
    242       tile = tile_create
    243         (htrdr->allocator,
    244          buf_layout->elmt_size,
    245          buf_layout->alignment);
    246       if(!tile) {
    247         res = RES_MEM_ERR;
    248         htrdr_log_err(htrdr,
    249           "Could not allocate the temporary tile used to gather the process "
    250           "output data -- %s.\n", res_to_cstr(res));
    251         goto error;
    252       }
    253 
    254       /* Receive the tile data of the concurrent MPI processes */
    255       FOR_EACH(itile, itile, ntiles) {
    256         MPI(Recv(&tile->data, (int)msg_sz, MPI_CHAR, MPI_ANY_SOURCE,
    257           HTRDR_MPI_TILE_DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE));
    258         write_tile_data(htrdr, buf, &tile->data);
    259       }
    260     }
    261   }
    262 
    263 exit:
    264   if(tile) tile_ref_put(tile);
    265   return res;
    266 error:
    267   goto exit;
    268 }
    269 
    270 static res_T
    271 draw_tile
    272   (struct htrdr* htrdr,
    273    const struct htrdr_draw_map_args* args,
    274    const size_t ithread,
    275    const int64_t tile_mcode, /* For debug only */
    276    const uint16_t tile_org[2], /* Origin of the tile in pixel space */
    277    const size_t tile_sz[2], /* Definition of the tile */
    278    const double pix_sz[2], /* Size of a pixel in the normalized image plane */
    279    struct ssp_rng* rng,
    280    struct tile* tile)
    281 {
    282   struct htrdr_draw_pixel_args pix_args = HTRDR_DRAW_PIXEL_ARGS_NULL;
    283   size_t npixels;
    284   size_t mcode; /* Morton code of tile pixel */
    285   ASSERT(htrdr && tile_org && tile_sz && pix_sz && rng && tile);
    286   ASSERT(check_draw_map_args(args) == RES_OK);
    287   (void)tile_mcode;
    288 
    289   /* Adjust the #pixels to process them wrt a morton order */
    290   npixels = round_up_pow2(MMAX(tile_sz[0], tile_sz[1]));
    291   npixels *= npixels;
    292 
    293   /* Setup the shared pixel arguments */
    294   pix_args.pixel_normalized_size[0] = pix_sz[0];
    295   pix_args.pixel_normalized_size[1] = pix_sz[1];
    296   pix_args.rng = rng;
    297   pix_args.spp = args->spp;
    298   pix_args.ithread = ithread;
    299   pix_args.context = args->context;
    300 
    301   FOR_EACH(mcode, 0, npixels) {
    302     void* pixel = NULL;
    303     uint16_t ipix_tile[2]; /* Pixel coord in the tile */
    304     ASSERT(mcode <= UINT32_MAX);
    305 
    306     morton_xy_decode_u16((uint32_t)mcode, ipix_tile);
    307     if(ipix_tile[0] >= tile_sz[0] || ipix_tile[1] >= tile_sz[1])
    308       continue; /* Pixel is out of tile */
    309 
    310     /* Fetch the pixel */
    311     pixel = tile_at(tile, ipix_tile[0], ipix_tile[1]);
    312 
    313     /* Compute the pixel coordinate */
    314     pix_args.pixel_coord[0] = (size_t)(tile_org[0] + ipix_tile[0]);
    315     pix_args.pixel_coord[1] = (size_t)(tile_org[1] + ipix_tile[1]);
    316 
    317     /* Invoque the draw pixel functor */
    318     args->draw_pixel(htrdr, &pix_args, pixel);
    319   }
    320   return RES_OK;
    321 }
    322 
    323 static res_T
    324 draw_map
    325   (struct htrdr* htrdr,
    326    const struct htrdr_draw_map_args* args,
    327    const size_t ntiles_x,
    328    const size_t ntiles_y,
    329    const size_t ntiles_adjusted,
    330    const double pix_sz[2], /* Pixel size in the normalized image plane */
    331    struct proc_work* work,
    332    struct list_node* tiles)
    333 {
    334   struct ssp_rng* rng_proc = NULL;
    335   size_t nthreads = 0;
    336   size_t nthieves = 0;
    337   size_t proc_ntiles = 0;
    338   ATOMIC nsolved_tiles = 0;
    339   ATOMIC res = RES_OK;
    340 
    341   /* Pre conditions */
    342   ASSERT(check_draw_map_args(args) == RES_OK);
    343   ASSERT(htrdr && work && tiles);
    344   ASSERT(ntiles_x && ntiles_y && ntiles_adjusted >= ntiles_x*ntiles_y);
    345   ASSERT(pix_sz && pix_sz[0] > 0 && pix_sz[1] > 0);
    346 
    347   /* Avoid the "unused variable" warning */
    348   (void)ntiles_x, (void)ntiles_y;
    349 
    350   res = ssp_rng_create(htrdr->allocator, SSP_RNG_MT19937_64, &rng_proc);
    351   if(res != RES_OK) {
    352     htrdr_log_err(htrdr, "Could not create the RNG used to sample a process "
    353       "to steal -- %s.\n", res_to_cstr((res_T)res));
    354     goto error;
    355   }
    356 
    357   proc_ntiles = proc_work_get_nchunks(work);
    358   nthreads = MMIN(htrdr->nthreads, proc_ntiles);
    359 
    360   /* The process is not considered as a working process for himself */
    361   htrdr->mpi_working_procs[htrdr->mpi_rank] = 0;
    362   --htrdr->mpi_nworking_procs;
    363 
    364   omp_set_num_threads((int)nthreads);
    365   #pragma omp parallel
    366   for(;;) {
    367     const int ithread = omp_get_thread_num();
    368     struct ssp_rng_proxy_create2_args proxy_create2_args =
    369       SSP_RNG_PROXY_CREATE2_ARGS_NULL;
    370     struct ssp_rng_proxy* rng_proxy = NULL;
    371     struct ssp_rng* rng;
    372     struct tile* tile;
    373     uint64_t mcode = CHUNK_ID_NULL;
    374     uint16_t tile_org[2];
    375     size_t tile_sz[2];
    376     size_t n;
    377     res_T res_local = RES_OK;
    378     int32_t pcent;
    379 
    380     /* Get a tile to draw */
    381     #pragma omp critical
    382     {
    383       mcode = proc_work_get_chunk(work);
    384       if(mcode == CHUNK_ID_NULL) { /* No more work on this process */
    385         /* Try to steal works to concurrent processes */
    386         proc_work_reset(work);
    387         nthieves = mpi_steal_work(htrdr, rng_proc, work);
    388         if(nthieves != 0) {
    389           mcode = proc_work_get_chunk(work);
    390         }
    391       }
    392     }
    393     if(mcode == CHUNK_ID_NULL) break; /* No more work */
    394     ASSERT(mcode <= UINT32_MAX);
    395 
    396     /* Decode the morton code to retrieve the tile index  */
    397     morton_xy_decode_u16((uint32_t)mcode, tile_org);
    398     ASSERT(tile_org[0] < ntiles_x && tile_org[1] < ntiles_y);
    399 
    400     /* Create the tile */
    401     tile = tile_create
    402       (htrdr->allocator,
    403        args->buffer_layout.elmt_size,
    404        args->buffer_layout.alignment);
    405     if(!tile) {
    406       ATOMIC_SET(&res, RES_MEM_ERR);
    407       htrdr_log_err(htrdr,
    408         "could not allocate the memory space of the tile (%lu, %lu) -- %s.\n",
    409         (unsigned long)tile_org[0], (unsigned long)tile_org[1],
    410          res_to_cstr((res_T)ATOMIC_GET(&res)));
    411       break;
    412     }
    413 
    414     /* Register the tile */
    415     #pragma omp critical
    416     list_add_tail(tiles, &tile->node);
    417 
    418     tile->data.x = (uint16_t)tile_org[0];
    419     tile->data.y = (uint16_t)tile_org[1];
    420 
    421     /* Define the tile origin in pixel space */
    422     tile_org[0] = (uint16_t)(tile_org[0] * TILE_SIZE);
    423     tile_org[1] = (uint16_t)(tile_org[1] * TILE_SIZE);
    424 
    425     /* Compute the size of the tile clamped by the borders of the buffer */
    426     tile_sz[0] = MMIN(TILE_SIZE, args->buffer_layout.width - tile_org[0]);
    427     tile_sz[1] = MMIN(TILE_SIZE, args->buffer_layout.height - tile_org[1]);
    428 
    429     /* Create a proxy RNG for the current tile. This proxy is used for the
    430      * current thread only and thus it has to manage only one RNG. This proxy
    431      * is initialised in order to ensure that an unique and predictable set of
    432      * random numbers is used for the current tile. */
    433     proxy_create2_args.type = SSP_RNG_THREEFRY;
    434     proxy_create2_args.sequence_offset = RNG_SEQUENCE_SIZE * (size_t)mcode;
    435     proxy_create2_args.sequence_size = RNG_SEQUENCE_SIZE;
    436     proxy_create2_args.sequence_pitch = RNG_SEQUENCE_SIZE * (size_t)ntiles_adjusted;
    437     proxy_create2_args.nbuckets = 1;
    438     SSP(rng_proxy_create2
    439       (htrdr_get_thread_allocator(htrdr, (size_t)ithread),
    440        &proxy_create2_args,
    441        &rng_proxy));
    442     SSP(rng_proxy_create_rng(rng_proxy, 0, &rng));
    443 
    444     /* Launch the tile rendering */
    445     res_local = draw_tile(htrdr, args, (size_t)ithread, (uint32_t)mcode,
    446       tile_org, tile_sz, pix_sz, rng, tile);
    447 
    448     SSP(rng_proxy_ref_put(rng_proxy));
    449     SSP(rng_ref_put(rng));
    450 
    451     if(res_local != RES_OK) {
    452       ATOMIC_SET(&res, res_local);
    453       break;
    454     }
    455 
    456     /* Update the progress status */
    457     n = (size_t)ATOMIC_INCR(&nsolved_tiles);
    458     pcent = (int32_t)((double)n * 100.0 / (double)proc_ntiles + 0.5/*round*/);
    459 
    460     #pragma omp critical
    461     if(pcent > htrdr->mpi_progress_render[0]) {
    462       htrdr->mpi_progress_render[0] = pcent;
    463       if(htrdr->mpi_rank == 0) {
    464         update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
    465       } else { /* Send the progress percentage to the master process */
    466         send_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING, pcent);
    467       }
    468     }
    469   }
    470 
    471   if(ATOMIC_GET(&res) != RES_OK) goto error;
    472 
    473   /* Asynchronously wait for processes completion. Use an asynchronous barrier to
    474    * avoid a dead lock with the `mpi_probe_thieves' thread that requires also
    475    * the `mpi_mutex'. */
    476   {
    477     MPI_Request req;
    478 
    479     mutex_lock(htrdr->mpi_mutex);
    480     MPI(Ibarrier(MPI_COMM_WORLD, &req));
    481     mutex_unlock(htrdr->mpi_mutex);
    482 
    483     mpi_wait_for_request(htrdr, &req);
    484   }
    485 
    486 exit:
    487   if(rng_proc) SSP(rng_ref_put(rng_proc));
    488   return (res_T)res;
    489 error:
    490   goto exit;
    491 }
    492 
    493 /*******************************************************************************
    494  * Local functions
    495  ******************************************************************************/
    496 res_T
    497 htrdr_draw_map
    498   (struct htrdr* htrdr,
    499    const struct htrdr_draw_map_args* args,
    500    struct htrdr_buffer* buf)
    501 {
    502   char strbuf[128];
    503   struct time t0, t1;
    504   struct list_node tiles;
    505   size_t ntiles_x, ntiles_y, ntiles, ntiles_adjusted;
    506   size_t itile;
    507   struct proc_work work;
    508   size_t proc_ntiles_adjusted;
    509   size_t remaining_tiles;
    510   double pix_sz[2];
    511 
    512   ATOMIC probe_thieves = 1;
    513   ATOMIC res = RES_OK;
    514   ASSERT(htrdr && check_draw_map_args(args) == RES_OK);
    515   ASSERT(htrdr->mpi_rank != 0 || buf);
    516 
    517 #ifndef NDEBUG
    518   if(htrdr_get_mpi_rank(htrdr) == 0) {
    519     /* Check data consistency */
    520     struct htrdr_buffer_layout layout = HTRDR_BUFFER_LAYOUT_NULL;
    521     htrdr_buffer_get_layout(buf, &layout);
    522     ASSERT(htrdr_buffer_layout_eq(&layout, &args->buffer_layout));
    523   }
    524 #endif
    525 
    526   list_init(&tiles);
    527   proc_work_init(htrdr->allocator, &work);
    528 
    529   /* Compute the overall number of tiles */
    530   ntiles_x = (args->buffer_layout.width + (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
    531   ntiles_y = (args->buffer_layout.height+ (TILE_SIZE-1)/*ceil*/)/TILE_SIZE;
    532   ntiles = ntiles_x * ntiles_y;
    533 
    534   /* Compute the pixel size in the normalized image plane */
    535   pix_sz[0] = 1.0 / (double)args->buffer_layout.width;
    536   pix_sz[1] = 1.0 / (double)args->buffer_layout.height;
    537 
    538   /* Adjust the #tiles for the morton-encoding procedure */
    539   ntiles_adjusted = round_up_pow2(MMAX(ntiles_x, ntiles_y));
    540   ntiles_adjusted *= ntiles_adjusted;
    541 
    542   /* Define the initial number of tiles of the current process */
    543   proc_ntiles_adjusted = ntiles_adjusted / (size_t)htrdr->mpi_nprocs;
    544 
    545   remaining_tiles =
    546     ntiles_adjusted - proc_ntiles_adjusted*(size_t)htrdr->mpi_nprocs;
    547 
    548   /* Distribute the remaining tiles among the processes. Each process whose
    549    * rank is lower than the number of remaining tiles takes an additional
    550    * tile  */
    551   if((size_t)htrdr->mpi_rank < remaining_tiles) {
    552     ++proc_ntiles_adjusted;
    553   }
    554 
    555   /* Define the initial list of tiles of the process */
    556   FOR_EACH(itile, 0, proc_ntiles_adjusted) {
    557     uint16_t tile_org[2];
    558     const uint32_t mcode =
    559       (uint32_t)itile*(uint32_t)htrdr->mpi_nprocs + (uint32_t)htrdr->mpi_rank;
    560 
    561     morton_xy_decode_u16(mcode, tile_org);
    562     if(tile_org[0] >= ntiles_x || tile_org[1] >= ntiles_y) continue;
    563     proc_work_add_chunk(&work, mcode);
    564   }
    565 
    566   /* On the master process, request and print the progress report, since the
    567    * other processes have been able to start the calculation */
    568   if(htrdr->mpi_rank == 0) {
    569     fetch_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
    570     print_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
    571   }
    572 
    573   time_current(&t0);
    574 
    575   omp_set_nested(1); /* Enable nested threads for draw_image */
    576   #pragma omp parallel sections num_threads(2)
    577   {
    578     #pragma omp section
    579     mpi_probe_thieves(htrdr, &work, &probe_thieves);
    580 
    581     #pragma omp section
    582     {
    583       draw_map(htrdr, args, ntiles_x, ntiles_y, ntiles_adjusted, pix_sz, &work,
    584         &tiles);
    585       /* The processes have no more work to do. Stop probing for thieves */
    586       ATOMIC_SET(&probe_thieves, 0);
    587     }
    588   }
    589 
    590   if(htrdr->mpi_rank == 0) {
    591     update_mpi_progress(htrdr, HTRDR_MPI_PROGRESS_RENDERING);
    592     htrdr_log(htrdr, "\n"); /* Add a new line after the progress statuses */
    593   }
    594 
    595   time_sub(&t0, time_current(&t1), &t0);
    596   time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
    597   htrdr_log(htrdr, "Rendering time: %s\n", strbuf);
    598 
    599   /* Gather tiles to master process */
    600   time_current(&t0);
    601   res = mpi_gather_tiles(htrdr, &args->buffer_layout, buf, ntiles, &tiles);
    602   if(res != RES_OK) goto error;
    603   time_sub(&t0, time_current(&t1), &t0);
    604   time_dump(&t0, TIME_ALL, NULL, strbuf, sizeof(strbuf));
    605   htrdr_log(htrdr, "Image gathering time: %s\n", strbuf);
    606 
    607 exit:
    608   { /* Free allocated tiles */
    609     struct list_node* node;
    610     struct list_node* tmp;
    611     LIST_FOR_EACH_SAFE(node, tmp, &tiles) {
    612       struct tile* tile = CONTAINER_OF(node, struct tile, node);
    613       list_del(node);
    614       tile_ref_put(tile);
    615     }
    616   }
    617   proc_work_release(&work);
    618   return (res_T)res;
    619 error:
    620   goto exit;
    621 }
	htrdr Solving radiative transfer in heterogeneous media
	git clone git://git.meso-star.fr/htrdr.git
	Log \| Files \| Refs \| README \| LICENSE