atrstm

Load and structure a combustion gas mixture
git clone git://git.meso-star.fr/atrstm.git
Log | Files | Refs | README | LICENSE

atrstm_radcoefs_simd4.h (7808B)


      1 /* Copyright (C) 2022, 2023 |Méso|Star> (contact@meso-star.com)
      2  * Copyright (C) 2020, 2021 Centre National de la Recherche Scientifique
      3  *
      4  * This program is free software: you can redistribute it and/or modify
      5  * it under the terms of the GNU General Public License as published by
      6  * the Free Software Foundation, either version 3 of the License, or
      7  * (at your option) any later version.
      8  *
      9  * This program is distributed in the hope that it will be useful,
     10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     12  * GNU General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program. If not, see <http://www.gnu.org/licenses/>. */
     16 
     17 #ifndef ATRSTM_RADCOEFS_SIMD4_H
     18 #define ATRSTM_RADCOEFS_SIMD4_H
     19 
     20 #include "atrstm.h"
     21 #include "atrstm_radcoefs.h"
     22 #include "atrstm_rdgfa_simd4.h"
     23 
     24 #include <rsimd/math.h>
     25 #include <rsimd/rsimd.h>
     26 
     27 #include <rsys/math.h>
     28 #include <rsys/rsys.h>
     29 
     30 /* Radiative coefficients in m^-1 */
     31 struct radcoefs_simd4 {
     32   v4f_T ka; /* Absorption coefficient */
     33   v4f_T ks; /* Scattering coefficient */
     34   v4f_T kext; /* Extinction coefficient */
     35 };
     36 static const struct radcoefs_simd4 RADCOEFS_SIMD4_NULL;
     37 
     38 struct radcoefs_compute_simd4_args {
     39   v4f_T lambda; /* wavelength [nm] */
     40   v4f_T n; /* Refractive index (real component) */
     41   v4f_T kappa; /* Refractive index (imaginary component) */
     42   v4f_T fractal_prefactor;
     43   v4f_T fractal_dimension;
     44   v4f_T soot_volumic_fraction; /* In m^3(soot) / m^3 */
     45   v4f_T soot_primary_particles_count;
     46   v4f_T soot_primary_particles_diameter; /* In nm */
     47   int radcoefs_mask; /* Combination of atrstm_radcoef_flag */
     48 };
     49 static const struct radcoefs_compute_simd4_args RADCOEFS_COMPUTE_SIMD4_ARGS_NULL;
     50 
     51 /* Forward declarations */
     52 struct atrstm;
     53 struct atrri_refractive_index;
     54 struct suvm_primitive;
     55 
     56 extern LOCAL_SYM res_T
     57 fetch_radcoefs_simd4
     58   (const struct atrstm* atrstm,
     59    const struct atrstm_fetch_radcoefs_args* args,
     60    atrstm_radcoefs_T radcoefs);
     61 
     62 extern LOCAL_SYM res_T
     63 primitive_compute_radcoefs_simd4
     64   (const struct atrstm* atrstm,
     65    const struct atrri_refractive_index* refract_id,
     66    const struct suvm_primitive* prim,
     67    const int radcoefs_mask,
     68    struct radcoefs_simd4* radcoefs); /* Per node radiative coefficients */
     69 
     70 static INLINE res_T
     71 primitive_compute_radcoefs_range_simd4
     72   (const struct atrstm* atrstm,
     73    const struct atrri_refractive_index* refract_id,
     74    const struct suvm_primitive* prim,
     75    struct radcoefs* radcoefs_min,
     76    struct radcoefs* radcoefs_max)
     77 {
     78   struct radcoefs_simd4 radcoefs;
     79   v4f_T ka[2];
     80   v4f_T ks[2];
     81   v4f_T kext[2];
     82   res_T res = RES_OK;
     83   ASSERT(radcoefs_min && radcoefs_max);
     84 
     85   res = primitive_compute_radcoefs_simd4
     86     (atrstm, refract_id, prim, ATRSTM_RADCOEFS_MASK_ALL, &radcoefs);
     87   if(res != RES_OK) return res;
     88 
     89   ka[0] = v4f_reduce_min(radcoefs.ka);
     90   ka[1] = v4f_reduce_max(radcoefs.ka);
     91   ks[0] = v4f_reduce_min(radcoefs.ks);
     92   ks[1] = v4f_reduce_max(radcoefs.ks);
     93   kext[0] = v4f_reduce_min(radcoefs.kext);
     94   kext[1] = v4f_reduce_max(radcoefs.kext);
     95 
     96   radcoefs_min->ka = v4f_x(ka[0]);
     97   radcoefs_max->ka = v4f_x(ka[1]);
     98   radcoefs_min->ks = v4f_x(ks[0]);
     99   radcoefs_max->ks = v4f_x(ks[1]);
    100   radcoefs_min->kext = v4f_x(kext[0]);
    101   radcoefs_max->kext = v4f_x(kext[1]);
    102 
    103   return RES_OK;
    104 }
    105 
    106 /* SIMD version of the radcoefs_compute function. Refer to its scalar version
    107  * for informations of what it does */
    108 static INLINE void
    109 radcoefs_compute_simd4
    110   (struct radcoefs_simd4* radcoefs,
    111    const struct radcoefs_compute_simd4_args* args)
    112 {
    113   /* Constant */
    114   const v4f_T pi = v4f_set1((float)PI);
    115 
    116   /* Input variables */
    117   const v4f_T lambda = args->lambda; /* [nm] */
    118   const v4f_T n = args->n;
    119   const v4f_T kappa = args->kappa;
    120   const v4f_T Fv = args->soot_volumic_fraction; /* [nm^3(soot)/nm] */
    121   const v4f_T Np = args->soot_primary_particles_count;
    122   const v4f_T Dp = args->soot_primary_particles_diameter; /* [nm] */
    123   const v4f_T kf = args->fractal_prefactor;
    124   const v4f_T Df = args->fractal_dimension;
    125   const int ka = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ka) != 0;
    126   const int ks = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ks) != 0;
    127   const int kext = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Kext) != 0;
    128 
    129   /* SIMD mask */
    130   const v4f_T is_Fv_not_null = v4f_neq(Fv, v4f_zero());
    131   const v4f_T is_Np_not_null = v4f_neq(Np, v4f_zero());
    132   const v4f_T query_radcoefs =
    133     v4f_mask1(args->radcoefs_mask & ATRSTM_RADCOEFS_MASK_ALL ? ~0 : 0);
    134   const v4f_T mask =
    135     v4f_and(v4f_and(is_Fv_not_null, is_Np_not_null), query_radcoefs);
    136 
    137   /* Clean up radiative coefficients */
    138   radcoefs->ka = v4f_zero();
    139   radcoefs->ks = v4f_zero();
    140   radcoefs->kext = v4f_zero();
    141 
    142   if(v4f_movemask(mask) != 0) {
    143     /* Precomputed values */
    144     const v4f_T n2 = v4f_mul(n, n);
    145     const v4f_T kappa2 = v4f_mul(kappa, kappa);
    146     const v4f_T four_n2_kappa2 = v4f_mul(v4f_set1(4), v4f_mul(n2, kappa2));
    147     const v4f_T xp = v4f_div(v4f_mul(pi, Dp), lambda);
    148     const v4f_T xp3 = v4f_mul(v4f_mul(xp, xp), xp);
    149     const v4f_T k = v4f_div(v4f_mul(v4f_set1(2), pi), lambda);
    150     const v4f_T k2 = v4f_mul(k, k);
    151 
    152     const v4f_T Dp3 = v4f_mul(v4f_mul(Dp, Dp), Dp);
    153     const v4f_T Np_pi_Dp3 = v4f_mul(v4f_mul(Np, pi), Dp3);
    154     const v4f_T Va = v4f_div(Np_pi_Dp3, v4f_set1(6)); /* [nm^3] */
    155     const v4f_T rho = v4f_div(Fv, Va); /* [#aggregates / nm^3] */
    156     const v4f_T tmp0 = v4f_mul(v4f_set1(1e9f), rho);
    157 
    158     /* E(m), m = n + i*kappa */
    159     const v4f_T tmp1 = v4f_add(v4f_sub(n2, kappa2), v4f_set1(2));
    160     const v4f_T denom = v4f_madd(tmp1, tmp1, four_n2_kappa2);
    161     const v4f_T six_n_kappa = v4f_mul(v4f_mul(v4f_set1(6), n), kappa);
    162     const v4f_T Em = v4f_div(six_n_kappa, denom);
    163 
    164     if(ka || kext) {
    165       /* Absorption cross section, [nm^3/aggrefate] */
    166       const v4f_T four_pi_xp3 = v4f_mul(v4f_mul(v4f_set1(4), pi), xp3);
    167       const v4f_T Cabs = v4f_mul(v4f_mul(Np, v4f_div(four_pi_xp3, k2)), Em);
    168 
    169       /* Absorption coefficient, [m^-1] */
    170       radcoefs->ka = v4f_and(v4f_mul(tmp0, Cabs), mask);
    171     }
    172 
    173     if(ks || kext) {
    174       /* F(m), m = n + i*kappa */
    175       const v4f_T n2_sub_kappa2 = v4f_sub(n2, kappa2);
    176       const v4f_T n2_sub_kappa2_sub_1 = v4f_sub(n2_sub_kappa2, v4f_set1(1));
    177       const v4f_T n2_sub_kappa2_add_2 = v4f_add(n2_sub_kappa2, v4f_set1(2));
    178       const v4f_T tmp2 = v4f_mul(n2_sub_kappa2_sub_1, n2_sub_kappa2_add_2);
    179       const v4f_T tmp3 = v4f_div(v4f_add(tmp2, four_n2_kappa2), denom);
    180       const v4f_T Fm = v4f_madd(tmp3, tmp3, v4f_mul(Em, Em));
    181 
    182       /* Gyration radius */
    183       const v4f_T Rg = compute_gyration_radius_simd4(kf, Df, Np, Dp); /* [nm] */
    184       const v4f_T Rg2 = v4f_mul(Rg, Rg);
    185       const v4f_T four_k2_Rg2 = v4f_mul(v4f_mul(v4f_set1(4), k2), Rg2);
    186       const v4f_T tmp4 = v4f_div(four_k2_Rg2, v4f_mul(v4f_set1(3), Df));
    187       const v4f_T tmp5 = v4f_add(v4f_set1(1), tmp4);
    188       const v4f_T g = v4f_pow(tmp5, v4f_mul(Df, v4f_set1(-0.5f)));
    189 
    190       /* Scattering cross section, [nm^3/aggrefate] */
    191       const v4f_T Np2 = v4f_mul(Np, Np);
    192       const v4f_T xp6 = v4f_mul(xp3, xp3);
    193       const v4f_T eight_pi_xp6 = v4f_mul(v4f_mul(v4f_set1(8), pi), xp6);
    194       const v4f_T tmp7 = v4f_div(eight_pi_xp6, v4f_mul(v4f_set1(3), k2));
    195       const v4f_T Csca = v4f_mul(v4f_mul(v4f_mul(Np2, tmp7), Fm), g);
    196 
    197       /* Scattering coefficient, [m^-1] */
    198       radcoefs->ks = v4f_and(v4f_mul(tmp0, Csca), mask);
    199     }
    200 
    201     if(kext) {
    202       radcoefs->kext = v4f_add(radcoefs->ka, radcoefs->ks);
    203     }
    204 
    205     ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(denom, v4f_zero())));
    206     ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Df, v4f_zero())));
    207     ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Dp, v4f_zero())));
    208   }
    209 }
    210 
    211 #endif /* ATRSTM_RADCOEFS_SIMD4_H */