atrstm_radcoefs_simd4.h (7808B)
1 /* Copyright (C) 2022, 2023 |Méso|Star> (contact@meso-star.com) 2 * Copyright (C) 2020, 2021 Centre National de la Recherche Scientifique 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation, either version 3 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17 #ifndef ATRSTM_RADCOEFS_SIMD4_H 18 #define ATRSTM_RADCOEFS_SIMD4_H 19 20 #include "atrstm.h" 21 #include "atrstm_radcoefs.h" 22 #include "atrstm_rdgfa_simd4.h" 23 24 #include <rsimd/math.h> 25 #include <rsimd/rsimd.h> 26 27 #include <rsys/math.h> 28 #include <rsys/rsys.h> 29 30 /* Radiative coefficients in m^-1 */ 31 struct radcoefs_simd4 { 32 v4f_T ka; /* Absorption coefficient */ 33 v4f_T ks; /* Scattering coefficient */ 34 v4f_T kext; /* Extinction coefficient */ 35 }; 36 static const struct radcoefs_simd4 RADCOEFS_SIMD4_NULL; 37 38 struct radcoefs_compute_simd4_args { 39 v4f_T lambda; /* wavelength [nm] */ 40 v4f_T n; /* Refractive index (real component) */ 41 v4f_T kappa; /* Refractive index (imaginary component) */ 42 v4f_T fractal_prefactor; 43 v4f_T fractal_dimension; 44 v4f_T soot_volumic_fraction; /* In m^3(soot) / m^3 */ 45 v4f_T soot_primary_particles_count; 46 v4f_T soot_primary_particles_diameter; /* In nm */ 47 int radcoefs_mask; /* Combination of atrstm_radcoef_flag */ 48 }; 49 static const struct radcoefs_compute_simd4_args RADCOEFS_COMPUTE_SIMD4_ARGS_NULL; 50 51 /* Forward declarations */ 52 struct atrstm; 53 struct atrri_refractive_index; 54 struct suvm_primitive; 55 56 extern LOCAL_SYM res_T 57 fetch_radcoefs_simd4 58 (const struct atrstm* atrstm, 59 const struct atrstm_fetch_radcoefs_args* args, 60 atrstm_radcoefs_T radcoefs); 61 62 extern LOCAL_SYM res_T 63 primitive_compute_radcoefs_simd4 64 (const struct atrstm* atrstm, 65 const struct atrri_refractive_index* refract_id, 66 const struct suvm_primitive* prim, 67 const int radcoefs_mask, 68 struct radcoefs_simd4* radcoefs); /* Per node radiative coefficients */ 69 70 static INLINE res_T 71 primitive_compute_radcoefs_range_simd4 72 (const struct atrstm* atrstm, 73 const struct atrri_refractive_index* refract_id, 74 const struct suvm_primitive* prim, 75 struct radcoefs* radcoefs_min, 76 struct radcoefs* radcoefs_max) 77 { 78 struct radcoefs_simd4 radcoefs; 79 v4f_T ka[2]; 80 v4f_T ks[2]; 81 v4f_T kext[2]; 82 res_T res = RES_OK; 83 ASSERT(radcoefs_min && radcoefs_max); 84 85 res = primitive_compute_radcoefs_simd4 86 (atrstm, refract_id, prim, ATRSTM_RADCOEFS_MASK_ALL, &radcoefs); 87 if(res != RES_OK) return res; 88 89 ka[0] = v4f_reduce_min(radcoefs.ka); 90 ka[1] = v4f_reduce_max(radcoefs.ka); 91 ks[0] = v4f_reduce_min(radcoefs.ks); 92 ks[1] = v4f_reduce_max(radcoefs.ks); 93 kext[0] = v4f_reduce_min(radcoefs.kext); 94 kext[1] = v4f_reduce_max(radcoefs.kext); 95 96 radcoefs_min->ka = v4f_x(ka[0]); 97 radcoefs_max->ka = v4f_x(ka[1]); 98 radcoefs_min->ks = v4f_x(ks[0]); 99 radcoefs_max->ks = v4f_x(ks[1]); 100 radcoefs_min->kext = v4f_x(kext[0]); 101 radcoefs_max->kext = v4f_x(kext[1]); 102 103 return RES_OK; 104 } 105 106 /* SIMD version of the radcoefs_compute function. Refer to its scalar version 107 * for informations of what it does */ 108 static INLINE void 109 radcoefs_compute_simd4 110 (struct radcoefs_simd4* radcoefs, 111 const struct radcoefs_compute_simd4_args* args) 112 { 113 /* Constant */ 114 const v4f_T pi = v4f_set1((float)PI); 115 116 /* Input variables */ 117 const v4f_T lambda = args->lambda; /* [nm] */ 118 const v4f_T n = args->n; 119 const v4f_T kappa = args->kappa; 120 const v4f_T Fv = args->soot_volumic_fraction; /* [nm^3(soot)/nm] */ 121 const v4f_T Np = args->soot_primary_particles_count; 122 const v4f_T Dp = args->soot_primary_particles_diameter; /* [nm] */ 123 const v4f_T kf = args->fractal_prefactor; 124 const v4f_T Df = args->fractal_dimension; 125 const int ka = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ka) != 0; 126 const int ks = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ks) != 0; 127 const int kext = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Kext) != 0; 128 129 /* SIMD mask */ 130 const v4f_T is_Fv_not_null = v4f_neq(Fv, v4f_zero()); 131 const v4f_T is_Np_not_null = v4f_neq(Np, v4f_zero()); 132 const v4f_T query_radcoefs = 133 v4f_mask1(args->radcoefs_mask & ATRSTM_RADCOEFS_MASK_ALL ? ~0 : 0); 134 const v4f_T mask = 135 v4f_and(v4f_and(is_Fv_not_null, is_Np_not_null), query_radcoefs); 136 137 /* Clean up radiative coefficients */ 138 radcoefs->ka = v4f_zero(); 139 radcoefs->ks = v4f_zero(); 140 radcoefs->kext = v4f_zero(); 141 142 if(v4f_movemask(mask) != 0) { 143 /* Precomputed values */ 144 const v4f_T n2 = v4f_mul(n, n); 145 const v4f_T kappa2 = v4f_mul(kappa, kappa); 146 const v4f_T four_n2_kappa2 = v4f_mul(v4f_set1(4), v4f_mul(n2, kappa2)); 147 const v4f_T xp = v4f_div(v4f_mul(pi, Dp), lambda); 148 const v4f_T xp3 = v4f_mul(v4f_mul(xp, xp), xp); 149 const v4f_T k = v4f_div(v4f_mul(v4f_set1(2), pi), lambda); 150 const v4f_T k2 = v4f_mul(k, k); 151 152 const v4f_T Dp3 = v4f_mul(v4f_mul(Dp, Dp), Dp); 153 const v4f_T Np_pi_Dp3 = v4f_mul(v4f_mul(Np, pi), Dp3); 154 const v4f_T Va = v4f_div(Np_pi_Dp3, v4f_set1(6)); /* [nm^3] */ 155 const v4f_T rho = v4f_div(Fv, Va); /* [#aggregates / nm^3] */ 156 const v4f_T tmp0 = v4f_mul(v4f_set1(1e9f), rho); 157 158 /* E(m), m = n + i*kappa */ 159 const v4f_T tmp1 = v4f_add(v4f_sub(n2, kappa2), v4f_set1(2)); 160 const v4f_T denom = v4f_madd(tmp1, tmp1, four_n2_kappa2); 161 const v4f_T six_n_kappa = v4f_mul(v4f_mul(v4f_set1(6), n), kappa); 162 const v4f_T Em = v4f_div(six_n_kappa, denom); 163 164 if(ka || kext) { 165 /* Absorption cross section, [nm^3/aggrefate] */ 166 const v4f_T four_pi_xp3 = v4f_mul(v4f_mul(v4f_set1(4), pi), xp3); 167 const v4f_T Cabs = v4f_mul(v4f_mul(Np, v4f_div(four_pi_xp3, k2)), Em); 168 169 /* Absorption coefficient, [m^-1] */ 170 radcoefs->ka = v4f_and(v4f_mul(tmp0, Cabs), mask); 171 } 172 173 if(ks || kext) { 174 /* F(m), m = n + i*kappa */ 175 const v4f_T n2_sub_kappa2 = v4f_sub(n2, kappa2); 176 const v4f_T n2_sub_kappa2_sub_1 = v4f_sub(n2_sub_kappa2, v4f_set1(1)); 177 const v4f_T n2_sub_kappa2_add_2 = v4f_add(n2_sub_kappa2, v4f_set1(2)); 178 const v4f_T tmp2 = v4f_mul(n2_sub_kappa2_sub_1, n2_sub_kappa2_add_2); 179 const v4f_T tmp3 = v4f_div(v4f_add(tmp2, four_n2_kappa2), denom); 180 const v4f_T Fm = v4f_madd(tmp3, tmp3, v4f_mul(Em, Em)); 181 182 /* Gyration radius */ 183 const v4f_T Rg = compute_gyration_radius_simd4(kf, Df, Np, Dp); /* [nm] */ 184 const v4f_T Rg2 = v4f_mul(Rg, Rg); 185 const v4f_T four_k2_Rg2 = v4f_mul(v4f_mul(v4f_set1(4), k2), Rg2); 186 const v4f_T tmp4 = v4f_div(four_k2_Rg2, v4f_mul(v4f_set1(3), Df)); 187 const v4f_T tmp5 = v4f_add(v4f_set1(1), tmp4); 188 const v4f_T g = v4f_pow(tmp5, v4f_mul(Df, v4f_set1(-0.5f))); 189 190 /* Scattering cross section, [nm^3/aggrefate] */ 191 const v4f_T Np2 = v4f_mul(Np, Np); 192 const v4f_T xp6 = v4f_mul(xp3, xp3); 193 const v4f_T eight_pi_xp6 = v4f_mul(v4f_mul(v4f_set1(8), pi), xp6); 194 const v4f_T tmp7 = v4f_div(eight_pi_xp6, v4f_mul(v4f_set1(3), k2)); 195 const v4f_T Csca = v4f_mul(v4f_mul(v4f_mul(Np2, tmp7), Fm), g); 196 197 /* Scattering coefficient, [m^-1] */ 198 radcoefs->ks = v4f_and(v4f_mul(tmp0, Csca), mask); 199 } 200 201 if(kext) { 202 radcoefs->kext = v4f_add(radcoefs->ka, radcoefs->ks); 203 } 204 205 ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(denom, v4f_zero()))); 206 ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Df, v4f_zero()))); 207 ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Dp, v4f_zero()))); 208 } 209 } 210 211 #endif /* ATRSTM_RADCOEFS_SIMD4_H */