atrstm

Load and structure a combustion gas mixture
git clone git://git.meso-star.fr/atrstm.git
Log | Files | Refs | README | LICENSE

commit 16f9712ee6acaa269a691992046fdf3c13a5e41a
parent 477fb701f096b706c222a18958146814be6d5a8b
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Tue,  2 Feb 2021 09:02:33 +0100

Merge branch 'feature_simd'

Diffstat:
Mcmake/CMakeLists.txt | 35++++++++++++++++++++++++++++++++---
Asrc/atrstm_radcoefs_simd4.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/atrstm_radcoefs_simd4.h | 204+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/atrstm_setup_octrees.c | 9+++++++++
Msrc/test_atrstm_radcoefs.c | 17++++++++++++++++-
Asrc/test_atrstm_radcoefs_simd.c | 151++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 493 insertions(+), 4 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2020 CNRS +# Copyright (C) 2020, 2021 CNRS # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.1) project(atrstm C) enable_testing() @@ -31,6 +31,16 @@ find_package(RSys 0.12 REQUIRED) find_package(StarTetraHedra 0.0 REQUIRED) find_package(StarUVM 0.0 REQUIRED) find_package(StarVX 0.2 REQUIRED) +find_package(RSIMD 0.3) +if(RSIMD_FOUND) + option(USE_SIMD "Use SIMD instruction sets" ON) +endif() + +if(USE_SIMD) + message(STATUS "Use SIMD instruction sets") +else() + message(STATUS "Do not use SIMD instruction sets") +endif() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${RCMAKE_SOURCE_DIR}) include(rcmake) @@ -77,15 +87,31 @@ set(ATRSTM_FILES_INC_API set(ATRSTM_FILES_DOC COPYING README.md) +# SIMD files +if(USE_SIMD) + set(ATRSTM_FILES_SRC ${ATRSTM_FILES_SRC} atrstm_radcoefs_simd4.c) + set(ATRSTM_FILES_INC ${ATRSTM_FILES_INC} atrstm_radcoefs_simd4.h) +endif() + # Prepend each file in the `ATRSTM_FILES_<SRC|INC>' list by `ATRSTM_SOURCE_DIR' rcmake_prepend_path(ATRSTM_FILES_SRC ${ATRSTM_SOURCE_DIR}) rcmake_prepend_path(ATRSTM_FILES_INC ${ATRSTM_SOURCE_DIR}) rcmake_prepend_path(ATRSTM_FILES_INC_API ${ATRSTM_SOURCE_DIR}) rcmake_prepend_path(ATRSTM_FILES_DOC ${PROJECT_SOURCE_DIR}/../) -add_library(atrstm SHARED ${ATRSTM_FILES_SRC} ${ATRSTM_FILES_INC} ${ATRSTM_FILES_INC_API}) +add_library(atrstm SHARED + ${ATRSTM_FILES_SRC} + ${ATRSTM_FILES_INC} + ${ATRSTM_FILES_INC_API}) target_link_libraries(atrstm AtrRI AtrTP RSys StarTetraHedra StarUVM StarVX m) +# Setup SIMD support on the target +if(USE_SIMD) + target_link_libraries(atrstm RSIMD) + set_target_properties(atrstm PROPERTIES + COMPILE_DEFINITIONS ATRSTM_USE_SIMD) +endif() + if(CMAKE_COMPILER_IS_GNUCC) set_target_properties(atrstm PROPERTIES LINK_FLAGS "${OpenMP_C_FLAGS}") endif() @@ -115,6 +141,9 @@ if(NOT NO_TEST) build_test(test_atrstm) new_test(test_atrstm_radcoefs) + if(USE_SIMD) + new_test(test_atrstm_radcoefs_simd) + endif() endif() ################################################################################ diff --git a/src/atrstm_radcoefs_simd4.c b/src/atrstm_radcoefs_simd4.c @@ -0,0 +1,81 @@ +/* Copyright (C) 2020, 2021 CNRS + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "atrstm_c.h" +#include "atrstm_radcoefs_simd4.h" + +#include <astoria/atrtp.h> +#include <star/suvm.h> + +/******************************************************************************* + * Local functions + ******************************************************************************/ +res_T +primitive_compute_radcoefs_simd4 + (const struct atrstm* atrstm, + const struct atrri_refractive_index* refract_id, + const struct suvm_primitive* prim, + const int radcoefs_mask, /* Combination of atrstm_radcoef_flag */ + struct radcoefs_simd4* radcoefs) +{ + struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL; + struct atrtp_desc desc = ATRTP_DESC_NULL; + const double* node[4]; + res_T res = RES_OK; + ASSERT(atrstm && prim && prim->nvertices == 4 && refract_id && radcoefs); + + res = atrtp_get_desc(atrstm->atrtp, &desc); + if(res != RES_OK) goto error; + + /* Setup the constant input arguments of the optical properties computation + * routine */ + args.lambda = v4f_set1((float)refract_id->wavelength); + args.n = v4f_set1((float)refract_id->n); + args.kappa = v4f_set1((float)refract_id->kappa); + args.gyration_radius_prefactor = v4f_set1((float)atrstm->gyration_radius_prefactor); + args.fractal_dimension = v4f_set1((float)atrstm->fractal_dimension); + args.radcoefs_mask = radcoefs_mask; + + /* Fetch the thermodynamic properties of the 4 primitive nodes */ + node[0] = atrtp_desc_get_node_properties(&desc, prim->indices[0]); + node[1] = atrtp_desc_get_node_properties(&desc, prim->indices[1]); + node[2] = atrtp_desc_get_node_properties(&desc, prim->indices[2]); + node[3] = atrtp_desc_get_node_properties(&desc, prim->indices[3]); + + /* Scatter Soot properties */ + args.soot_volumic_fraction = v4f_set + ((float)node[0][ATRTP_SOOT_VOLFRAC], + (float)node[1][ATRTP_SOOT_VOLFRAC], + (float)node[2][ATRTP_SOOT_VOLFRAC], + (float)node[3][ATRTP_SOOT_VOLFRAC]); + args.soot_primary_particles_count = v4f_set + ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT], + (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT], + (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT], + (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_COUNT]); + args.soot_primary_particles_diameter = v4f_set + ((float)node[0][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER], + (float)node[1][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER], + (float)node[2][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER], + (float)node[3][ATRTP_SOOT_PRIMARY_PARTICLES_DIAMETER]); + + /* Compute the per primitive node optical properties */ + radcoefs_compute_simd4(radcoefs, &args); + +exit: + return res; +error: + goto exit; +} diff --git a/src/atrstm_radcoefs_simd4.h b/src/atrstm_radcoefs_simd4.h @@ -0,0 +1,204 @@ +/* Copyright (C) 2020, 2021 CNRS + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef ATRSTM_RADCOEFS_SIMD4_H +#define ATRSTM_RADCOEFS_SIMD4_H + +#include "atrstm.h" +#include "atrstm_radcoefs.h" + +#include <rsimd/math.h> +#include <rsimd/rsimd.h> + +#include <rsys/math.h> +#include <rsys/rsys.h> + +/* Radiative coefficients in m^-1 */ +struct radcoefs_simd4 { + v4f_T ka; /* Absorption coefficient */ + v4f_T ks; /* Scattering coefficient */ + v4f_T kext; /* Extinction coefficient */ +}; +static const struct radcoefs_simd4 RADCOEFS_SIMD4_NULL; + +struct radcoefs_compute_simd4_args { + v4f_T lambda; /* wavelength [nm] */ + v4f_T n; /* Refractive index (real component) */ + v4f_T kappa; /* Refractive index (imaginary component) */ + v4f_T gyration_radius_prefactor; + v4f_T fractal_dimension; + v4f_T soot_volumic_fraction; /* In m^3(soot) / m^3 */ + v4f_T soot_primary_particles_count; + v4f_T soot_primary_particles_diameter; /* In nm */ + int radcoefs_mask; /* Combination of atrstm_radcoef_flag */ +}; +static const struct radcoefs_compute_simd4_args RADCOEFS_COMPUTE_SIMD4_ARGS_NULL; + +/* Forward declarations */ +struct atrstm; +struct atrri_refractive_index; +struct suvm_primitive; + +extern LOCAL_SYM res_T +primitive_compute_radcoefs_simd4 + (const struct atrstm* atrstm, + const struct atrri_refractive_index* refract_id, + const struct suvm_primitive* prim, + const int radcoefs_mask, + struct radcoefs_simd4* radcoefs); /* Per node radiative coefficients */ + +static INLINE res_T +primitive_compute_radcoefs_range_simd4 + (const struct atrstm* atrstm, + const struct atrri_refractive_index* refract_id, + const struct suvm_primitive* prim, + struct radcoefs* radcoefs_min, + struct radcoefs* radcoefs_max) +{ + struct radcoefs_simd4 radcoefs; + v4f_T ka[2]; + v4f_T ks[2]; + v4f_T kext[2]; + res_T res = RES_OK; + ASSERT(radcoefs_min && radcoefs_max); + + res = primitive_compute_radcoefs_simd4 + (atrstm, refract_id, prim, ATRSTM_RADCOEFS_MASK_ALL, &radcoefs); + if(res != RES_OK) return res; + + ka[0] = v4f_reduce_min(radcoefs.ka); + ka[1] = v4f_reduce_max(radcoefs.ka); + ks[0] = v4f_reduce_min(radcoefs.ks); + ks[1] = v4f_reduce_max(radcoefs.ks); + kext[0] = v4f_reduce_min(radcoefs.kext); + kext[1] = v4f_reduce_max(radcoefs.kext); + + radcoefs_min->ka = v4f_x(ka[0]); + radcoefs_max->ka = v4f_x(ka[1]); + radcoefs_min->ks = v4f_x(ks[0]); + radcoefs_max->ks = v4f_x(ks[1]); + radcoefs_min->kext = v4f_x(kext[0]); + radcoefs_max->kext = v4f_x(kext[1]); + + return RES_OK; +} + +/* SIMD version of the radcoefs_compute function. Refer to its scalar version + * for informations of what it does */ +static INLINE void +radcoefs_compute_simd4 + (struct radcoefs_simd4* radcoefs, + const struct radcoefs_compute_simd4_args* args) +{ + /* Constant */ + const v4f_T pi = v4f_set1((float)PI); + + /* Input variables */ + const v4f_T lambda = args->lambda; /* [nm] */ + const v4f_T n = args->n; + const v4f_T kappa = args->kappa; + const v4f_T Fv = args->soot_volumic_fraction; /* [nm^3(soot)/nm] */ + const v4f_T Np = args->soot_primary_particles_count; + const v4f_T Dp = args->soot_primary_particles_diameter; /* [nm] */ + const v4f_T kf = args->gyration_radius_prefactor; + const v4f_T Df = args->fractal_dimension; + const int ka = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ka) != 0; + const int ks = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Ks) != 0; + const int kext = (args->radcoefs_mask & ATRSTM_RADCOEF_FLAG_Kext) != 0; + + /* SIMD mask */ + const v4f_T is_Fv_not_null = v4f_neq(Fv, v4f_zero()); + const v4f_T is_Np_not_null = v4f_neq(Np, v4f_zero()); + const v4f_T query_radcoefs = + v4f_mask1(args->radcoefs_mask & ATRSTM_RADCOEFS_MASK_ALL ? ~0 : 0); + const v4f_T mask = + v4f_and(v4f_and(is_Fv_not_null, is_Np_not_null), query_radcoefs); + + /* Clean up radiative coefficients */ + radcoefs->ka = v4f_zero(); + radcoefs->ks = v4f_zero(); + radcoefs->kext = v4f_zero(); + + if(v4f_movemask(mask) != 0) { + /* Precomputed values */ + const v4f_T n2 = v4f_mul(n, n); + const v4f_T kappa2 = v4f_mul(kappa, kappa); + const v4f_T four_n2_kappa2 = v4f_mul(v4f_set1(4), v4f_mul(n2, kappa2)); + const v4f_T xp = v4f_div(v4f_mul(pi, Dp), lambda); + const v4f_T xp3 = v4f_mul(v4f_mul(xp, xp), xp); + const v4f_T k = v4f_div(v4f_mul(v4f_set1(2), pi), lambda); + const v4f_T k2 = v4f_mul(k, k); + + const v4f_T Dp3 = v4f_mul(v4f_mul(Dp, Dp), Dp); + const v4f_T Np_pi_Dp3 = v4f_mul(v4f_mul(Np, pi), Dp3); + const v4f_T Va = v4f_div(Np_pi_Dp3, v4f_set1(6)); /* [nm^3] */ + const v4f_T rho = v4f_div(Fv, Va); /* [#aggregates / nm^3] */ + const v4f_T tmp0 = v4f_mul(v4f_set1(1e9f), rho); + + /* E(m), m = n + i*kappa */ + const v4f_T tmp1 = v4f_add(v4f_sub(n2, kappa2), v4f_set1(2)); + const v4f_T denom = v4f_madd(tmp1, tmp1, four_n2_kappa2); + const v4f_T six_n_kappa = v4f_mul(v4f_mul(v4f_set1(6), n), kappa); + const v4f_T Em = v4f_div(six_n_kappa, denom); + + if(ka || kext) { + /* Absorption cross section, [nm^3/aggrefate] */ + const v4f_T four_pi_xp3 = v4f_mul(v4f_mul(v4f_set1(4), pi), xp3); + const v4f_T Cabs = v4f_mul(v4f_mul(Np, v4f_div(four_pi_xp3, k2)), Em); + + /* Absorption coefficient, [m^-1] */ + radcoefs->ka = v4f_and(v4f_mul(tmp0, Cabs), mask); + } + + if(ks || kext) { + /* F(m), m = n + i*kappa */ + const v4f_T n2_sub_kappa2 = v4f_sub(n2, kappa2); + const v4f_T n2_sub_kappa2_sub_1 = v4f_sub(n2_sub_kappa2, v4f_set1(1)); + const v4f_T n2_sub_kappa2_add_2 = v4f_add(n2_sub_kappa2, v4f_set1(2)); + const v4f_T tmp2 = v4f_mul(n2_sub_kappa2_sub_1, n2_sub_kappa2_add_2); + const v4f_T tmp3 = v4f_div(v4f_add(tmp2, four_n2_kappa2), denom); + const v4f_T Fm = v4f_madd(tmp3, tmp3, v4f_mul(Em, Em)); + + /* Gyration radius */ + const v4f_T tmp4 = v4f_pow(v4f_div(Np, kf), v4f_rcp(Df)); + const v4f_T Rg = v4f_mul(v4f_mul(v4f_set1(0.5f), Dp), tmp4); /* [nm] */ + const v4f_T Rg2 = v4f_mul(Rg, Rg); + const v4f_T four_k2_Rg2 = v4f_mul(v4f_mul(v4f_set1(4), k2), Rg2); + const v4f_T tmp5 = v4f_div(four_k2_Rg2, v4f_mul(v4f_set1(3), Df)); + const v4f_T tmp6 = v4f_add(v4f_set1(1), tmp5); + const v4f_T g = v4f_pow(tmp6, v4f_mul(Df, v4f_set1(-0.5f))); + + /* Scattering cross section, [nm^3/aggrefate] */ + const v4f_T Np2 = v4f_mul(Np, Np); + const v4f_T xp6 = v4f_mul(xp3, xp3); + const v4f_T eight_pi_xp6 = v4f_mul(v4f_mul(v4f_set1(8), pi), xp6); + const v4f_T tmp7 = v4f_div(eight_pi_xp6, v4f_mul(v4f_set1(3), k2)); + const v4f_T Csca = v4f_mul(v4f_mul(v4f_mul(Np2, tmp7), Fm), g); + + /* Scattering coefficient, [m^-1] */ + radcoefs->ks = v4f_and(v4f_mul(tmp0, Csca), mask); + } + + if(kext) { + radcoefs->kext = v4f_add(radcoefs->ka, radcoefs->ks); + } + + ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(denom, v4f_zero()))); + ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Df, v4f_zero()))); + ASSERT(v4f_movemask(mask) && v4f_movemask(v4f_neq(Dp, v4f_zero()))); + } +} + +#endif /* ATRSTM_RADCOEFS_SIMD4_H */ diff --git a/src/atrstm_setup_octrees.c b/src/atrstm_setup_octrees.c @@ -23,6 +23,10 @@ #include "atrstm_setup_octrees.h" #include "atrstm_svx.h" +#ifdef ATRSTM_USE_SIMD + #include "atrstm_radcoefs_simd4.h" +#endif + #include <astoria/atrri.h> #include <star/suvm.h> @@ -234,8 +238,13 @@ voxelize_partition voxel_commit_radcoefs_range (vox, ATRSTM_CPNT_GAS, &radcoefs_min, &radcoefs_max); +#ifdef ATRSTM_USE_SIMD + res = primitive_compute_radcoefs_range_simd4 + (atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max); +#else res = primitive_compute_radcoefs_range (atrstm, refract_id, &prim, &radcoefs_min, &radcoefs_max); +#endif if(UNLIKELY(res != RES_OK)) goto error; voxel_commit_radcoefs_range (vox, ATRSTM_CPNT_SOOT, &radcoefs_min, &radcoefs_max); diff --git a/src/test_atrstm_radcoefs.c b/src/test_atrstm_radcoefs.c @@ -21,7 +21,7 @@ main(int argc, char** argv) const double ka_ref = 5.7382401729092799E-1; const double ks_ref = 7.2169062018378995E-6; - struct radcoefs radcoefs; + struct radcoefs radcoefs = RADCOEFS_NULL; struct radcoefs_compute_args args = RADCOEFS_COMPUTE_ARGS_NULL; (void)argc, (void)argv; @@ -60,5 +60,20 @@ main(int argc, char** argv) radcoefs_compute(&radcoefs, &args); CHK(eq_eps(radcoefs.kext, ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL; + + args.soot_primary_particles_count = 0; + radcoefs_compute(&radcoefs, &args); + CHK(radcoefs.ka == 0); + CHK(radcoefs.ks == 0); + CHK(radcoefs.kext == 0); + + args.soot_primary_particles_count = 100; + args.soot_volumic_fraction = 0; + radcoefs_compute(&radcoefs, &args); + CHK(radcoefs.ka == 0); + CHK(radcoefs.ks == 0); + CHK(radcoefs.kext == 0); + return 0; } diff --git a/src/test_atrstm_radcoefs_simd.c b/src/test_atrstm_radcoefs_simd.c @@ -0,0 +1,151 @@ +/* Copyright (C) 2020, 2021 CNRS + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "atrstm_radcoefs_simd4.c" + +int +main(int argc, char** argv) +{ + const double ka_ref = 5.7382401729092799E-1; + const double ks_ref = 7.2169062018378995E-6; + + const double ka_ref2[4] = { + 0.52178067472799794, + 0.52178067472799794, + 1.0435613494559959, + 0.52178067472799794 + }; + const double ks_ref2[4] = { + 9.6010140939975883e-002, + 3.3272961678492224e-002, + 0.19202028187995177, + 9.9964602374815484e-002 + }; + struct radcoefs_simd4 radcoefs = RADCOEFS_SIMD4_NULL; + struct radcoefs_compute_simd4_args args = RADCOEFS_COMPUTE_SIMD4_ARGS_NULL; + float ALIGN(16) ka[4], ks[4], kext[4]; + (void)argc, (void)argv; + + args.lambda = v4f_set1(633.f); + args.n = v4f_set1(1.90f); + args.kappa = v4f_set1(0.55f); + args.gyration_radius_prefactor = v4f_set1(1.70f); + args.fractal_dimension = v4f_set1(1.75f); + args.soot_volumic_fraction = v4f_set(1e-7f, 0.f, 1e-7f, 1e-7f); + args.soot_primary_particles_count = v4f_set(100.f, 100.f, 0.f, 100.f); + args.soot_primary_particles_diameter = v4f_set1(1.f); + args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL; + + radcoefs_compute_simd4(&radcoefs, &args); + + v4f_store(ka, radcoefs.ka); + v4f_store(ks, radcoefs.ks); + v4f_store(kext, radcoefs.kext); + printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n", + SPLIT4(ka), SPLIT4(ks)); + + CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6)); + CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6)); + CHK(ka[1] == 0); + CHK(ka[2] == 0); + + CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6)); + CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6)); + CHK(ks[1] == 0); + CHK(ks[2] == 0); + + CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + CHK(kext[1] == 0); + CHK(kext[2] == 0); + + args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ka; + radcoefs_compute_simd4(&radcoefs, &args); + v4f_store(ka, radcoefs.ka); + v4f_store(ks, radcoefs.ks); + v4f_store(kext, radcoefs.kext); + CHK(eq_eps(ka[0], ka_ref, ka_ref*1.e-6)); + CHK(eq_eps(ka[3], ka_ref, ka_ref*1.e-6)); + CHK(ka[1] == 0); + CHK(ka[2] == 0); + CHK(ks[0] == 0 && ks[1] == 0 && ks[2] == 0 && ks[0] == 0); + CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0); + + args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Ks; + radcoefs_compute_simd4(&radcoefs, &args); + v4f_store(ka, radcoefs.ka); + v4f_store(ks, radcoefs.ks); + v4f_store(kext, radcoefs.kext); + CHK(eq_eps(ks[0], ks_ref, ks_ref*1.e-6)); + CHK(eq_eps(ks[3], ks_ref, ks_ref*1.e-6)); + CHK(ks[1] == 0); + CHK(ks[2] == 0); + CHK(ka[0] == 0 && ka[1] == 0 && ka[2] == 0 && ka[0] == 0); + CHK(kext[0] == 0 && kext[1] == 0 && kext[2] == 0 && kext[0] == 0); + + /* Note that actually even though Ka and Ks are note required they are + * internally computed to evaluate kext and are returned to the caller. Their + * value are thus not null */ + args.radcoefs_mask = ATRSTM_RADCOEF_FLAG_Kext; + radcoefs_compute_simd4(&radcoefs, &args); + v4f_store(kext, radcoefs.kext); + CHK(eq_eps(kext[0], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + CHK(eq_eps(kext[3], ka_ref+ks_ref, (ka_ref+ks_ref)*1e-6)); + CHK(kext[1] == 0); + CHK(kext[2] == 0); + + args.lambda = v4f_set1(633.f); + args.n = v4f_set1(1.75f); + args.kappa = v4f_set1(0.435f); + args.gyration_radius_prefactor = v4f_set1(1.70f); + args.fractal_dimension = v4f_set1(1.75f); + args.soot_volumic_fraction = v4f_set + (9.9999999999999995e-008f, + 9.9999999999999995e-008f, + 1.9999999999999999e-007f, + 9.9999999999999995e-008f); + args.soot_primary_particles_count = v4f_set + (400.f, + 400.f, + 400.f, + 800.f); + args.soot_primary_particles_diameter = v4f_set + (34.000000006413003f, + 17.000000003206502f, + 34.000000006413003f, + 34.000000006413003f); + args.radcoefs_mask = ATRSTM_RADCOEFS_MASK_ALL; + radcoefs_compute_simd4(&radcoefs, &args); + v4f_store(ka, radcoefs.ka); + v4f_store(ks, radcoefs.ks); + v4f_store(kext, radcoefs.kext); + printf("ka = {%g, %g, %g, %g}; ks = {%g, %g, %g, %g}\n", + SPLIT4(ka), SPLIT4(ks)); + CHK(eq_eps(ka[0], ka_ref2[0], ka_ref2[0]*1.e-6)); + CHK(eq_eps(ka[1], ka_ref2[1], ka_ref2[1]*1.e-6)); + CHK(eq_eps(ka[2], ka_ref2[2], ka_ref2[2]*1.e-6)); + CHK(eq_eps(ka[3], ka_ref2[3], ka_ref2[3]*1.e-6)); + CHK(eq_eps(ks[0], ks_ref2[0], ks_ref2[0]*1.e-6)); + CHK(eq_eps(ks[1], ks_ref2[1], ks_ref2[1]*1.e-6)); + CHK(eq_eps(ks[2], ks_ref2[2], ks_ref2[2]*1.e-6)); + CHK(eq_eps(ks[3], ks_ref2[3], ks_ref2[3]*1.e-6)); + CHK(eq_eps(kext[0], ka_ref2[0]+ks_ref2[0], (ka_ref2[0]+ks_ref2[0])*1.e-6)); + CHK(eq_eps(kext[1], ka_ref2[1]+ks_ref2[1], (ka_ref2[1]+ks_ref2[1])*1.e-6)); + CHK(eq_eps(kext[2], ka_ref2[2]+ks_ref2[2], (ka_ref2[2]+ks_ref2[2])*1.e-6)); + CHK(eq_eps(kext[3], ka_ref2[3]+ks_ref2[3], (ka_ref2[3]+ks_ref2[3])*1.e-6)); + + return 0; +} +