rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit 0e1e4c09f7844bf674cb02e2c93be421b1206346
parent 896a516c378be27ae73708bfad24732272cdee40
Author: vaplv <vaplv@free.fr>
Date:   Sun, 10 Mar 2019 17:16:22 +0100

Make generic the SoA tests

Refactor the existing SoA tests to use the new generic test. Add and
test the SoA8f3 and SoA8f4 API.

Diffstat:
Mcmake/CMakeLists.txt | 9+++++++--
Asrc/soa8f3.h | 22++++++++++++++++++++++
Asrc/soa8f4.h | 27+++++++++++++++++++++++++++
Msrc/test_soa4f2.c | 100++++---------------------------------------------------------------------------
Msrc/test_soa4f3.c | 131+++----------------------------------------------------------------------------
Msrc/test_soa4f4.c | 201++-----------------------------------------------------------------------------
Msrc/test_soa8f2.c | 113++++---------------------------------------------------------------------------
Asrc/test_soa8f3.c | 28++++++++++++++++++++++++++++
Asrc/test_soa8f4.c | 28++++++++++++++++++++++++++++
Asrc/test_soaXfY.h | 262+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 394 insertions(+), 527 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -60,7 +60,10 @@ set(RSIMD_FILES_INC_LEGACY soaXf3.h soa4f2.h soa4f3.h - soa4f4.h) + soa4f4.h + soa8f2.h + soa8f3.h + soa8f4.h) set(RSIMD_FILES_INC_SSE sse/sse.h sse/ssef.h @@ -119,7 +122,6 @@ if(NOT NO_TEST) new_test(test_soa4f2) new_test(test_soa4f3) new_test(test_soa4f4) - new_test(test_soa8f2 "-mavx") if(SSE4_1 AND CMAKE_COMPILER_IS_GNUCC) new_test_named(test_v4f_sse4_1 test_v4f "-msse4.1") @@ -129,6 +131,9 @@ if(NOT NO_TEST) if(AVX AND CMAKE_COMPILER_IS_GNUCC) new_test(test_v8f "-mavx") new_test(test_v8i "-mavx") + new_test(test_soa8f2 "-mavx") + new_test(test_soa8f3 "-mavx") + new_test(test_soa8f4 "-mavx") endif(AVX AND CMAKE_COMPILER_IS_GNUCC) endif(NOT NO_TEST) diff --git a/src/soa8f3.h b/src/soa8f3.h @@ -0,0 +1,22 @@ +/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef SOA8F3_H +#define SOA8F3_H + +#define RSIMD_WIDTH__ 8 +#include "soaXf3.h" + +#endif /* SOA8F3_H */ diff --git a/src/soa8f4.h b/src/soa8f4.h @@ -0,0 +1,27 @@ +/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef SOA8F4_H +#define SOA8F4_H + +/* Generate the common soa8f4 functions */ +#define RSIMD_WIDTH__ 8 +#define RSIMD_SOA_DIMENSION__ 4 +#include "soaXfY_begin.h" +#include "soaXfY.h" +#include "soaXfY_end.h" + +#endif /* SOA8F4_H */ + diff --git a/src/test_soa4f2.c b/src/test_soa4f2.c @@ -13,106 +13,16 @@ * You should have received a copy of the GNU Lesser General Public License * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "soa4f2.h" -#include "test_soaXf_utils.h" - -#define CHECK_F2(V, A, B, C, D, E, F, G, H) \ - { \ - const v4f_T* v__ = (V); \ - CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \ - CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \ - } (void)0 +/* Generate the test_soa4f2 function */ +#define SOA_SIMD_WIDTH 4 +#define SOA_DIMENSION 2 +#include "test_soaXfY.h" int main(int argc, char** argv) { - v4f_T a[2], b[2], c[2], dst[2], f; (void)argc, (void)argv; - - CHK(soa4f2_set(a, soa4f2_splat(c, v4f_set1(-1.f))) == a); - CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE); - - CHK(soa4f2(c, v4f_set(0.f, 1.f, 2.f, 3.f), v4f_set(5.f, 6.f, 7.f, 8.f)) == c); - CHK(soa4f2_set(a, c) == a); - CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE); - - CHK(soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f),v4f_set(5.f,-6.f,-7.f, 8.f)) == a); - CHK(soa4f2_minus(b, a) == b); - CHECK_F2(b, 1.f,-2.f,-3.f, 4.f, -5.f, 6.f, 7.f,-8.f); - - CHK(soa4f2_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst); - CHECK_F2(dst, 0.f, 4.f, 3.f, -1.f, 6.f, -4.f, -7.f, 11.f); - CHK(soa4f2_add(dst, a, b) == dst); - CHECK_F2(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f); - CHK(soa4f2_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst); - CHECK_F2(dst, -2.f, 0.f, 3.f, -7.f, 4.f, -8.f, -7.f, 5.f); - CHK(soa4f2_sub(dst, a, b) == dst); - CHECK_F2(dst, -2.f, 4.f, 6.f, -8.f, 10.f, -12.f, -14.f, 16.f); - CHK(soa4f2_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)) == dst); - CHECK_F2(dst, -2.f, 6.f, 0.f, 4.f, 10.f, -18.f, 0.f, -8.f); - CHK(soa4f2_mul(dst, a, b) == dst); - CHECK_F2(dst, -1.f, -4.f, -9.f, -16.f, -25.f, -36.f, -49.f, -64.f); - CHK(soa4f2_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)) == dst); - CHECK_F2(dst, -0.5f, 
4.f, 3.f, -1.f, 2.5f, -12.f, -7.f, 2.f); - CHK(soa4f2_div(dst, a, b) == dst); - CHECK_F2(dst, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f); - - soa4f2(a, v4f_set1(0.f), v4f_set1(1.f)); - soa4f2(b, v4f_set1(1.f), v4f_set1(2.f)); - CHK(soa4f2_lerp(dst, a, b, v4f_set1(0.5f)) == dst); - CHECK_F2(dst, 0.5f, 0.5f, 0.5f, 0.5f, 1.5f, 1.5f, 1.5f, 1.5f); - soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f), v4f_set(5.f,-6.f,-7.f, 8.f)); - soa4f2_minus(b, a); - CHK(soa4f2_lerp(dst, a, b, v4f_set(0.f, 1.f, 0.5f, 1.f)) == dst); - CHECK_F2(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f); - - f = soa4f2_sum(b); - CHECK_V4MASK(v4f_eq(f, v4f_set(-4.f, 4.f, 4.f, -4.f)), V4TRUE); - f = soa4f2_dot(a, b); - CHECK_V4MASK(v4f_eq(f, v4f_set(-26.f, -40.f, -58.f, -80.f)), V4TRUE); - f = soa4f2_len(a); - CHECK_V4MASK - (v4f_eq_eps(f, v4f_sqrt(soa4f2_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE); - - CHECK_V4MASK(soa4f2_is_normalized(b), V4FALSE); - f = soa4f2_normalize(dst, b); - CHECK_V4MASK(v4f_eq_eps(f, soa4f2_len(b), v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(soa4f2_is_normalized(b), V4FALSE); - CHECK_V4MASK(soa4f2_is_normalized(dst), V4TRUE); - soa4f2_divf(b, b, f); - CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE); - - CHECK_V4MASK(soa4f2_eq(a, a), V4TRUE); - CHECK_V4MASK(soa4f2_eq(a, b), V4FALSE); - soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f), v4f_set(5.f,-6.f,-7.f, 8.f)); - soa4f2(b, v4f_set(-1.f,-2.f, 5.f,-4.001f), v4f_set(5.f,-6.f, 7.f, 8.001f)); - CHECK_V4MASK__(soa4f2_eq(a, b), ~0, 0, 0, 0); - CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0); - CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-6f)),~0, 0, 0, 0); - CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-2f)),~0, 0, 0,~0); - - soa4f2(a, v4f_set(1.f, 2.f, 3.f,-1.f), v4f_set(-2.f, 0.f,-7.f, 0.f)); - soa4f2(b, v4f_set(3.f, 2.f, 1.f,-2.f), v4f_set(1.f,-6.f, 0.5f, 2.f)); - f = soa4f2_cross(a, b); - CHECK_V4MASK(v4f_eq(f, 
v4f_set(7.f, -12.f, 8.5f, -2.f)), V4TRUE); - - CHK(soa4f2_min(dst, a, b) == dst); - CHECK_F2(dst, 1.f, 2.f, 1.f, -2.f, -2.f, -6.f, -7.f, 0.f); - CHK(soa4f2_max(dst, a, b) == dst); - CHECK_F2(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f); - - soa4f2_sel(dst, b, a, v4f_mask(~0, ~0, 0, ~0)); - CHECK_F2(dst, 1.f, 2.f, 1.f, -1.f, -2.f, 0.f, 0.5f, 0.f); - - soa4f2(c, v4f_mask(~0, ~0, 1, ~0), v4f_mask(~0, 0, 0, 0)); - soa4f2_selv(dst, b, a, c); - CHECK_F2(dst, 1.f, 2.f, 1.f, -1.f, -2.f, -6.f, 0.5f, 2.f); - + test_soa4f2(); return 0; } diff --git a/src/test_soa4f3.c b/src/test_soa4f3.c @@ -13,136 +13,15 @@ * You should have received a copy of the GNU Lesser General Public License * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ -#include "soa4f3.h" -#include "test_soaXf_utils.h" - -#define CHECK_F3(V, A, B, C, D, E, F, G, H, I, J, K, L) \ - { \ - const v4f_T* v__ = (V); \ - CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \ - CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \ - CHECK_V4MASK(v4f_eq(v__[2], v4f_set((I), (J), (K), (L))), V4TRUE); \ - } (void)0 +/* Generate the test_soa4f3 function */ +#define SOA_SIMD_WIDTH 4 +#define SOA_DIMENSION 3 +#include "test_soaXfY.h" int main(int argc, char** argv) { - v4f_T a[3], b[3], c[3], dst[3], f; (void)argc, (void)argv; - - CHK(soa4f3_set(a, soa4f3_splat(c, v4f_set1(-1.f))) == a); - CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[2], v4f_set1(-1.f)), V4TRUE); - CHK(soa4f3(c, - v4f_set(0.f, 1.f, 2.f, 3.f), - v4f_set(5.f, 6.f, 7.f, 8.f), - v4f_set(9.f, 10.f, 11.f, 12.f)) == c); - CHK(soa4f3_set(a, c) == a); - CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(c[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE); - 
CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE); - - CHK(soa4f3(a, - v4f_set(-1.f, 2.f, 3.f, -4.f), - v4f_set(5.f, -6.f, -7.f, 8.f), - v4f_set(9.f, -10.f, 1.f, -2.f)) == a); - CHK(soa4f3_minus(b, a) == b); - CHECK_F3(b, 1.f,-2.f,-3.f, 4.f,-5.f, 6.f, 7.f,-8.f,-9.f, 10.f,-1.f, 2.f); - - CHK(soa4f3_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst); - CHECK_F3(dst, 0.f, 4.f, 3.f,-1.f, 6.f,-4.f,-7.f, 11.f, 10.f,-8.f, 1.f, 1.f); - CHK(soa4f3_add(dst, a, b) == dst); - CHECK_F3(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f); - CHK(soa4f3_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst); - CHECK_F3(dst,-2.f, 0.f, 3.f,-7.f, 4.f,-8.f,-7.f, 5.f, 8.f,-12.f, 1.f,-5.f); - CHK(soa4f3_sub(dst, a, b) == dst); - CHECK_F3(dst,-2.f, 4.f, 6.f,-8.f, 10.f,-12.f,-14.f, 16.f, 18.f,-20.f, 2.f,-4.f); - CHK(soa4f3_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)) == dst); - CHECK_F3(dst,-2.f, 6.f, 0.f, 4.f, 10.f,-18.f, 0.f,-8.f, 18.f,-30.f, 0.f, 2.f); - CHK(soa4f3_mul(dst, a, b) == dst); - CHECK_F3(dst,-1.f,-4.f,-9.f,-16.f,-25.f,-36.f,-49.f,-64.f,-81.f,-100.f,-1.f,-4.f); - CHK(soa4f3_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)) == dst); - CHECK_F3(dst,-0.5f, 4.f, 3.f,-1.f, 2.5f,-12.f,-7.f, 2.f, 4.5f,-20.f, 1.f,-0.5f); - CHK(soa4f3_div(dst, a, b) == dst); - CHECK_F3(dst,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f); - - soa4f3(a, v4f_set1(0.f), v4f_set1(1.f), v4f_set1(2.f)); - soa4f3(b, v4f_set1(1.f), v4f_set1(2.f), v4f_set1(-1.f)); - CHK(soa4f3_lerp(dst, a, b, v4f_set1(0.5f)) == dst); - CHECK_F3(dst, - 0.5f, 0.5f, 0.5f, 0.5f, - 1.5f, 1.5f, 1.5f, 1.5f, - 0.5f, 0.5f, 0.5f, 0.5f); - CHK(soa4f3(a, - v4f_set(-1.f, 2.f, 3.f, -4.f), - v4f_set(5.f, -6.f, -7.f, 8.f), - v4f_set(9.f, -10.f, 1.f, -2.f)) == a); - CHK(soa4f3_minus(b, a) == b); - CHK(soa4f3_lerp(dst, a, b, v4f_set(0.f, 1.f, 0.5f, 1.f)) == dst); - CHECK_F3(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f, 9.f, 10.f, 0.f, 2.f); - - f = 
soa4f3_sum(b); - CHECK_V4MASK(v4f_eq(f, v4f_set(-13.f, 14.f, 3.f, -2.f)), V4TRUE); - f = soa4f3_dot(a, b); - CHECK_V4MASK(v4f_eq(f, v4f_set(-107.f, -140.f, -59.f, -84.f)), V4TRUE); - f = soa4f3_len(a); - CHECK_V4MASK - (v4f_eq_eps(f, v4f_sqrt(soa4f3_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE); - - CHECK_V4MASK(soa4f3_is_normalized(b), V4FALSE); - f = soa4f3_normalize(dst, b); - CHECK_V4MASK(v4f_eq_eps(f, soa4f3_len(b), v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(soa4f3_is_normalized(b), V4FALSE); - CHECK_V4MASK(soa4f3_is_normalized(dst), V4TRUE); - soa4f3_divf(b, b, f); - CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(v4f_eq_eps(dst[2], b[2], v4f_set1(1.e-6f)), V4TRUE); - - CHECK_V4MASK(soa4f3_eq(a, a), V4TRUE); - CHECK_V4MASK(soa4f3_eq(a, b), V4FALSE); - soa4f3(a, - v4f_set(-1.f, 2.f, 3.f,-4.f), - v4f_set(5.f,-6.f,-7.f, 8.f), - v4f_set(9.f,-10.f,1.f, -2.f)); - soa4f3(b, - v4f_set(-1.f, 2.f, 5.f,-4.001f), - v4f_set(5.f,-6.03f,7.f, 8.0), - v4f_set(9.f,-10.f,0.f, -2.001f)); - CHECK_V4MASK__(soa4f3_eq(a, b), ~0, 0, 0, 0); - CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0); - CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-6f)),~0, 0, 0, 0); - CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-2f)),~0, 0, 0,~0); - CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,1.e-2f,0.f,1.e-2f)),~0, 0, 0,~0); - CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,1.e-1f,0.f,1.e-2f)),~0,~0, 0,~0); - - soa4f3(a, - v4f_set(1.f, 2.f, 3.f,-1.f), - v4f_set(-2.f, 0.f,-7.f, 0.f), - v4f_set(-1.f, 4.f, 3.f, 2.f)); - soa4f3(b, - v4f_set(3.f, 2.f, 1.f,-2.f), - v4f_set(1.f,-6.f, 0.5f, 2.f), - v4f_set(0.f, 1.f, 0.f, 3.f)); - CHK(soa4f3_cross(dst, a, b) == dst); - CHECK_F3(dst, 1.f, 24.f,-1.5f,-4.f,-3.f, 6.f, 3.f,-1.f, 7.f,-12.f, 8.5f,-2.f); - - CHK(soa4f3_min(dst, a, b) == dst); - CHECK_F3(dst, 1.f, 2.f, 1.f, -2.f,-2.f,-6.f,-7.f, 0.f,-1.f, 1.f, 0.f, 2.f); - 
CHK(soa4f3_max(dst, a, b) == dst); - CHECK_F3(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f, 0.f, 4.f, 3.f, 3.f); - - soa4f3_sel(dst, b, a, v4f_mask(~0, ~0, 1, ~0)); - CHECK_F3(dst, 1.f, 2.f, 1.f, -1.f, -2.f, 0.f, 0.5f, 0.f, -1.f, 4.f, 0.f, 2.f); - - soa4f3(c, v4f_mask(~0,~0, 0,~0), v4f_mask(~0, 0, 0, 0), v4f_mask(0,~0,~0, 0)); - soa4f3_selv(dst, b, a, c); - CHECK_F3(dst, 1.f, 2.f, 1.f,-1.f,-2.f,-6.f, 0.5f, 2.f, 0.f, 4.f, 3.f, 3.f); - + test_soa4f3(); return 0; } - diff --git a/src/test_soa4f4.c b/src/test_soa4f4.c @@ -13,206 +13,15 @@ * You should have received a copy of the GNU Lesser General Public License * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ -#include "soa4f4.h" -#include "test_soaXf_utils.h" - -#define CHECK_F4(V, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - { \ - const v4f_T* v__ = (V); \ - CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \ - CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \ - CHECK_V4MASK(v4f_eq(v__[2], v4f_set((I), (J), (K), (L))), V4TRUE); \ - CHECK_V4MASK(v4f_eq(v__[3], v4f_set((M), (N), (O), (P))), V4TRUE); \ - } (void)0 +/* Generate the test_soa4f4 function */ +#define SOA_SIMD_WIDTH 4 +#define SOA_DIMENSION 4 +#include "test_soaXfY.h" int main(int argc, char** argv) { - v4f_T a[4], b[4], c[4], dst[4], f; (void)argc, (void)argv; - - CHK(soa4f4_set(a, soa4f4_splat(c, v4f_set1(-1.f))) == a); - CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[2], v4f_set1(-1.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[3], v4f_set1(-1.f)), V4TRUE); - CHK(soa4f4(c, - v4f_set(0.f, 1.f, 2.f, 3.f), - v4f_set(5.f, 6.f, 7.f, 8.f), - v4f_set(9.f, 10.f, 11.f, 12.f), - v4f_set(13.f, 14.f, 15.f, 16.f)) == c); - CHK(soa4f4_set(a, c) == a); - CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(c[2], 
v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(c[3], v4f_set(13.f, 14.f, 15.f, 16.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE); - CHECK_V4MASK(v4f_eq(a[3], v4f_set(13.f, 14.f, 15.f, 16.f)), V4TRUE); - - CHK(soa4f4(a, - v4f_set(-1.f, 2.f, 3.f, -4.f), - v4f_set(5.f, -6.f, -7.f, 8.f), - v4f_set(9.f, -10.f, 1.f, -2.f), - v4f_set(5.f, -3.f, -7.f, 1.f)) == a); - CHK(soa4f4_minus(b, a) == b); - CHECK_F4(b, - 1.f, -2.f, -3.f, 4.f, - -5.f, 6.f, 7.f, -8.f, - -9.f, 10.f, -1.f, 2.f, - -5.f, 3.f, 7.f, -1.f); - - CHK(soa4f4_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst); - CHECK_F4(dst, - 0.f, 4.f, 3.f, -1.f, - 6.f, -4.f, -7.f, 11.f, - 10.f, -8.f, 1.f, 1.f, - 6.f, -1.f, -7.f, 4.f); - CHK(soa4f4_add(dst, a, b) == dst); - CHECK_F4(dst, - 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f); - - CHK(soa4f4_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)) == dst); - CHECK_F4(dst, - -2.f, 0.f, 3.f, -7.f, - 4.f, -8.f, -7.f, 5.f, - 8.f,-12.f, 1.f,-5.f, - 4.f, -5.f, -7.f, -2.f); - CHK(soa4f4_sub(dst, a, b) == dst); - CHECK_F4(dst, - -2.f, 4.f, 6.f, -8.f, - 10.f, -12.f, -14.f, 16.f, - 18.f, -20.f, 2.f, -4.f, - 10.f, -6.f, -14.f, 2.f); - - CHK(soa4f4_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)) == dst); - CHECK_F4(dst, - -2.f, 6.f, 0.f, 4.f, - 10.f, -18.f, 0.f, -8.f, - 18.f, -30.f, 0.f, 2.f, - 10.f, -9.f, 0.f, -1.f); - CHK(soa4f4_mul(dst, a, b) == dst); - CHECK_F4(dst, - -1.f, -4.f, -9.f, -16.f, - -25.f, -36.f, -49.f, -64.f, - -81.f, -100.f, -1.f, -4.f, - -25.f, -9.f, -49.f, -1.f); - - CHK(soa4f4_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)) == dst); - CHECK_F4(dst, - -0.5f, 4.f, 3.f, -1.f, - 2.5f, -12.f, -7.f, 2.f, - 4.5f, -20.f, 1.f, -0.5f, - 2.5f, -6.f, -7.f, 0.25f); - CHK(soa4f4_div(dst, a, b) == dst); - CHECK_F4(dst, - -1.f, -1.f, -1.f, -1.f, - -1.f, -1.f, -1.f, -1.f, - 
-1.f, -1.f, -1.f, -1.f, - -1.f, -1.f, -1.f, -1.f); - - CHK(soa4f4(a, - v4f_set(-1.f, 2.f, 3.f, -4.f), - v4f_set(5.f, -6.f, -7.f, 8.f), - v4f_set(9.f, -10.f, 1.f, -2.f), - v4f_set(5.f, -3.f, -7.f, 1.f)) == a); - CHK(soa4f4_minus(b, a) == b); - CHK(soa4f4_lerp(dst, a, b, v4f_set(0.f, 1.f, 0.5f, 1.f)) == dst); - CHECK_F4(dst, - -1.f, -2.f, 0.f, 4.f, - 5.f, 6.f, 0.f, -8.f, - 9.f, 10.f, 0.f, 2.f, - 5.f, 3.f, 0.f, -1.f); - - f = soa4f4_sum(b); - CHECK_V4MASK(v4f_eq(f, v4f_set(-18.f, 17.f, 10.f, -3.f)), V4TRUE); - f = soa4f4_dot(a, b); - CHECK_V4MASK(v4f_eq(f, v4f_set(-132.f, -149.f, -108.f, -85.f)), V4TRUE); - f = soa4f4_len(a); - CHECK_V4MASK - (v4f_eq_eps(f, v4f_sqrt(soa4f4_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE); - - CHECK_V4MASK(soa4f4_is_normalized(b), V4FALSE); - f = soa4f4_normalize(dst, b); - CHECK_V4MASK(v4f_eq_eps(f, soa4f4_len(b), v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(soa4f4_is_normalized(b), V4FALSE); - CHECK_V4MASK(soa4f4_is_normalized(dst), V4TRUE); - soa4f4_divf(b, b, f); - CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(v4f_eq_eps(dst[2], b[2], v4f_set1(1.e-6f)), V4TRUE); - CHECK_V4MASK(v4f_eq_eps(dst[3], b[3], v4f_set1(1.e-6f)), V4TRUE); - - CHECK_V4MASK(soa4f4_eq(a, a), V4TRUE); - CHECK_V4MASK(soa4f4_eq(a, b), V4FALSE); - soa4f4(a, - v4f_set(-1.f, 2.f, 3.f, -4.f), - v4f_set(5.f, -6.f, -7.f, 8.f), - v4f_set(9.f, -10.f, 1.f, -2.f), - v4f_set(1.f, -1.f, 1.f, -2.f)); - soa4f4(b, - v4f_set(-1.f, 2.f, 3.f,-4.001f), - v4f_set(5.f,-6.03f,-7.f, 8.0), - v4f_set(9.f,-10.f,1.f, -2.001f), - v4f_set(1.f, -1.f, 1.0005f, -2.f)); - CHECK_V4MASK__(soa4f4_eq(a, b), ~0, 0, 0, 0); - CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0); - CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 0.f, 0.f, 1.e-6f)), - ~0, 0, 0, 0); - CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 0.f, 0.f, 1.e-2f)), - ~0, 0, 0,~0); - CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 
1.e-2f, 0.f, 1.e-2f)), - ~0, 0, 0,~0); - CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-1f, 0.f, 1.e-2f)), - ~0,~0, 0,~0); - CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-1f, 1.e-3f, 1.e-2f)), - ~0,~0,~0,~0); - - soa4f4(a, - v4f_set(1.f, 2.f, 3.f, -1.f), - v4f_set(-2.f, 0.f, -7.f, 0.f), - v4f_set(-1.f, 4.f, 3.f, 2.f), - v4f_set(-5.f, 7.f, 0.5f, -1.f)); - soa4f4(b, - v4f_set(3.f, 2.f, 1.f,-2.f), - v4f_set(1.f,-6.f, 0.5f, 2.f), - v4f_set(0.f, 1.f, 0.f, 3.f), - v4f_set(1.f,-1.f, 0.f, 0.f)); - CHK(soa4f4_min(dst, a, b) == dst); - CHECK_F4(dst, - 1.f, 2.f, 1.f, -2.f, - -2.f, -6.f, -7.f, 0.f, - -1.f, 1.f, 0.f, 2.f, - -5.f, -1.f, 0.f, -1.f); - CHK(soa4f4_max(dst, a, b) == dst); - CHECK_F4(dst, - 3.f, 2.f, 3.f, -1.f, - 1.f, 0.f, 0.5f, 2.f, - 0.f, 4.f, 3.f, 3.f, - 1.f, 7.f, 0.5f, 0.f); - - soa4f4_sel(dst, b, a, v4f_mask(~0, ~0, 1, ~0)); - CHECK_F4(dst, - 1.f, 2.f, 1.f, -1.f, - -2.f, 0.f, 0.5f, 0.f, - -1.f, 4.f, 0.f, 2.f, - -5.f, 7.f, 0.f, -1.f); - - soa4f4(c, - v4f_mask(~0,~0, 0,~0), - v4f_mask(~0, 0, 0, 0), - v4f_mask( 0,~0,~0, 0), - v4f_mask(~0,~0, 0, 0)); - soa4f4_selv(dst, b, a, c); - CHECK_F4(dst, - 1.f, 2.f, 1.f, -1.f, - -2.f, -6.f, 0.5f, 2.f, - 0.f, 4.f, 3.f, 3.f, - -5.f, 7.f, 0.f, 0.f); - + test_soa4f4(); return 0; } diff --git a/src/test_soa8f2.c b/src/test_soa8f2.c @@ -13,119 +13,16 @@ * You should have received a copy of the GNU Lesser General Public License * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "soa8f2.h" -#include "test_soaXf_utils.h" +/* Generate the test_soa8f2 function */ +#define SOA_SIMD_WIDTH 8 +#define SOA_DIMENSION 2 +#include "test_soaXfY.h" int main(int argc, char** argv) { - v8f_T a[2], b[2], c[2]; - v8f_T v0, v1, v2, v3; (void)argc, (void)argv; - - CHK(soa8f2_set(a, soa8f2_splat(c, v8f_set1(-1))) == a); - CHECK_V8MASK(v8f_eq(a[0], v8f_set1(-1.f)), V8TRUE); - CHECK_V8MASK(v8f_eq(a[1], v8f_set1(-1.f)), V8TRUE); - - v0 = v8f_set(.5f, 1.f, 2.f, 3.f, 4.f, 5.f , 6.f , 7.f); - v1 = v8f_set(8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f); - CHK(soa8f2(c, v0, v1) == c); - CHK(soa8f2_set(a, c) == a); - CHECK_V8MASK(v8f_eq(c[0], v0), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v1), V8TRUE); - CHECK_V8MASK(v8f_eq(a[0], v0), V8TRUE); - CHECK_V8MASK(v8f_eq(a[1], v1), V8TRUE); - - v0 = v8f_set(.5f, -1.f, -2.f, 3.f, -4.f, 5.f , 6.f , -7.f); - v1 = v8f_set(-8.f, 9.f, -10.f, 11.f, 12.f, -13.f, -14.f, -15.f); - CHK(soa8f2(a, v0, v1) == a); - CHK(soa8f2_minus(b, a) == b); - CHECK_V8MASK(v8f_eq(b[0], v8f_minus(v0)), V8TRUE); - CHECK_V8MASK(v8f_eq(b[1], v8f_minus(v1)), V8TRUE); - - v2 = v8f_set(1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f); - CHK(soa8f2_addf(c, a, v2) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_add(v0, v2)), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_add(v1, v2)), V8TRUE); - CHK(soa8f2_add(c, a, b) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_zero()), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_zero()), V8TRUE); - CHK(soa8f2_subf(c, a, v2) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_sub(v0, v2)), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_sub(v1, v2)), V8TRUE); - CHK(soa8f2_sub(c, a, b) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_sub(a[0], b[0])), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_sub(a[1], b[1])), V8TRUE); - CHK(soa8f2_mulf(c, a, v2) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_mul(v0, v2)), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_mul(v1, v2)), V8TRUE); - CHK(soa8f2_mul(c, a, b) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_mul(a[0], b[0])), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], 
v8f_mul(a[1], b[1])), V8TRUE); - CHK(soa8f2_divf(c, a, v2) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_div(v0, v2)), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_div(v1, v2)), V8TRUE); - CHK(soa8f2_div(c, a, b) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_set1(-1.f)), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_set1(-1.f)), V8TRUE); - - v3 = v8f_set(1.f, 0.5f, 0.25f, 0.125f, 0.0625f, 0.03125f, 2.f, 4.f); - CHK(soa8f2_lerp(c, a, b, v3)); - CHECK_V8MASK(v8f_eq(c[0], v8f_lerp(a[0], b[0], v3)), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_lerp(a[1], b[1], v3)), V8TRUE); - - v3 = soa8f2_sum(b); - CHECK_V8MASK(v8f_eq(v3, v8f_add(b[0], b[1])), V8TRUE); - v0 = v8f_mul(a[0], b[0]); - v1 = v8f_mul(a[1], b[1]); - v2 = v8f_add(v0, v1); - v3 = soa8f2_dot(a, b); - CHECK_V8MASK(v8f_eq(v3, v2), V8TRUE); - v2 = v8f_sqrt(soa8f2_dot(a, a)); - v3 = soa8f2_len(a); - CHECK_V8MASK(v8f_eq(v3, v2), V8TRUE); - - CHECK_V8MASK(soa8f2_is_normalized(a), V8FALSE); - v2 = soa8f2_normalize(b, a); - CHECK_V8MASK(v8f_eq_eps(v3, v2, v8f_set1(1.e-4f)), V8TRUE); - CHECK_V8MASK(soa8f2_is_normalized(b), V8TRUE); - v2 = soa8f2_len(b); - CHECK_V8MASK(v8f_eq_eps(v2, v8f_set1(1), v8f_set1(1.e-4f)), V8TRUE); - soa8f2_divf(c, a, v3); - CHECK_V8MASK(v8f_eq_eps(b[0], c[0], v8f_set1(1.e-4f)), V8TRUE); - CHECK_V8MASK(v8f_eq_eps(b[1], c[1], v8f_set1(1.e-4f)), V8TRUE); - - CHECK_V8MASK(soa8f2_eq(a, a), V8TRUE); - CHECK_V8MASK(soa8f2_eq(a, b), V8FALSE); - soa8f2_addf(b, a, v8f_set1(1.e-4f)); - CHECK_V8MASK(soa8f2_eq(a, b), V8FALSE); - CHECK_V8MASK(soa8f2_eq_eps(a, b, v8f_set1(1.e-3f)), V8TRUE); - v2 = v8f_set(0, 0, 1.e-3f, 0, 0, 0, 1.e-3f, 1.e-3f); - CHECK_V8MASK__(soa8f2_eq_eps(a, b, v2), 0, 0, ~0, 0, 0, 0, ~0, ~0); - - CHK(soa8f2_min(c, a, b) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_min(a[0], b[0])), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_min(a[1], b[1])), V8TRUE); - CHK(soa8f2_max(c, a, b) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_max(a[0], b[0])), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_max(a[1], b[1])), V8TRUE); - - v0 = 
v8f_mask(0,0,~0,~0,0,~0,~0,0); - v1 = v8f_mask(0,~0,~0,0,0,0,0,~0); - CHK(soa8f2_sel(c, b, a, v0) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_sel(b[0], a[0], v0)), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_sel(b[1], a[1], v0)), V8TRUE); - soa8f2(c, v0, v1); - CHK(soa8f2_selv(c, b, a, c) == c); - CHECK_V8MASK(v8f_eq(c[0], v8f_sel(b[0], a[0], v0)), V8TRUE); - CHECK_V8MASK(v8f_eq(c[1], v8f_sel(b[1], a[1], v1)), V8TRUE); - - v0 = v8f_mul(a[0], b[1]); - v1 = v8f_mul(a[1], b[0]); - v2 = v8f_sub(v0, v1); - v3 = soa8f2_cross(a, b); - CHECK_V8MASK(v8f_eq_eps(v3, v2, v8f_set1(1.e-6f)), V8TRUE); - + test_soa8f2(); return 0; } diff --git a/src/test_soa8f3.c b/src/test_soa8f3.c @@ -0,0 +1,28 @@ +/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ + +/* Generate the test_soa8f3 function */ +#define SOA_SIMD_WIDTH 8 +#define SOA_DIMENSION 3 +#include "test_soaXfY.h" + +int +main(int argc, char** argv) +{ + (void)argc, (void)argv; + test_soa8f3(); + return 0; +} + diff --git a/src/test_soa8f4.c b/src/test_soa8f4.c @@ -0,0 +1,28 @@ +/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +/* Generate the test_soa8f4 function */ +#define SOA_SIMD_WIDTH 8 +#define SOA_DIMENSION 4 +#include "test_soaXfY.h" + +int +main(int argc, char** argv) +{ + (void)argc, (void)argv; + test_soa8f4(); + return 0; +} + diff --git a/src/test_soaXfY.h b/src/test_soaXfY.h @@ -0,0 +1,262 @@ +/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Generic test of the SoA API. The SOA_SIMD_WIDTH (4 or 8) and SOA_DIMENSION
+ * (2, 3 or 4) macros must be defined before this file is included. It then
+ * defines the static function test_soa<SOA_SIMD_WIDTH>f<SOA_DIMENSION>. Both
+ * parameters and all the helper macros defined below are undefined at the end
+ * of the file. */
+
+#include "rsimd.h"
+#include <rsys/rsys.h>
+
+/* Check macros */
+#ifndef SOA_SIMD_WIDTH
+  #error "Missing the SOA_SIMD_WIDTH macro"
+#endif
+#if SOA_SIMD_WIDTH != 4 && SOA_SIMD_WIDTH != 8
+  #error "Invalid value for the SOA_SIMD_WIDTH macro"
+#endif
+#ifndef SOA_DIMENSION
+  #error "Missing the SOA_DIMENSION macro"
+#endif
+#if SOA_DIMENSION < 2 || SOA_DIMENSION > 4
+  #error "Invalid value for the SOA_DIMENSION macro"
+#endif
+
+/* Define the macros generic to the SOA_SIMD_WIDTH parameter. VEC and MASK
+ * always take 8 arguments; only the first 4 are used when the width is 4.
+ * CHKVX checks, lane by lane, that the v4f_mask_<x|y|z|w> of V0 and V1 are
+ * equal; each operand is evaluated once. */
+#if SOA_SIMD_WIDTH == 4
+  #define soaX soa4
+  #define vXf(Func) CONCAT(v4f_, Func)
+  #define vXf_T v4f_T
+  #define VEC(A, B, C, D, E, F, G, H) v4f_set(A, B, C, D)
+  #define MASK(A, B, C, D, E, F, G, H) v4f_mask(A, B, C, D)
+  #define CHKVX(V0, V1) do { \
+    const v4f_T v0__ = (V0); \
+    const v4f_T v1__ = (V1); \
+    CHK(v4f_mask_x(v0__) == v4f_mask_x(v1__)); \
+    CHK(v4f_mask_y(v0__) == v4f_mask_y(v1__)); \
+    CHK(v4f_mask_z(v0__) == v4f_mask_z(v1__)); \
+    CHK(v4f_mask_w(v0__) == v4f_mask_w(v1__)); \
+  } while(0)
+#elif SOA_SIMD_WIDTH == 8
+  #define soaX soa8
+  #define vXf(Func) CONCAT(v8f_, Func)
+  #define vXf_T v8f_T
+  #define VEC(A, B, C, D, E, F, G, H) v8f_set(A, B, C, D, E, F, G, H)
+  #define MASK(A, B, C, D, E, F, G, H) v8f_mask(A, B, C, D, E, F, G, H)
+  #define CHKVX(V0, V1) do { \
+    const v8f_T v0__ = (V0); \
+    const v8f_T v1__ = (V1); \
+    CHK(v4f_mask_x(v8f_abcd(v0__)) == v4f_mask_x(v8f_abcd(v1__))); \
+    CHK(v4f_mask_y(v8f_abcd(v0__)) == v4f_mask_y(v8f_abcd(v1__))); \
+    CHK(v4f_mask_z(v8f_abcd(v0__)) == v4f_mask_z(v8f_abcd(v1__))); \
+    CHK(v4f_mask_w(v8f_abcd(v0__)) == v4f_mask_w(v8f_abcd(v1__))); \
+    CHK(v4f_mask_x(v8f_efgh(v0__)) == v4f_mask_x(v8f_efgh(v1__))); \
+    CHK(v4f_mask_y(v8f_efgh(v0__)) == v4f_mask_y(v8f_efgh(v1__))); \
+    CHK(v4f_mask_z(v8f_efgh(v0__)) == v4f_mask_z(v8f_efgh(v1__))); \
+    CHK(v4f_mask_w(v8f_efgh(v0__)) == v4f_mask_w(v8f_efgh(v1__))); \
+  } while(0)
+#endif
+
+/* Define the macros generic to the SOA_DIMENSION parameter. SOA_VEC always
+ * takes 4 components; only the first SOA_DIMENSION ones are used. */
+#if SOA_DIMENSION == 2
+  #define soaXfY(Func) CONCAT(CONCAT(soaX, f2_), Func)
+  #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f2)(Dst, X, Y)
+#elif SOA_DIMENSION == 3
+  #define soaXfY(Func) CONCAT(CONCAT(soaX, f3_), Func)
+  #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f3)(Dst, X, Y, Z)
+#elif SOA_DIMENSION == 4
+  #define soaXfY(Func) CONCAT(CONCAT(soaX, f4_), Func)
+  #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f4)(Dst, X, Y, Z, W)
+#endif
+
+/* Include the corresponding header */
+#if SOA_SIMD_WIDTH == 4
+  #if SOA_DIMENSION == 2
+    #include "soa4f2.h"
+  #elif SOA_DIMENSION == 3
+    #include "soa4f3.h"
+  #elif SOA_DIMENSION == 4
+    #include "soa4f4.h"
+  #endif
+#else
+  #if SOA_DIMENSION == 2
+    #include "soa8f2.h"
+  #elif SOA_DIMENSION == 3
+    #include "soa8f3.h"
+  #elif SOA_DIMENSION == 4
+    #include "soa8f4.h"
+  #endif
+#endif
+
+/* Define constants */
+#define VXTRUE MASK(~0,~0,~0,~0,~0,~0,~0,~0)
+#define VXFALSE MASK(0,0,0,0,0,0,0,0)
+
+/* Exercise the soa<SOA_SIMD_WIDTH>f<SOA_DIMENSION> functions and compare each
+ * result, component by component, against the same operation performed with
+ * the v<SOA_SIMD_WIDTH>f functions. Any mismatch is reported through CHK. */
+static void
+CONCAT(CONCAT(CONCAT(test_, soaX), f), SOA_DIMENSION)(void)
+{
+  vXf_T a[SOA_DIMENSION], b[SOA_DIMENSION], c[SOA_DIMENSION];
+  vXf_T v[4], f, tmp, mask;
+  int i;
+
+  /* NOTE(review): v[3] holds the same values as v[2] */
+  v[0] = VEC(.5f, -1.f, -2.f, 3.f, -4.f, 5.f , 6.f , -7.f);
+  v[1] = VEC(-8.f, 9.f, -10.f, 11.f, 12.f, -13.f, -14.f, -15.f);
+  v[2] = VEC(16.f, -17.f, 18.f, -19.f, 20.f, 21.f, 22.f, -23.f);
+  v[3] = VEC(16.f, -17.f, 18.f, -19.f, 20.f, 21.f, 22.f, -23.f);
+  f = VEC(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
+
+  /* Setters */
+  soaXfY(splat)(a, vXf(set1)(-1.f));
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], vXf(set1)(-1.f)), VXTRUE);
+  }
+  CHK(soaXfY(set)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(b[i], a[i]), VXTRUE);
+  }
+  CHK(SOA_VEC(a, v[0], v[1], v[2], v[3]) == a);
+  CHK(soaXfY(set)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], v[i]), VXTRUE);
+    CHKVX(vXf(eq)(b[i], v[i]), VXTRUE);
+  }
+
+  /* Unary operator. From here on, b is the opposite of a */
+  CHK(soaXfY(minus)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], v[i]), VXTRUE);
+    CHKVX(vXf(eq)(b[i], vXf(minus)(v[i])), VXTRUE);
+  }
+
+  /* Regular binary operators */
+  CHK(soaXfY(addf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(add)(a[i], f)), VXTRUE);
+  }
+  CHK(soaXfY(subf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sub)(a[i], f)), VXTRUE);
+  }
+  CHK(soaXfY(mulf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(mul)(a[i], f)), VXTRUE);
+  }
+  CHK(soaXfY(divf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(div)(a[i], f)), VXTRUE);
+  }
+  CHK(soaXfY(add)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(zero)()), VXTRUE); /* Since b == -a */
+  }
+  CHK(soaXfY(sub)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sub)(a[i], b[i])), VXTRUE);
+  }
+  CHK(soaXfY(mul)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(mul)(a[i], b[i])), VXTRUE);
+  }
+  CHK(soaXfY(div)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(div)(a[i], b[i])), VXTRUE);
+  }
+
+  /* Linear interpolation.
+   * NOTE(review): assumes that lerp returns its destination, as the other
+   * operators checked in this test do -- confirm against the soaXfY API */
+  CHK(soaXfY(lerp)(c, a, b, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(lerp)(a[i], b[i], f)), VXTRUE);
+  }
+
+  /* Sum operator */
+  f = soaXfY(sum)(a);
+  tmp = vXf(zero)();
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    tmp = vXf(add)(tmp, a[i]);
+  }
+  CHKVX(vXf(eq)(f, tmp), VXTRUE);
+
+  /* Dot operator */
+  f = soaXfY(dot)(a, b);
+  tmp = vXf(zero)();
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    tmp = vXf(add)(tmp, vXf(mul)(a[i], b[i]));
+  }
+  CHKVX(vXf(eq)(f, tmp), VXTRUE);
+
+  /* Vector normalization functions. tmp is the reference length of a */
+  CHKVX(soaXfY(is_normalized)(a), VXFALSE);
+  f = soaXfY(normalize)(c, a);
+  tmp = vXf(sqrt)(soaXfY(dot)(a, a));
+  CHKVX(vXf(eq)(soaXfY(len)(a), tmp), VXTRUE);
+  CHKVX(vXf(eq_eps)(f, tmp, vXf(set1)(1.e-4f)), VXTRUE);
+  CHKVX(soaXfY(is_normalized)(c), VXTRUE);
+  CHKVX(vXf(eq_eps)(soaXfY(len)(c), vXf(set1)(1.f), vXf(set1)(1.e-4f)), VXTRUE);
+  soaXfY(divf)(b, a, f);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq_eps)(b[i], c[i], vXf(set1)(1.e-4f)), VXTRUE);
+  }
+
+  /* Comparators */
+  CHKVX(soaXfY(eq)(a, a), VXTRUE);
+  CHKVX(soaXfY(eq)(a, b), VXFALSE);
+  soaXfY(addf)(b, a, vXf(set1)(1.e-4f));
+  CHKVX(soaXfY(eq)(a, b), VXFALSE);
+  CHKVX(soaXfY(eq_eps)(a, b, vXf(set1)(1.e-3f)), VXTRUE);
+  /* Per lane epsilon: only the lanes whose epsilon is not null must match */
+  tmp = VEC(0, 0, 1.e-3f, 0, 0, 0, 1.e-3f, 1.e-3f);
+  mask = MASK(0, 0, ~0, 0, 0, 0, ~0, ~0);
+  CHKVX(soaXfY(eq_eps)(a, b, tmp), mask);
+
+  /* Min/Max */
+  CHK(soaXfY(min)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(min)(a[i], b[i])), VXTRUE);
+  }
+  CHK(soaXfY(max)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(max)(a[i], b[i])), VXTRUE);
+  }
+
+  /* Select. v is reused to store the selection masks */
+  v[0] = MASK(0,0,~0,~0,0,~0,~0,0);
+  v[1] = MASK(0,~0,~0,0,0,0,0,~0);
+  v[2] = MASK(0, 0, 0,0,~0,~0,0, 0);
+  v[3] = MASK(~0,~0,~0,0,~0,0,0,~0);
+  CHK(soaXfY(sel)(c, b, a, v[0]) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sel)(b[i], a[i], v[0])), VXTRUE);
+  }
+  CHK(soaXfY(selv)(c, b, a, v) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sel)(b[i], a[i], v[i])), VXTRUE);
+  }
+
+  /* Cross product. Not tested when SOA_DIMENSION is 4 */
+#if SOA_DIMENSION == 2
+  v[0] = vXf(mul)(a[0], b[1]);
+  v[1] = vXf(mul)(a[1], b[0]);
+  tmp = vXf(sub)(v[0], v[1]);
+  f = soaXfY(cross)(a, b);
+  CHKVX(vXf(eq_eps)(f, tmp, vXf(set1)(1.e-6f)), VXTRUE);
+#elif SOA_DIMENSION == 3
+  v[0] = vXf(sub)(vXf(mul)(a[1], b[2]), vXf(mul)(a[2], b[1]));
+  v[1] = vXf(sub)(vXf(mul)(a[2], b[0]), vXf(mul)(a[0], b[2]));
+  v[2] = vXf(sub)(vXf(mul)(a[0], b[1]), vXf(mul)(a[1], b[0]));
+  CHK(soaXfY(cross)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq_eps)(c[i], v[i], vXf(set1)(1.e-6f)), VXTRUE);
+  }
+#endif
+}
+
+/* Generic parameters */
+#undef SOA_SIMD_WIDTH
+#undef SOA_DIMENSION
+
+/* Macros generic to the SOA_SIMD_WIDTH parameter */
+#undef soaX
+#undef vXf
+#undef vXf_T
+#undef VEC
+#undef MASK
+#undef CHKVX
+
+/* Macros generic to the SOA_DIMENSION parameter */
+#undef soaXfY
+#undef SOA_VEC
+
+/* Constants */
+#undef VXTRUE
+#undef VXFALSE