rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit 54a913366e6fc1d2e8c2813ed9763e448bb99a1a
parent 101c28de8a2975f80e1b664039d0759051fda495
Author: vaplv <vaplv@free.fr>
Date:   Thu, 23 Oct 2014 16:14:20 +0200

Implement and test the SIMD SoA Float4 functions

Diffstat:
M README.md | 2 +-
M cmake/CMakeLists.txt | 1 +
A src/soa4f4.h | 24 ++++++++++++++++++++++++
A src/test_soa4f4.c | 199 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 225 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md @@ -4,7 +4,7 @@ This C89 library defines an interface that encapsulates and make easier the manipulation of SIMD instruction sets. It also provides a SIMD implementation of linear algebra operations for 3x3 and 4x4 matrices as well as quaternions arranged in an `Array of Structures` SIMD layout. Linear algebra functions on -two and three dimensions `Structure of Arrays` vectors are also implemented. +`Structure of Arrays` <2|3|4> dimensions vectors are also implemented. Note that currently only the SSE2 instruction set is supported. diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -89,6 +89,7 @@ new_test(test_aosf44) new_test(test_aosq) new_test(test_soa4f2) new_test(test_soa4f3) +new_test(test_soa4f4) ################################################################################ # Install directives diff --git a/src/soa4f4.h b/src/soa4f4.h @@ -0,0 +1,24 @@ +/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef SOA4F4_H +#define SOA4F4_H + +/* Generate the common soa4fX functions */ +#define SOA4FX_DIMENSION__ 4 +#include "soa4fX.h" + +#endif /* SOA4F4_H */ + diff --git a/src/test_soa4f4.c b/src/test_soa4f4.c @@ -0,0 +1,199 @@ +/* Copyright (C) 2014 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +#include "soa4f4.h" +#include "test_soa4f_utils.h" + +#define CHECK_F4(V, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + { \ + const v4f_T* v__ = (V); \ + CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \ + CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \ + CHECK_V4MASK(v4f_eq(v__[2], v4f_set((I), (J), (K), (L))), V4TRUE); \ + CHECK_V4MASK(v4f_eq(v__[3], v4f_set((M), (N), (O), (P))), V4TRUE); \ + } (void)0 + +int +main(int argc, char** argv) +{ + v4f_T a[4], b[4], c[4], dst[4], f; + (void)argc, (void)argv; + + CHECK(soa4f4_set(a, soa4f4_splat(c, v4f_set1(-1.f))), a); + CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(a[2], v4f_set1(-1.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(a[3], v4f_set1(-1.f)), V4TRUE); + CHECK(soa4f4(c, + v4f_set(0.f, 1.f, 2.f, 3.f), + v4f_set(5.f, 6.f, 7.f, 8.f), + v4f_set(9.f, 10.f, 11.f, 12.f), + v4f_set(13.f, 14.f, 15.f, 16.f)), c); + CHECK(soa4f4_set(a, 
c), a); + CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(c[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(c[3], v4f_set(13.f, 14.f, 15.f, 16.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(a[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE); + CHECK_V4MASK(v4f_eq(a[3], v4f_set(13.f, 14.f, 15.f, 16.f)), V4TRUE); + + CHECK(soa4f4(a, + v4f_set(-1.f, 2.f, 3.f, -4.f), + v4f_set(5.f, -6.f, -7.f, 8.f), + v4f_set(9.f, -10.f, 1.f, -2.f), + v4f_set(5.f, -3.f, -7.f, 1.f)), a); + CHECK(soa4f4_minus(b, a), b); + CHECK_F4(b, + 1.f, -2.f, -3.f, 4.f, + -5.f, 6.f, 7.f, -8.f, + -9.f, 10.f, -1.f, 2.f, + -5.f, 3.f, 7.f, -1.f); + + CHECK(soa4f4_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst); + CHECK_F4(dst, + 0.f, 4.f, 3.f, -1.f, + 6.f, -4.f, -7.f, 11.f, + 10.f, -8.f, 1.f, 1.f, + 6.f, -1.f, -7.f, 4.f); + CHECK(soa4f4_add(dst, a, b), dst); + CHECK_F4(dst, + 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f); + + CHECK(soa4f4_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst); + CHECK_F4(dst, + -2.f, 0.f, 3.f, -7.f, + 4.f, -8.f, -7.f, 5.f, + 8.f,-12.f, 1.f,-5.f, + 4.f, -5.f, -7.f, -2.f); + CHECK(soa4f4_sub(dst, a, b), dst); + CHECK_F4(dst, + -2.f, 4.f, 6.f, -8.f, + 10.f, -12.f, -14.f, 16.f, + 18.f, -20.f, 2.f, -4.f, + 10.f, -6.f, -14.f, 2.f); + + CHECK(soa4f4_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)), dst); + CHECK_F4(dst, + -2.f, 6.f, 0.f, 4.f, + 10.f, -18.f, 0.f, -8.f, + 18.f, -30.f, 0.f, 2.f, + 10.f, -9.f, 0.f, -1.f); + CHECK(soa4f4_mul(dst, a, b), dst); + CHECK_F4(dst, + -1.f, -4.f, -9.f, -16.f, + -25.f, -36.f, -49.f, -64.f, + -81.f, -100.f, -1.f, -4.f, + -25.f, -9.f, -49.f, -1.f); + + CHECK(soa4f4_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)), dst); + CHECK_F4(dst, + -0.5f, 4.f, 3.f, -1.f, + 2.5f, -12.f, -7.f, 
2.f, + 4.5f, -20.f, 1.f, -0.5f, + 2.5f, -6.f, -7.f, 0.25f); + CHECK(soa4f4_div(dst, a, b), dst); + CHECK_F4(dst, + -1.f, -1.f, -1.f, -1.f, + -1.f, -1.f, -1.f, -1.f, + -1.f, -1.f, -1.f, -1.f, + -1.f, -1.f, -1.f, -1.f); + + CHECK(soa4f4(a, + v4f_set(-1.f, 2.f, 3.f, -4.f), + v4f_set(5.f, -6.f, -7.f, 8.f), + v4f_set(9.f, -10.f, 1.f, -2.f), + v4f_set(5.f, -3.f, -7.f, 1.f)), a); + CHECK(soa4f4_minus(b, a), b); + CHECK(soa4f4_lerp(dst, a, b, v4f_set(-0.5f, 1.f, 0.5f, 4.f)), dst); + CHECK_F4(dst, + -1.f, -2.f, 0.f, 4.f, + 5.f, 6.f, 0.f, -8.f, + 9.f, 10.f, 0.f, 2.f, + 5.f, 3.f, 0.f, -1.f); + + f = soa4f4_sum(b); + CHECK_V4MASK(v4f_eq(f, v4f_set(-18.f, 17.f, 10.f, -3.f)), V4TRUE); + f = soa4f4_dot(a, b); + CHECK_V4MASK(v4f_eq(f, v4f_set(-132.f, -149.f, -108.f, -85.f)), V4TRUE); + f = soa4f4_len(a); + CHECK_V4MASK + (v4f_eq_eps(f, v4f_sqrt(soa4f4_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE); + + CHECK_V4MASK(soa4f4_is_normalized(b), V4FALSE); + f = soa4f4_normalize(dst, b); + CHECK_V4MASK(v4f_eq_eps(f, soa4f4_len(b), v4f_set1(1.e-6f)), V4TRUE); + CHECK_V4MASK(soa4f4_is_normalized(b), V4FALSE); + CHECK_V4MASK(soa4f4_is_normalized(dst), V4TRUE); + soa4f4_divf(b, b, f); + CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE); + CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE); + CHECK_V4MASK(v4f_eq_eps(dst[2], b[2], v4f_set1(1.e-6f)), V4TRUE); + CHECK_V4MASK(v4f_eq_eps(dst[3], b[3], v4f_set1(1.e-6f)), V4TRUE); + + CHECK_V4MASK(soa4f4_eq(a, a), V4TRUE); + CHECK_V4MASK(soa4f4_eq(a, b), V4FALSE); + soa4f4(a, + v4f_set(-1.f, 2.f, 3.f, -4.f), + v4f_set(5.f, -6.f, -7.f, 8.f), + v4f_set(9.f, -10.f, 1.f, -2.f), + v4f_set(1.f, -1.f, 1.f, -2.f)); + soa4f4(b, + v4f_set(-1.f, 2.f, 3.f,-4.001f), + v4f_set(5.f,-6.03f,-7.f, 8.0), + v4f_set(9.f,-10.f,1.f, -2.001f), + v4f_set(1.f, -1.f, 1.0005f, -2.f)); + CHECK_V4MASK__(soa4f4_eq(a, b), ~0, 0, 0, 0); + CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0); + CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 0.f, 
0.f, 1.e-6f)), + ~0, 0, 0, 0); + CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 0.f, 0.f, 1.e-2f)), + ~0, 0, 0,~0); + CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-2f, 0.f, 1.e-2f)), + ~0, 0, 0,~0); + CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-1f, 0.f, 1.e-2f)), + ~0,~0, 0,~0); + CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-1f, 1.e-3f, 1.e-2f)), + ~0,~0,~0,~0); + + soa4f4(a, + v4f_set(1.f, 2.f, 3.f, -1.f), + v4f_set(-2.f, 0.f, -7.f, 0.f), + v4f_set(-1.f, 4.f, 3.f, 2.f), + v4f_set(-5.f, 7.f, 0.5f, -1.f)); + soa4f4(b, + v4f_set(3.f, 2.f, 1.f,-2.f), + v4f_set(1.f,-6.f, 0.5f, 2.f), + v4f_set(0.f, 1.f, 0.f, 3.f), + v4f_set(1.f,-1.f, 0.f, 0.f)); + CHECK(soa4f4_min(dst, a, b), dst); + CHECK_F4(dst, + 1.f, 2.f, 1.f, -2.f, + -2.f, -6.f, -7.f, 0.f, + -1.f, 1.f, 0.f, 2.f, + -5.f, -1.f, 0.f, -1.f); + CHECK(soa4f4_max(dst, a, b), dst); + CHECK_F4(dst, + 3.f, 2.f, 3.f, -1.f, + 1.f, 0.f, 0.5f, 2.f, + 0.f, 4.f, 3.f, 3.f, + 1.f, 7.f, 0.5f, 0.f); + + return 0; +}