rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit dc44f07f5fca4178b6a4dd52cb81f06391660402
parent 85c665f40a596c1728a57523014a5eea61f1ba86
Author: vaplv <vaplv@free.fr>
Date:   Tue, 27 Apr 2021 15:59:55 +0200

Add the math functions for the v8f_T type

Diffstat:
M cmake/CMakeLists.txt | 14 ++++++++++----
M src/math.h | 141 ++++---------------------------------------------------------------------------
A src/math4.h | 41 +++++++++++++++++++++++++++++++++++++++++
A src/math8.h | 24 ++++++++++++++++++++++++
A src/mathX.h | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/soaXfY_begin.h | 12 +++---------
M src/soaXfY_end.h | 4 ++--
D src/test_math.c | 138 -------------------------------------------------------------------------------
A src/test_math4.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/test_math8.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/vXf_begin.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/vXf_end.h | 31 +++++++++++++++++++++++++++++++
12 files changed, 621 insertions(+), 288 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -63,6 +63,9 @@ set(RSIMD_FILES_INC_LEGACY aosf44.h aosq.h math.h + mathX.h + math4.h + math8.h rsimd.h soaXfY.h soaXfY_begin.h @@ -74,7 +77,9 @@ set(RSIMD_FILES_INC_LEGACY soa4f4.h soa8f2.h soa8f3.h - soa8f4.h) + soa8f4.h + vXf_begin.h + vXf_end.h) set(RSIMD_FILES_INC_SSE sse/sse.h sse/ssef.h @@ -88,7 +93,7 @@ set(RSIMD_FILES_SRC aosf44.c aosq.c) set(RSIMD_FILES_DOC COPYING COPYING.LESSER README.md) -set(RSIMD_FILES_CMAKE +set(RSIMD_FILES_CMAKE RSIMDConfig.cmake RSIMDConfigVersion.cmake) rcmake_prepend_path(RSIMD_FILES_INC_LEGACY ${RSIMD_SOURCE_DIR}) @@ -131,7 +136,7 @@ if(NOT NO_TEST) new_test(test_aosf33) new_test(test_aosf44) new_test(test_aosq) - new_test(test_math) + new_test(test_math4) new_test(test_soa4f2) new_test(test_soa4f3) new_test(test_soa4f4) @@ -149,6 +154,7 @@ if(NOT NO_TEST) endif() if(AVX AND CMAKE_COMPILER_IS_GNUCC) + new_test(test_math8 "-mavx") new_test(test_v8f "-mavx") new_test(test_v8i "-mavx") new_test(test_soa8f2 "-mavx") @@ -174,6 +180,6 @@ install(FILES ${Sleef_DIR}/SleefConfig.cmake DESTINATION lib/cmake/Sleef/) install(FILES ${PROJECT_SOURCE_DIR}/RSIMDConfig.cmake DESTINATION lib/cmake/RSIMD) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake DESTINATION lib/cmake/RSIMD) diff --git a/src/math.h b/src/math.h @@ -16,143 +16,14 @@ #ifndef RSIMD_MATH_H #define RSIMD_MATH_H -#include "rsimd.h" +#include <rsys/rsys.h> -#ifdef COMPILER_GCC - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wignored-qualifiers" +#ifdef SIMD_SSE2 + #include "math4.h" #endif - -#include <sleef.h> - -#ifdef COMPILER_GCC - #pragma GCC diagnostic pop +#ifdef SIMD_AVX + #include "math8.h" #endif -static FINLINE v4f_T -v4f_copysign(const v4f_T x, const v4f_T y) -{ - return Sleef_copysignf4(x, y); -} - -static INLINE v4f_T -v4f_floor(const v4f_T x) -{ - return Sleef_floorf4(x); -} - -static INLINE v4f_T 
-v4f_pow(const v4f_T x, const v4f_T y) -{ - return Sleef_powf4_u10(x, y); -} - -/******************************************************************************* - * Exponentatial functions - ******************************************************************************/ -static INLINE v4f_T -v4f_exp2(const v4f_T x) -{ - return Sleef_exp2f4_u10(x); -} - -static INLINE v4f_T -v4f_exp(const v4f_T x) -{ - return Sleef_expf4_u10(x); -} - -static INLINE v4f_T -v4f_exp10(const v4f_T x) -{ - return Sleef_exp10f4_u10(x); -} - -/******************************************************************************* - * Log functions - ******************************************************************************/ -static INLINE v4f_T -v4f_log2(const v4f_T x) -{ - return Sleef_log2f4_u10(x); -} - -static INLINE v4f_T -v4f_log(const v4f_T x) -{ - return Sleef_logf4_u10(x); -} - -static INLINE v4f_T -v4f_log10(const v4f_T x) -{ - return Sleef_log10f4_u10(x); -} - -/******************************************************************************* - * Trigonometric functions - ******************************************************************************/ -static INLINE v4f_T -v4f_sin(const v4f_T v) -{ - return Sleef_sinf4_u10(v); -} - -static INLINE v4f_T -v4f_asin(const v4f_T v) -{ - return Sleef_asinf4_u10(v); -} - -static INLINE v4f_T -v4f_cos(const v4f_T v) -{ - return Sleef_cosf4_u10(v); -} - -static INLINE v4f_T -v4f_acos(const v4f_T v) -{ - return Sleef_acosf4_u10(v); -} - -static INLINE void -v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c) -{ - const Sleef___m128_2 r = Sleef_sincosf4_u10(v); - *s = r.x; - *c = r.y; -} - -static INLINE v4f_T -v4f_tan(const v4f_T v) -{ - return Sleef_tanf4_u10(v); -} - -static INLINE v4f_T -v4f_atan(const v4f_T v) -{ - return Sleef_atanf4_u10(v); -} - -/******************************************************************************* - * Miscellaneous - ******************************************************************************/ 
-static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/ -v4f_xyz_to_rthetaphi(const v4f_T v) -{ - const v4f_T zero = v4f_zero(); - const v4f_T len2 = v4f_len2(v); - const v4f_T len3 = v4f_len3(v); - const v4f_T theta = v4f_sel - (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero)); - const v4f_T tmp_phi = v4f_sel - (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero)); - const v4f_T phi = v4f_sel - (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero)); - - return v4f_xyab(v4f_xayb(len3, theta), phi); -} - #endif /* RSIMD_MATH_H */ + diff --git a/src/math4.h b/src/math4.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef RSIMD_MATH4_H +#define RSIMD_MATH4_H + +#define RSIMD_WIDTH__ 4 +#include "vXf_begin.h" +#include "mathX.h" +#include "vXf_end.h" + +/******************************************************************************* + * Miscellaneous + ******************************************************************************/ +static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/ +v4f_xyz_to_rthetaphi(const v4f_T v) +{ + const v4f_T zero = v4f_zero(); + const v4f_T len2 = v4f_len2(v); + const v4f_T len3 = v4f_len3(v); + const v4f_T theta = v4f_sel + (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero)); + const v4f_T tmp_phi = v4f_sel + (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero)); + const v4f_T phi = v4f_sel + (v4f_sub(v4f_set1((float)PI), tmp_phi),tmp_phi, v4f_ge(v4f_xxxx(v), zero)); + return v4f_xyab(v4f_xayb(len3, theta), phi); +} +#endif /* RSIMD_MATH4_H */ diff --git a/src/math8.h b/src/math8.h @@ -0,0 +1,24 @@ +/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef RSIMD_MATH8_H +#define RSIMD_MATH8_H + +#define RSIMD_WIDTH__ 8 +#include "vXf_begin.h" +#include "mathX.h" +#include "vXf_end.h" + +#endif /* RSIMD_MATH8_H */ diff --git a/src/mathX.h b/src/mathX.h @@ -0,0 +1,137 @@ +/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +#include "rsimd.h" + +#ifdef COMPILER_GCC + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wignored-qualifiers" +#endif + +#include <sleef.h> + +#ifdef COMPILER_GCC + #pragma GCC diagnostic pop +#endif + +static FINLINE RSIMD_vXf_T__ +RSIMD_vXf__(copysign)(const RSIMD_vXf_T__ x, const RSIMD_vXf_T__ y) +{ + return RSIMD_Sleef__(copysignf)(x, y); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(floor)(const RSIMD_vXf_T__ x) +{ + return RSIMD_Sleef__(floorf)(x); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(pow)(const RSIMD_vXf_T__ x, const RSIMD_vXf_T__ y) +{ + return RSIMD_Sleef_ULP__(powf, u10)(x, y); +} + +/******************************************************************************* + * Exponentatial functions + ******************************************************************************/ +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(exp2)(const RSIMD_vXf_T__ x) +{ + return RSIMD_Sleef_ULP__(exp2f, u10)(x); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(exp)(const 
RSIMD_vXf_T__ x) +{ + return RSIMD_Sleef_ULP__(expf, u10)(x); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(exp10)(const RSIMD_vXf_T__ x) +{ + return RSIMD_Sleef_ULP__(exp10f, u10)(x); +} + +/******************************************************************************* + * Log functions + ******************************************************************************/ +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(log2)(const RSIMD_vXf_T__ x) +{ + return RSIMD_Sleef_ULP__(log2f, u10)(x); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(log)(const RSIMD_vXf_T__ x) +{ + return RSIMD_Sleef_ULP__(logf, u10)(x); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(log10)(const RSIMD_vXf_T__ x) +{ + return RSIMD_Sleef_ULP__(log10f, u10)(x); +} + +/******************************************************************************* + * Trigonometric functions + ******************************************************************************/ +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(sin)(const RSIMD_vXf_T__ v) +{ + return RSIMD_Sleef_ULP__(sinf, u10)(v); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(asin)(const RSIMD_vXf_T__ v) +{ + return RSIMD_Sleef_ULP__(asinf, u10)(v); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(cos)(const RSIMD_vXf_T__ v) +{ + return RSIMD_Sleef_ULP__(cosf, u10)(v); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(acos)(const RSIMD_vXf_T__ v) +{ + return RSIMD_Sleef_ULP__(acosf, u10)(v); +} + +static INLINE void +RSIMD_vXf__(sincos) + (const RSIMD_vXf_T__ v, RSIMD_vXf_T__* RESTRICT s, RSIMD_vXf_T__* RESTRICT c) +{ + const RSIMD_Sleef_vecf__(2) r = RSIMD_Sleef_ULP__(sincosf, u10)(v); + *s = r.x; + *c = r.y; +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(tan)(const RSIMD_vXf_T__ v) +{ + return RSIMD_Sleef_ULP__(tanf, u10)(v); +} + +static INLINE RSIMD_vXf_T__ +RSIMD_vXf__(atan)(const RSIMD_vXf_T__ v) +{ + return RSIMD_Sleef_ULP__(atanf, u10)(v); +} + + diff --git a/src/soaXfY_begin.h b/src/soaXfY_begin.h @@ -14,6 +14,7 @@ * along with the RSIMD library. 
If not, see <http://www.gnu.org/licenses/>. */ #include "rsimd.h" +#include "vXf_begin.h" /* This file can be included once */ #ifdef SOAXFY_BEGIN_H @@ -32,23 +33,16 @@ #error "Unexpected RSIMD_SOA_DIMENSION__ value" #endif #if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8 - #error "Unexpected RSIMD_WIDTH__ value" + #error "Unexpected RSIMD_WIDTH__ value of "STR(RSIMD_WIDTH__) #endif /* Check that internal macros are not already defined */ -#if defined(RSIMD_vXf__) \ - || defined(RSIMD_vXf_T__) \ - || defined(RSIMD_soaXfY_PREFIX__) \ +#if defined(RSIMD_soaXfY_PREFIX__) \ || defined(RSIMD_soaXfY__) \ || defined(SIZEOF_RSIMD_soaXfY__) #error "Unexpected macro definition" #endif -/* Macros generic to RSIMD_WIDTH__ */ -#define RSIMD_vXf__(Func) \ - CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func) -#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T) - /* Macros genric to RSIMD_WIDTH__ and RSIMD_SOA_DIMENSION__ */ #define RSIMD_soaXfY_PREFIX__ \ CONCAT(CONCAT(CONCAT(soa, RSIMD_WIDTH__), f), RSIMD_SOA_DIMENSION__) diff --git a/src/soaXfY_end.h b/src/soaXfY_end.h @@ -18,8 +18,6 @@ #endif /* Undef helper macros */ -#undef RSIMD_vXf__ -#undef RSIMD_vXf_T__ #undef RSIMD_soaXfY_PREFIX__ #undef RSIMD_soaXfY__ #undef SIZEOF_RSIMD_soaXfY__ @@ -29,3 +27,5 @@ #undef RSIMD_WIDTH__ #undef SOAXFY_BEGIN_H + +#include "vXf_end.h" diff --git a/src/test_math.c b/src/test_math.c @@ -1,138 +0,0 @@ -/* Copyright (C) 2013-2019 Vincent Forest (vaplv@free.fr) - * - * The RSIMD library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * The RSIMD library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ - -#define _POSIX_C_SOURCE 200112L - -#include "rsimd.h" -#include "math.h" - -#include <math.h> - -#define LOG2E 1.4426950408889634074 /* log_2 e */ -#define LN10 2.30258509299404568402 /* log_e 10 */ - -#define CHKV4_EPS(V, Ref, Eps) { \ - CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps)); \ - CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps)); \ - CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps)); \ - CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps)); \ -} (void)0 - -#define CHKV4_FUNC_EPS(V, Func, Eps) { \ - const v4f_T r__ = v4f_##Func(V); \ - float ref__[4]; \ - ref__[0] = (float)Func(v4f_x(V)); \ - ref__[1] = (float)Func(v4f_y(V)); \ - ref__[2] = (float)Func(v4f_z(V)); \ - ref__[3] = (float)Func(v4f_w(V)); \ - CHKV4_EPS(r__, ref__, Eps); \ -} (void)0 - -static void -test_trigo(void) -{ - v4f_T i, j, k; - float ref[4]; - - i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f); - - CHKV4_FUNC_EPS(i, cos, 1.e-6); - CHKV4_FUNC_EPS(i, sin, 1.e-6); - - v4f_sincos(i, &k, &j); - ref[0] = (float)sin(v4f_x(i)); - ref[1] = (float)sin(v4f_y(i)); - ref[2] = (float)sin(v4f_z(i)); - ref[3] = (float)sin(v4f_w(i)); - CHKV4_EPS(k, ref, 1.e-6f); - ref[0] = (float)cos(v4f_x(i)); - ref[1] = (float)cos(v4f_y(i)); - ref[2] = (float)cos(v4f_z(i)); - ref[3] = (float)cos(v4f_w(i)); - CHKV4_EPS(j, ref, 1.e-6f); - - i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f); - CHKV4_FUNC_EPS(i, tan, 1.e-6); - CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6); - CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6); - CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6); -} - -static void -test_exp(void) -{ - const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f); - v4f_T j; - float ref[4]; - - CHKV4_FUNC_EPS(i, exp, 1.e-6); - CHKV4_FUNC_EPS(i, exp2, 1.e-6); - - j = 
v4f_exp10(i); - ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(i)); - ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i)); - ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i)); - ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i)); - CHKV4_EPS(j, ref, 1.e-6f); -} - -static void -test_log(void) -{ - const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f); - - CHKV4_FUNC_EPS(i, log, 1.e-6); - CHKV4_FUNC_EPS(i, log2, 1.e-6); - CHKV4_FUNC_EPS(i, log10, 1.e-6); -} - -static void -test_misc(void) -{ - v4f_T i, j, k; - float ref[4]; - - i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f); - j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f); - k = v4f_copysign(i, j); - ref[0] = (float)copysign(v4f_x(i), v4f_x(j)); - ref[1] = (float)copysign(v4f_y(i), v4f_y(j)); - ref[2] = (float)copysign(v4f_z(i), v4f_z(j)); - ref[3] = (float)copysign(v4f_w(i), v4f_w(j)); - CHKV4_EPS(k, ref, 1.e-6f); - - CHKV4_FUNC_EPS(i, floor, 1.e-6); - - k = v4f_pow(v4f_abs(i), j); - ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j)); - ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j)); - ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j)); - ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j)); - CHKV4_EPS(k, ref, 1.e-6f); -} - -int -main(int argc, char** argv) -{ - (void)argc, (void)argv; - - test_trigo(); - test_exp(); - test_log(); - test_misc(); - - return 0; -} - diff --git a/src/test_math4.c b/src/test_math4.c @@ -0,0 +1,138 @@ +/* Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +#define _POSIX_C_SOURCE 200112L + +#include "rsimd.h" +#include "math.h" + +#include <math.h> + +#define LOG2E 1.4426950408889634074 /* log_2 e */ +#define LN10 2.30258509299404568402 /* log_e 10 */ + +#define CHKV4_EPS(V, Ref, Eps) { \ + CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps)); \ + CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps)); \ + CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps)); \ + CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps)); \ +} (void)0 + +#define CHKV4_FUNC_EPS(V, Func, Eps) { \ + const v4f_T r__ = v4f_##Func(V); \ + float ref__[4]; \ + ref__[0] = (float)Func(v4f_x(V)); \ + ref__[1] = (float)Func(v4f_y(V)); \ + ref__[2] = (float)Func(v4f_z(V)); \ + ref__[3] = (float)Func(v4f_w(V)); \ + CHKV4_EPS(r__, ref__, Eps); \ +} (void)0 + +static void +test_trigo(void) +{ + v4f_T i, j, k; + float ref[4]; + + i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f); + + CHKV4_FUNC_EPS(i, cos, 1.e-6f); + CHKV4_FUNC_EPS(i, sin, 1.e-6f); + + v4f_sincos(i, &k, &j); + ref[0] = (float)sin(v4f_x(i)); + ref[1] = (float)sin(v4f_y(i)); + ref[2] = (float)sin(v4f_z(i)); + ref[3] = (float)sin(v4f_w(i)); + CHKV4_EPS(k, ref, 1.e-6f); + ref[0] = (float)cos(v4f_x(i)); + ref[1] = (float)cos(v4f_y(i)); + ref[2] = (float)cos(v4f_z(i)); + ref[3] = (float)cos(v4f_w(i)); + CHKV4_EPS(j, ref, 1.e-6f); + + i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f); + CHKV4_FUNC_EPS(i, tan, 1.e-6f); + CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6f); + CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6f); + CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6f); +} + +static void +test_exp(void) +{ + const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f); + v4f_T j; + float ref[4]; + + CHKV4_FUNC_EPS(i, exp, 1.e-6f); + CHKV4_FUNC_EPS(i, exp2, 1.e-6f); + + j = v4f_exp10(i); + ref[0] = (float)exp2(LOG2E * LN10 * 
v4f_x(i)); + ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i)); + ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i)); + ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i)); + CHKV4_EPS(j, ref, 1.e-6f); +} + +static void +test_log(void) +{ + const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f); + + CHKV4_FUNC_EPS(i, log, 1.e-6f); + CHKV4_FUNC_EPS(i, log2, 1.e-6f); + CHKV4_FUNC_EPS(i, log10, 1.e-6f); +} + +static void +test_misc(void) +{ + v4f_T i, j, k; + float ref[4]; + + i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f); + j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f); + k = v4f_copysign(i, j); + ref[0] = (float)copysign(v4f_x(i), v4f_x(j)); + ref[1] = (float)copysign(v4f_y(i), v4f_y(j)); + ref[2] = (float)copysign(v4f_z(i), v4f_z(j)); + ref[3] = (float)copysign(v4f_w(i), v4f_w(j)); + CHKV4_EPS(k, ref, 1.e-6f); + + CHKV4_FUNC_EPS(i, floor, 1.e-6f); + + k = v4f_pow(v4f_abs(i), j); + ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j)); + ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j)); + ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j)); + ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j)); + CHKV4_EPS(k, ref, 1.e-6f); +} + +int +main(int argc, char** argv) +{ + (void)argc, (void)argv; + + test_trigo(); + test_exp(); + test_log(); + test_misc(); + + return 0; +} + diff --git a/src/test_math8.c b/src/test_math8.c @@ -0,0 +1,172 @@ +/* Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +#define _POSIX_C_SOURCE 200112L + +#include "rsimd.h" +#include "math.h" + +#include <math.h> + +#define LOG2E 1.4426950408889634074 /* log_2 e */ +#define LN10 2.30258509299404568402 /* log_e 10 */ + +#define CHKV8_EPS(V, Ref, Eps) { \ + CHK(eq_eps(v4f_x(v8f_abcd(V)), Ref[0], fabsf(Ref[0]) * Eps)); \ + CHK(eq_eps(v4f_y(v8f_abcd(V)), Ref[1], fabsf(Ref[1]) * Eps)); \ + CHK(eq_eps(v4f_z(v8f_abcd(V)), Ref[2], fabsf(Ref[2]) * Eps)); \ + CHK(eq_eps(v4f_w(v8f_abcd(V)), Ref[3], fabsf(Ref[3]) * Eps)); \ + CHK(eq_eps(v4f_x(v8f_efgh(V)), Ref[4], fabsf(Ref[4]) * Eps)); \ + CHK(eq_eps(v4f_y(v8f_efgh(V)), Ref[5], fabsf(Ref[5]) * Eps)); \ + CHK(eq_eps(v4f_z(v8f_efgh(V)), Ref[6], fabsf(Ref[6]) * Eps)); \ + CHK(eq_eps(v4f_w(v8f_efgh(V)), Ref[7], fabsf(Ref[7]) * Eps)); \ +} (void)0 + +#define CHKV8_FUNC_EPS(V, Func, Eps) { \ + const v8f_T r__ = v8f_##Func(V); \ + float ref__[8]; \ + ref__[0] = (float)Func(v4f_x(v8f_abcd(V))); \ + ref__[1] = (float)Func(v4f_y(v8f_abcd(V))); \ + ref__[2] = (float)Func(v4f_z(v8f_abcd(V))); \ + ref__[3] = (float)Func(v4f_w(v8f_abcd(V))); \ + ref__[4] = (float)Func(v4f_x(v8f_efgh(V))); \ + ref__[5] = (float)Func(v4f_y(v8f_efgh(V))); \ + ref__[6] = (float)Func(v4f_z(v8f_efgh(V))); \ + ref__[7] = (float)Func(v4f_w(v8f_efgh(V))); \ + CHKV8_EPS(r__, ref__, Eps); \ +} (void)0 + +static void +test_trigo(void) +{ + v8f_T i, j, k; + float ref[8]; + + i = v8f_set + ((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f, + (float)PI/8.f, (float)PI/7.f, (float)PI/16.f, (float)PI/9.f); + + CHKV8_FUNC_EPS(i, cos, 1.e-6f); + CHKV8_FUNC_EPS(i, sin, 1.e-6f); + + v8f_sincos(i, &k, &j); + ref[0] = (float)sin(v4f_x(v8f_abcd(i))); + ref[1] = (float)sin(v4f_y(v8f_abcd(i))); + ref[2] = (float)sin(v4f_z(v8f_abcd(i))); + ref[3] = (float)sin(v4f_w(v8f_abcd(i))); + ref[4] = (float)sin(v4f_x(v8f_efgh(i))); + 
ref[5] = (float)sin(v4f_y(v8f_efgh(i))); + ref[6] = (float)sin(v4f_z(v8f_efgh(i))); + ref[7] = (float)sin(v4f_w(v8f_efgh(i))); + CHKV8_EPS(k, ref, 1.e-6f); + ref[0] = (float)cos(v4f_x(v8f_abcd(i))); + ref[1] = (float)cos(v4f_y(v8f_abcd(i))); + ref[2] = (float)cos(v4f_z(v8f_abcd(i))); + ref[3] = (float)cos(v4f_w(v8f_abcd(i))); + ref[4] = (float)cos(v4f_x(v8f_efgh(i))); + ref[5] = (float)cos(v4f_y(v8f_efgh(i))); + ref[6] = (float)cos(v4f_z(v8f_efgh(i))); + ref[7] = (float)cos(v4f_w(v8f_efgh(i))); + CHKV8_EPS(j, ref, 1.e-6f); + + i = v8f_set + ((float)PI/2.2f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f, + (float)PI/8.f, (float)PI/7.f, (float)PI/16.f, (float)PI/9.f); + + CHKV8_FUNC_EPS(i, tan, 1.e-6); + CHKV8_FUNC_EPS(v8f_cos(i), acos, 1.e-6f); + CHKV8_FUNC_EPS(v8f_sin(i), asin, 1.e-6f); + CHKV8_FUNC_EPS(v8f_tan(i), atan, 1.e-6f); +} + +static void +test_exp(void) +{ + const v8f_T i = v8f_set + (1.f, -1.234f, 0.f, 3.14156f, 0.9187f, 7.9f, 3.333f, 2.387e-7f); + v8f_T j; + float ref[8]; + + CHKV8_FUNC_EPS(i, exp, 1.e-6f); + CHKV8_FUNC_EPS(i, exp2, 1.e-6f); + + j = v8f_exp10(i); + ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(v8f_abcd(i))); + ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(v8f_abcd(i))); + ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(v8f_abcd(i))); + ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(v8f_abcd(i))); + ref[4] = (float)exp2(LOG2E * LN10 * v4f_x(v8f_efgh(i))); + ref[5] = (float)exp2(LOG2E * LN10 * v4f_y(v8f_efgh(i))); + ref[6] = (float)exp2(LOG2E * LN10 * v4f_z(v8f_efgh(i))); + ref[7] = (float)exp2(LOG2E * LN10 * v4f_w(v8f_efgh(i))); + CHKV8_EPS(j, ref, 1.e-6f); +} + +static void +test_log(void) +{ + const v8f_T i = v8f_set + (4.675f, 3.14f, 9.99999f, 1.234e-13f, 3.33e-3f, 0.98f, 8.f, 9.87654f); + CHKV8_FUNC_EPS(i, log, 1.e-6f); + CHKV8_FUNC_EPS(i, log2, 1.e-6f); + CHKV8_FUNC_EPS(i, log10, 1.e-6f); +} + +static void +test_misc(void) +{ + v8f_T i, j, k; + float ref[8]; + + i = v8f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f, 9.9f, -3.1f, 0.33e-6f, 1.f); + j = 
v8f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f, 5.f, 0.1e-19f, 1.234f, -0.45f); + k = v8f_copysign(i, j); + ref[0] = (float)copysign(v4f_x(v8f_abcd(i)), v4f_x(v8f_abcd(j))); + ref[1] = (float)copysign(v4f_y(v8f_abcd(i)), v4f_y(v8f_abcd(j))); + ref[2] = (float)copysign(v4f_z(v8f_abcd(i)), v4f_z(v8f_abcd(j))); + ref[3] = (float)copysign(v4f_w(v8f_abcd(i)), v4f_w(v8f_abcd(j))); + ref[4] = (float)copysign(v4f_x(v8f_efgh(i)), v4f_x(v8f_efgh(j))); + ref[5] = (float)copysign(v4f_y(v8f_efgh(i)), v4f_y(v8f_efgh(j))); + ref[6] = (float)copysign(v4f_z(v8f_efgh(i)), v4f_z(v8f_efgh(j))); + ref[7] = (float)copysign(v4f_w(v8f_efgh(i)), v4f_w(v8f_efgh(j))); + CHKV8_EPS(k, ref, 1.e-6f); + + CHKV8_FUNC_EPS(i, floor, 1.e-6f); + + k = v8f_pow(v8f_abs(i), j); + ref[0] = (float)pow(fabsf(v4f_x(v8f_abcd(i))), v4f_x(v8f_abcd(j))); + ref[1] = (float)pow(fabsf(v4f_y(v8f_abcd(i))), v4f_y(v8f_abcd(j))); + ref[2] = (float)pow(fabsf(v4f_z(v8f_abcd(i))), v4f_z(v8f_abcd(j))); + ref[3] = (float)pow(fabsf(v4f_w(v8f_abcd(i))), v4f_w(v8f_abcd(j))); + ref[4] = (float)pow(fabsf(v4f_x(v8f_efgh(i))), v4f_x(v8f_efgh(j))); + ref[5] = (float)pow(fabsf(v4f_y(v8f_efgh(i))), v4f_y(v8f_efgh(j))); + ref[6] = (float)pow(fabsf(v4f_z(v8f_efgh(i))), v4f_z(v8f_efgh(j))); + ref[7] = (float)pow(fabsf(v4f_w(v8f_efgh(i))), v4f_w(v8f_efgh(j))); + CHKV8_EPS(k, ref, 1.e-6f); +} + +int +main(int argc, char** argv) +{ + (void)argc, (void)argv; + + test_trigo(); + test_exp(); + test_log(); + test_misc(); + + return 0; +} + diff --git a/src/vXf_begin.h b/src/vXf_begin.h @@ -0,0 +1,57 @@ +/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +#include "rsimd.h" + +/* This file can be included once */ +#ifdef VXF_BEGIN_H + #error "The vXf_begin.h header is already included" +#endif +#define VXF_BEGIN_H + +/* Check parameter */ +#if !defined(RSIMD_WIDTH__) + #error "Undefined RSIMD_WIDTH__ macro" +#endif +#if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8 + #error "Unexpected RSIMD_WIDTH__ value of "STR(RSIMD_WIDTH__) +#endif + +/* Check that internal macros are not already defined */ +#if defined(RSIMD_vXf__) \ + || defined(RSIMD_vXf_T__) \ + || defined(RSIMD_Sleef__) \ + || defined(RSIMD_Sleef_ULP__) \ + || defined(RSIMD_Sleef_vecf__) + #error "Unexpected macro definition" +#endif + +/* Macros generic to RSIMD_WIDTH__ */ +#define RSIMD_vXf__(Func) \ + CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func) +#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T) + +/* Sleef macros */ +#define RSIMD_Sleef__(Func) CONCAT(CONCAT(Sleef_, Func), RSIMD_WIDTH__) +#define RSIMD_Sleef_ULP__(Func, Suffix) \ + CONCAT(CONCAT(CONCAT(CONCAT(Sleef_, Func), RSIMD_WIDTH__), _), Suffix) + +/* Vector types of the Sleef library */ +#if RSIMD_WIDTH__ == 4 + #define RSIMD_Sleef_vecf__(Dim) CONCAT(Sleef___m128_, Dim) +#elif RSIMD_WIDTH__ == 8 + #define RSIMD_Sleef_vecf__(Dim) CONCAT(Sleef___m256_, Dim) +#endif + diff --git a/src/vXf_end.h b/src/vXf_end.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free 
Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef VXF_BEGIN_H + #error "The vXf_begin.h file must be included" +#endif + +/* Undef helper macros */ +#undef RSIMD_vXf__ +#undef RSIMD_vXf_T__ +#undef RSIMD_Sleef__ +#undef RSIMD_Sleef_ULP__ +#undef RSIMD_Sleef_vecf__ + +/* Undef parameters */ +#undef RSIMD_WIDTH__ + +#undef VXF_BEGIN_H +