Add the math functions for the v8f_T type - rsimd - Make SIMD instruction sets easier to use

commit dc44f07f5fca4178b6a4dd52cb81f06391660402
parent 85c665f40a596c1728a57523014a5eea61f1ba86
Author: vaplv <vaplv@free.fr>
Date:   Tue, 27 Apr 2021 15:59:55 +0200

Add the math functions for the v8f_T type

Diffstat:
M cmake/CMakeLists.txt  | 14 ++++++++++----
M src/math.h  | 141 ++++---------------------------------------------------------------------------
A src/math4.h  | 41 +++++++++++++++++++++++++++++++++++++++++
A src/math8.h  | 24 ++++++++++++++++++++++++
A src/mathX.h  | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/soaXfY_begin.h  | 12 +++---------
M src/soaXfY_end.h  | 4 ++--
D src/test_math.c  | 138 -------------------------------------------------------------------------------
A src/test_math4.c  | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/test_math8.c  | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/vXf_begin.h  | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/vXf_end.h  | 31 +++++++++++++++++++++++++++++++

12 files changed, 621 insertions(+), 288 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -63,6 +63,9 @@ set(RSIMD_FILES_INC_LEGACY
   aosf44.h
   aosq.h
   math.h
+  mathX.h
+  math4.h
+  math8.h
   rsimd.h
   soaXfY.h
   soaXfY_begin.h
@@ -74,7 +77,9 @@ set(RSIMD_FILES_INC_LEGACY
   soa4f4.h
   soa8f2.h
   soa8f3.h
-  soa8f4.h)
+  soa8f4.h
+  vXf_begin.h
+  vXf_end.h)
 set(RSIMD_FILES_INC_SSE
   sse/sse.h
   sse/ssef.h
@@ -88,7 +93,7 @@ set(RSIMD_FILES_SRC
   aosf44.c
   aosq.c)
 set(RSIMD_FILES_DOC COPYING COPYING.LESSER README.md)
-set(RSIMD_FILES_CMAKE 
+set(RSIMD_FILES_CMAKE
   RSIMDConfig.cmake
   RSIMDConfigVersion.cmake)
 rcmake_prepend_path(RSIMD_FILES_INC_LEGACY ${RSIMD_SOURCE_DIR})
@@ -131,7 +136,7 @@ if(NOT NO_TEST)
   new_test(test_aosf33)
   new_test(test_aosf44)
   new_test(test_aosq)
-  new_test(test_math)
+  new_test(test_math4)
   new_test(test_soa4f2)
   new_test(test_soa4f3)
   new_test(test_soa4f4)
@@ -149,6 +154,7 @@ if(NOT NO_TEST)
   endif()
 
   if(AVX AND CMAKE_COMPILER_IS_GNUCC)
+    new_test(test_math8 "-mavx")
     new_test(test_v8f "-mavx")
     new_test(test_v8i "-mavx")
     new_test(test_soa8f2 "-mavx")
@@ -174,6 +180,6 @@ install(FILES ${Sleef_DIR}/SleefConfig.cmake DESTINATION lib/cmake/Sleef/)
 
 install(FILES ${PROJECT_SOURCE_DIR}/RSIMDConfig.cmake
   DESTINATION lib/cmake/RSIMD)
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake 
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake
   DESTINATION lib/cmake/RSIMD)
 
diff --git a/src/math.h b/src/math.h
@@ -16,143 +16,14 @@
 #ifndef RSIMD_MATH_H
 #define RSIMD_MATH_H
 
-#include "rsimd.h"
+#include <rsys/rsys.h>
 
-#ifdef COMPILER_GCC
-  #pragma GCC diagnostic push
-  #pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#ifdef SIMD_SSE2
+  #include "math4.h"
 #endif
-
-#include <sleef.h>
-
-#ifdef COMPILER_GCC
-  #pragma GCC diagnostic pop
+#ifdef SIMD_AVX
+  #include "math8.h"
 #endif
 
-static FINLINE v4f_T
-v4f_copysign(const v4f_T x, const v4f_T y)
-{
-  return Sleef_copysignf4(x, y);
-}
-
-static INLINE v4f_T
-v4f_floor(const v4f_T x)
-{
-  return Sleef_floorf4(x);
-}
-
-static INLINE v4f_T
-v4f_pow(const v4f_T x, const v4f_T y)
-{
-  return Sleef_powf4_u10(x, y);
-}
-
-/*******************************************************************************
- * Exponentatial functions
- ******************************************************************************/
-static INLINE v4f_T
-v4f_exp2(const v4f_T x)
-{
-  return Sleef_exp2f4_u10(x);
-}
-
-static INLINE v4f_T
-v4f_exp(const v4f_T x)
-{
-  return Sleef_expf4_u10(x);
-}
-
-static INLINE v4f_T
-v4f_exp10(const v4f_T x)
-{
-  return Sleef_exp10f4_u10(x);
-}
-
-/*******************************************************************************
- * Log functions
- ******************************************************************************/
-static INLINE v4f_T
-v4f_log2(const v4f_T x)
-{
-  return Sleef_log2f4_u10(x);
-}
-
-static INLINE v4f_T
-v4f_log(const v4f_T x)
-{
-  return Sleef_logf4_u10(x);
-}
-
-static INLINE v4f_T
-v4f_log10(const v4f_T x)
-{
-  return Sleef_log10f4_u10(x);
-}
-
-/*******************************************************************************
- * Trigonometric functions
- ******************************************************************************/
-static INLINE v4f_T
-v4f_sin(const v4f_T v)
-{
-  return Sleef_sinf4_u10(v);
-}
-
-static INLINE v4f_T
-v4f_asin(const v4f_T v)
-{
-  return Sleef_asinf4_u10(v);
-}
-
-static INLINE v4f_T
-v4f_cos(const v4f_T v)
-{
-  return Sleef_cosf4_u10(v);
-}
-
-static INLINE v4f_T
-v4f_acos(const v4f_T v)
-{
-  return Sleef_acosf4_u10(v);
-}
-
-static INLINE void
-v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c)
-{
-  const Sleef___m128_2 r = Sleef_sincosf4_u10(v);
-  *s = r.x;
-  *c = r.y;
-}
-
-static INLINE v4f_T
-v4f_tan(const v4f_T v)
-{
-  return Sleef_tanf4_u10(v);
-}
-
-static INLINE v4f_T
-v4f_atan(const v4f_T v)
-{
-  return Sleef_atanf4_u10(v);
-}
-
-/*******************************************************************************
- * Miscellaneous
- ******************************************************************************/
-static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/
-v4f_xyz_to_rthetaphi(const v4f_T v)
-{
-  const v4f_T zero = v4f_zero();
-  const v4f_T len2 = v4f_len2(v);
-  const v4f_T len3 = v4f_len3(v);
-  const v4f_T theta = v4f_sel
-    (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
-  const v4f_T tmp_phi = v4f_sel
-    (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
-  const v4f_T phi = v4f_sel
-    (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero));
-
-  return v4f_xyab(v4f_xayb(len3, theta), phi);
-}
-
 #endif /* RSIMD_MATH_H */
+
diff --git a/src/math4.h b/src/math4.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH4_H
+#define RSIMD_MATH4_H
+
+#define RSIMD_WIDTH__ 4
+#include "vXf_begin.h"
+#include "mathX.h"
+#include "vXf_end.h"
+
+/*******************************************************************************
+ * Miscellaneous
+ ******************************************************************************/
+static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/
+v4f_xyz_to_rthetaphi(const v4f_T v)
+{
+  const v4f_T zero = v4f_zero();
+  const v4f_T len2 = v4f_len2(v);
+  const v4f_T len3 = v4f_len3(v);
+  const v4f_T theta = v4f_sel
+    (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
+  const v4f_T tmp_phi = v4f_sel
+    (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
+  const v4f_T phi = v4f_sel
+    (v4f_sub(v4f_set1((float)PI), tmp_phi),tmp_phi, v4f_ge(v4f_xxxx(v), zero));
+  return v4f_xyab(v4f_xayb(len3, theta), phi);
+}
+#endif /* RSIMD_MATH4_H */
diff --git a/src/math8.h b/src/math8.h
@@ -0,0 +1,24 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH8_H
+#define RSIMD_MATH8_H
+
+#define RSIMD_WIDTH__ 8
+#include "vXf_begin.h"
+#include "mathX.h"
+#include "vXf_end.h"
+
+#endif /* RSIMD_MATH8_H */
diff --git a/src/mathX.h b/src/mathX.h
@@ -0,0 +1,137 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+
+#ifdef COMPILER_GCC
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#include <sleef.h>
+
+#ifdef COMPILER_GCC
+  #pragma GCC diagnostic pop
+#endif
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_vXf__(copysign)(const RSIMD_vXf_T__ x, const RSIMD_vXf_T__ y)
+{
+  return RSIMD_Sleef__(copysignf)(x, y);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(floor)(const RSIMD_vXf_T__ x)
+{
+  return RSIMD_Sleef__(floorf)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(pow)(const RSIMD_vXf_T__ x, const RSIMD_vXf_T__ y)
+{
+  return RSIMD_Sleef_ULP__(powf, u10)(x, y);
+}
+
+/*******************************************************************************
+ * Exponential functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(exp2)(const RSIMD_vXf_T__ x)
+{
+  return RSIMD_Sleef_ULP__(exp2f, u10)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(exp)(const RSIMD_vXf_T__ x)
+{
+  return RSIMD_Sleef_ULP__(expf, u10)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(exp10)(const RSIMD_vXf_T__ x)
+{
+  return RSIMD_Sleef_ULP__(exp10f, u10)(x);
+}
+
+/*******************************************************************************
+ * Log functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(log2)(const RSIMD_vXf_T__ x)
+{
+  return RSIMD_Sleef_ULP__(log2f, u10)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(log)(const RSIMD_vXf_T__ x)
+{
+  return RSIMD_Sleef_ULP__(logf, u10)(x);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(log10)(const RSIMD_vXf_T__ x)
+{
+  return RSIMD_Sleef_ULP__(log10f, u10)(x);
+}
+
+/*******************************************************************************
+ * Trigonometric functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(sin)(const RSIMD_vXf_T__ v)
+{
+  return RSIMD_Sleef_ULP__(sinf, u10)(v);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(asin)(const RSIMD_vXf_T__ v)
+{
+  return RSIMD_Sleef_ULP__(asinf, u10)(v);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(cos)(const RSIMD_vXf_T__ v)
+{
+  return RSIMD_Sleef_ULP__(cosf, u10)(v);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(acos)(const RSIMD_vXf_T__ v)
+{
+  return RSIMD_Sleef_ULP__(acosf, u10)(v);
+}
+
+static INLINE void
+RSIMD_vXf__(sincos)
+  (const RSIMD_vXf_T__ v, RSIMD_vXf_T__* RESTRICT s, RSIMD_vXf_T__* RESTRICT c)
+{
+  const RSIMD_Sleef_vecf__(2) r = RSIMD_Sleef_ULP__(sincosf, u10)(v);
+  *s = r.x;
+  *c = r.y;
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(tan)(const RSIMD_vXf_T__ v)
+{
+  return RSIMD_Sleef_ULP__(tanf, u10)(v);
+}
+
+static INLINE RSIMD_vXf_T__
+RSIMD_vXf__(atan)(const RSIMD_vXf_T__ v)
+{
+  return RSIMD_Sleef_ULP__(atanf, u10)(v);
+}
+
+
diff --git a/src/soaXfY_begin.h b/src/soaXfY_begin.h
@@ -14,6 +14,7 @@
  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
 
 #include "rsimd.h"
+#include "vXf_begin.h"
 
 /* This file can be included once */
 #ifdef SOAXFY_BEGIN_H
@@ -32,23 +33,16 @@
   #error "Unexpected RSIMD_SOA_DIMENSION__ value"
 #endif
 #if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8
-  #error "Unexpected RSIMD_WIDTH__ value"
+  #error "Unexpected RSIMD_WIDTH__ value of "STR(RSIMD_WIDTH__)
 #endif
 
 /* Check that internal macros are not already defined */
-#if defined(RSIMD_vXf__)                                                       \
- || defined(RSIMD_vXf_T__)                                                     \
- || defined(RSIMD_soaXfY_PREFIX__)                                             \
+#if defined(RSIMD_soaXfY_PREFIX__)                                             \
  || defined(RSIMD_soaXfY__)                                                    \
  || defined(SIZEOF_RSIMD_soaXfY__)
   #error "Unexpected macro definition"
 #endif
 
-/* Macros generic to RSIMD_WIDTH__ */
-#define RSIMD_vXf__(Func) \
-  CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func)
-#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T)
-
 /* Macros genric to RSIMD_WIDTH__ and RSIMD_SOA_DIMENSION__ */
 #define RSIMD_soaXfY_PREFIX__ \
   CONCAT(CONCAT(CONCAT(soa, RSIMD_WIDTH__), f), RSIMD_SOA_DIMENSION__)
diff --git a/src/soaXfY_end.h b/src/soaXfY_end.h
@@ -18,8 +18,6 @@
 #endif
 
 /* Undef helper macros */
-#undef RSIMD_vXf__
-#undef RSIMD_vXf_T__
 #undef RSIMD_soaXfY_PREFIX__
 #undef RSIMD_soaXfY__
 #undef SIZEOF_RSIMD_soaXfY__
@@ -29,3 +27,5 @@
 #undef RSIMD_WIDTH__
 
 #undef SOAXFY_BEGIN_H
+
+#include "vXf_end.h"
diff --git a/src/test_math.c b/src/test_math.c
@@ -1,138 +0,0 @@
-/* Copyright (C) 2013-2019 Vincent Forest (vaplv@free.fr)
- *
- * The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * The RSIMD library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-
-#define _POSIX_C_SOURCE 200112L
-
-#include "rsimd.h"
-#include "math.h"
-
-#include <math.h>
-
-#define LOG2E 1.4426950408889634074 /* log_2 e */
-#define LN10 2.30258509299404568402 /* log_e 10 */
-
-#define CHKV4_EPS(V, Ref, Eps) {                                               \
-  CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps));                          \
-  CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps));                          \
-  CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps));                          \
-  CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps));                          \
-} (void)0
-
-#define CHKV4_FUNC_EPS(V, Func, Eps) {                                         \
-  const v4f_T r__ = v4f_##Func(V);                                             \
-  float ref__[4];                                                              \
-  ref__[0] = (float)Func(v4f_x(V));                                            \
-  ref__[1] = (float)Func(v4f_y(V));                                            \
-  ref__[2] = (float)Func(v4f_z(V));                                            \
-  ref__[3] = (float)Func(v4f_w(V));                                            \
-  CHKV4_EPS(r__, ref__, Eps);                                                  \
-} (void)0
-
-static void
-test_trigo(void)
-{
-  v4f_T i, j, k;
-  float ref[4];
-
-  i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
-
-  CHKV4_FUNC_EPS(i, cos, 1.e-6);
-  CHKV4_FUNC_EPS(i, sin, 1.e-6);
-
-  v4f_sincos(i, &k, &j);
-  ref[0] = (float)sin(v4f_x(i));
-  ref[1] = (float)sin(v4f_y(i));
-  ref[2] = (float)sin(v4f_z(i));
-  ref[3] = (float)sin(v4f_w(i));
-  CHKV4_EPS(k, ref, 1.e-6f);
-  ref[0] = (float)cos(v4f_x(i));
-  ref[1] = (float)cos(v4f_y(i));
-  ref[2] = (float)cos(v4f_z(i));
-  ref[3] = (float)cos(v4f_w(i));
-  CHKV4_EPS(j, ref, 1.e-6f);
-
-  i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
-  CHKV4_FUNC_EPS(i, tan, 1.e-6);
-  CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6);
-  CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6);
-  CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6);
-}
-
-static void
-test_exp(void)
-{
-  const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f);
-  v4f_T j;
-  float ref[4];
-
-  CHKV4_FUNC_EPS(i, exp, 1.e-6);
-  CHKV4_FUNC_EPS(i, exp2, 1.e-6);
-
-  j = v4f_exp10(i);
-  ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(i));
-  ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i));
-  ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i));
-  ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i));
-  CHKV4_EPS(j, ref, 1.e-6f);
-}
-
-static void
-test_log(void)
-{
-  const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f);
-
-  CHKV4_FUNC_EPS(i, log, 1.e-6);
-  CHKV4_FUNC_EPS(i, log2, 1.e-6);
-  CHKV4_FUNC_EPS(i, log10, 1.e-6);
-}
-
-static void
-test_misc(void)
-{
-  v4f_T i, j, k;
-  float ref[4];
-
-  i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f);
-  j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f);
-  k = v4f_copysign(i, j);
-  ref[0] = (float)copysign(v4f_x(i), v4f_x(j));
-  ref[1] = (float)copysign(v4f_y(i), v4f_y(j));
-  ref[2] = (float)copysign(v4f_z(i), v4f_z(j));
-  ref[3] = (float)copysign(v4f_w(i), v4f_w(j));
-  CHKV4_EPS(k, ref, 1.e-6f);
-
-  CHKV4_FUNC_EPS(i, floor, 1.e-6);
-
-  k = v4f_pow(v4f_abs(i), j);
-  ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j));
-  ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j));
-  ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j));
-  ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j));
-  CHKV4_EPS(k, ref, 1.e-6f);
-}
-
-int
-main(int argc, char** argv)
-{
-  (void)argc, (void)argv;
-
-  test_trigo();
-  test_exp();
-  test_log();
-  test_misc();
-
-  return 0;
-}
-
diff --git a/src/test_math4.c b/src/test_math4.c
@@ -0,0 +1,138 @@
+/* Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "rsimd.h"
+#include "math.h"
+
+#include <math.h>
+
+#define LOG2E 1.4426950408889634074 /* log_2 e */
+#define LN10 2.30258509299404568402 /* log_e 10 */
+
+#define CHKV4_EPS(V, Ref, Eps) {                                               \
+  CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps));                          \
+  CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps));                          \
+  CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps));                          \
+  CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps));                          \
+} (void)0
+
+#define CHKV4_FUNC_EPS(V, Func, Eps) {                                         \
+  const v4f_T r__ = v4f_##Func(V);                                             \
+  float ref__[4];                                                              \
+  ref__[0] = (float)Func(v4f_x(V));                                            \
+  ref__[1] = (float)Func(v4f_y(V));                                            \
+  ref__[2] = (float)Func(v4f_z(V));                                            \
+  ref__[3] = (float)Func(v4f_w(V));                                            \
+  CHKV4_EPS(r__, ref__, Eps);                                                  \
+} (void)0
+
+static void
+test_trigo(void)
+{
+  v4f_T i, j, k;
+  float ref[4];
+
+  i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+
+  CHKV4_FUNC_EPS(i, cos, 1.e-6f);
+  CHKV4_FUNC_EPS(i, sin, 1.e-6f);
+
+  v4f_sincos(i, &k, &j);
+  ref[0] = (float)sin(v4f_x(i));
+  ref[1] = (float)sin(v4f_y(i));
+  ref[2] = (float)sin(v4f_z(i));
+  ref[3] = (float)sin(v4f_w(i));
+  CHKV4_EPS(k, ref, 1.e-6f);
+  ref[0] = (float)cos(v4f_x(i));
+  ref[1] = (float)cos(v4f_y(i));
+  ref[2] = (float)cos(v4f_z(i));
+  ref[3] = (float)cos(v4f_w(i));
+  CHKV4_EPS(j, ref, 1.e-6f);
+
+  i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+  CHKV4_FUNC_EPS(i, tan, 1.e-6f);
+  CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6f);
+  CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6f);
+  CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6f);
+}
+
+static void
+test_exp(void)
+{
+  const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f);
+  v4f_T j;
+  float ref[4];
+
+  CHKV4_FUNC_EPS(i, exp, 1.e-6f);
+  CHKV4_FUNC_EPS(i, exp2, 1.e-6f);
+
+  j = v4f_exp10(i);
+  ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(i));
+  ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i));
+  ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i));
+  ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i));
+  CHKV4_EPS(j, ref, 1.e-6f);
+}
+
+static void
+test_log(void)
+{
+  const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f);
+
+  CHKV4_FUNC_EPS(i, log, 1.e-6f);
+  CHKV4_FUNC_EPS(i, log2, 1.e-6f);
+  CHKV4_FUNC_EPS(i, log10, 1.e-6f);
+}
+
+static void
+test_misc(void)
+{
+  v4f_T i, j, k;
+  float ref[4];
+
+  i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f);
+  j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f);
+  k = v4f_copysign(i, j);
+  ref[0] = (float)copysign(v4f_x(i), v4f_x(j));
+  ref[1] = (float)copysign(v4f_y(i), v4f_y(j));
+  ref[2] = (float)copysign(v4f_z(i), v4f_z(j));
+  ref[3] = (float)copysign(v4f_w(i), v4f_w(j));
+  CHKV4_EPS(k, ref, 1.e-6f);
+
+  CHKV4_FUNC_EPS(i, floor, 1.e-6f);
+
+  k = v4f_pow(v4f_abs(i), j);
+  ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j));
+  ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j));
+  ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j));
+  ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j));
+  CHKV4_EPS(k, ref, 1.e-6f);
+}
+
+int
+main(int argc, char** argv)
+{
+  (void)argc, (void)argv;
+
+  test_trigo();
+  test_exp();
+  test_log();
+  test_misc();
+
+  return 0;
+}
+
diff --git a/src/test_math8.c b/src/test_math8.c
@@ -0,0 +1,172 @@
+/* Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "rsimd.h"
+#include "math.h"
+
+#include <math.h>
+
+#define LOG2E 1.4426950408889634074 /* log_2 e */
+#define LN10 2.30258509299404568402 /* log_e 10 */
+
+#define CHKV8_EPS(V, Ref, Eps) {                                               \
+  CHK(eq_eps(v4f_x(v8f_abcd(V)), Ref[0], fabsf(Ref[0]) * Eps));                \
+  CHK(eq_eps(v4f_y(v8f_abcd(V)), Ref[1], fabsf(Ref[1]) * Eps));                \
+  CHK(eq_eps(v4f_z(v8f_abcd(V)), Ref[2], fabsf(Ref[2]) * Eps));                \
+  CHK(eq_eps(v4f_w(v8f_abcd(V)), Ref[3], fabsf(Ref[3]) * Eps));                \
+  CHK(eq_eps(v4f_x(v8f_efgh(V)), Ref[4], fabsf(Ref[4]) * Eps));                \
+  CHK(eq_eps(v4f_y(v8f_efgh(V)), Ref[5], fabsf(Ref[5]) * Eps));                \
+  CHK(eq_eps(v4f_z(v8f_efgh(V)), Ref[6], fabsf(Ref[6]) * Eps));                \
+  CHK(eq_eps(v4f_w(v8f_efgh(V)), Ref[7], fabsf(Ref[7]) * Eps));                \
+} (void)0
+
+#define CHKV8_FUNC_EPS(V, Func, Eps) {                                         \
+  const v8f_T r__ = v8f_##Func(V);                                             \
+  float ref__[8];                                                              \
+  ref__[0] = (float)Func(v4f_x(v8f_abcd(V)));                                  \
+  ref__[1] = (float)Func(v4f_y(v8f_abcd(V)));                                  \
+  ref__[2] = (float)Func(v4f_z(v8f_abcd(V)));                                  \
+  ref__[3] = (float)Func(v4f_w(v8f_abcd(V)));                                  \
+  ref__[4] = (float)Func(v4f_x(v8f_efgh(V)));                                  \
+  ref__[5] = (float)Func(v4f_y(v8f_efgh(V)));                                  \
+  ref__[6] = (float)Func(v4f_z(v8f_efgh(V)));                                  \
+  ref__[7] = (float)Func(v4f_w(v8f_efgh(V)));                                  \
+  CHKV8_EPS(r__, ref__, Eps);                                                  \
+} (void)0
+
+static void
+test_trigo(void)
+{
+  v8f_T i, j, k;
+  float ref[8];
+
+  i = v8f_set
+    ((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f,
+     (float)PI/8.f, (float)PI/7.f, (float)PI/16.f, (float)PI/9.f);
+
+  CHKV8_FUNC_EPS(i, cos, 1.e-6f);
+  CHKV8_FUNC_EPS(i, sin, 1.e-6f);
+
+  v8f_sincos(i, &k, &j);
+  ref[0] = (float)sin(v4f_x(v8f_abcd(i)));
+  ref[1] = (float)sin(v4f_y(v8f_abcd(i)));
+  ref[2] = (float)sin(v4f_z(v8f_abcd(i)));
+  ref[3] = (float)sin(v4f_w(v8f_abcd(i)));
+  ref[4] = (float)sin(v4f_x(v8f_efgh(i)));
+  ref[5] = (float)sin(v4f_y(v8f_efgh(i)));
+  ref[6] = (float)sin(v4f_z(v8f_efgh(i)));
+  ref[7] = (float)sin(v4f_w(v8f_efgh(i)));
+  CHKV8_EPS(k, ref, 1.e-6f);
+  ref[0] = (float)cos(v4f_x(v8f_abcd(i)));
+  ref[1] = (float)cos(v4f_y(v8f_abcd(i)));
+  ref[2] = (float)cos(v4f_z(v8f_abcd(i)));
+  ref[3] = (float)cos(v4f_w(v8f_abcd(i)));
+  ref[4] = (float)cos(v4f_x(v8f_efgh(i)));
+  ref[5] = (float)cos(v4f_y(v8f_efgh(i)));
+  ref[6] = (float)cos(v4f_z(v8f_efgh(i)));
+  ref[7] = (float)cos(v4f_w(v8f_efgh(i)));
+  CHKV8_EPS(j, ref, 1.e-6f);
+
+  i = v8f_set
+    ((float)PI/2.2f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f,
+     (float)PI/8.f, (float)PI/7.f, (float)PI/16.f, (float)PI/9.f);
+
+  CHKV8_FUNC_EPS(i, tan, 1.e-6f);
+  CHKV8_FUNC_EPS(v8f_cos(i), acos, 1.e-6f);
+  CHKV8_FUNC_EPS(v8f_sin(i), asin, 1.e-6f);
+  CHKV8_FUNC_EPS(v8f_tan(i), atan, 1.e-6f);
+}
+
+static void
+test_exp(void)
+{
+  const v8f_T i = v8f_set
+    (1.f, -1.234f, 0.f, 3.14156f, 0.9187f, 7.9f, 3.333f, 2.387e-7f);
+  v8f_T j;
+  float ref[8];
+
+  CHKV8_FUNC_EPS(i, exp, 1.e-6f);
+  CHKV8_FUNC_EPS(i, exp2, 1.e-6f);
+
+  j = v8f_exp10(i);
+  ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(v8f_abcd(i)));
+  ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(v8f_abcd(i)));
+  ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(v8f_abcd(i)));
+  ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(v8f_abcd(i)));
+  ref[4] = (float)exp2(LOG2E * LN10 * v4f_x(v8f_efgh(i)));
+  ref[5] = (float)exp2(LOG2E * LN10 * v4f_y(v8f_efgh(i)));
+  ref[6] = (float)exp2(LOG2E * LN10 * v4f_z(v8f_efgh(i)));
+  ref[7] = (float)exp2(LOG2E * LN10 * v4f_w(v8f_efgh(i)));
+  CHKV8_EPS(j, ref, 1.e-6f);
+}
+
+static void
+test_log(void)
+{
+  const v8f_T i = v8f_set
+    (4.675f, 3.14f, 9.99999f, 1.234e-13f, 3.33e-3f, 0.98f, 8.f, 9.87654f);
+  CHKV8_FUNC_EPS(i, log, 1.e-6f);
+  CHKV8_FUNC_EPS(i, log2, 1.e-6f);
+  CHKV8_FUNC_EPS(i, log10, 1.e-6f);
+}
+
+static void
+test_misc(void)
+{
+  v8f_T i, j, k;
+  float ref[8];
+
+  i = v8f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f, 9.9f, -3.1f, 0.33e-6f, 1.f);
+  j = v8f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f, 5.f, 0.1e-19f, 1.234f, -0.45f);
+  k = v8f_copysign(i, j);
+  ref[0] = (float)copysign(v4f_x(v8f_abcd(i)), v4f_x(v8f_abcd(j)));
+  ref[1] = (float)copysign(v4f_y(v8f_abcd(i)), v4f_y(v8f_abcd(j)));
+  ref[2] = (float)copysign(v4f_z(v8f_abcd(i)), v4f_z(v8f_abcd(j)));
+  ref[3] = (float)copysign(v4f_w(v8f_abcd(i)), v4f_w(v8f_abcd(j)));
+  ref[4] = (float)copysign(v4f_x(v8f_efgh(i)), v4f_x(v8f_efgh(j)));
+  ref[5] = (float)copysign(v4f_y(v8f_efgh(i)), v4f_y(v8f_efgh(j)));
+  ref[6] = (float)copysign(v4f_z(v8f_efgh(i)), v4f_z(v8f_efgh(j)));
+  ref[7] = (float)copysign(v4f_w(v8f_efgh(i)), v4f_w(v8f_efgh(j)));
+  CHKV8_EPS(k, ref, 1.e-6f);
+
+  CHKV8_FUNC_EPS(i, floor, 1.e-6f);
+
+  k = v8f_pow(v8f_abs(i), j);
+  ref[0] = (float)pow(fabsf(v4f_x(v8f_abcd(i))), v4f_x(v8f_abcd(j)));
+  ref[1] = (float)pow(fabsf(v4f_y(v8f_abcd(i))), v4f_y(v8f_abcd(j)));
+  ref[2] = (float)pow(fabsf(v4f_z(v8f_abcd(i))), v4f_z(v8f_abcd(j)));
+  ref[3] = (float)pow(fabsf(v4f_w(v8f_abcd(i))), v4f_w(v8f_abcd(j)));
+  ref[4] = (float)pow(fabsf(v4f_x(v8f_efgh(i))), v4f_x(v8f_efgh(j)));
+  ref[5] = (float)pow(fabsf(v4f_y(v8f_efgh(i))), v4f_y(v8f_efgh(j)));
+  ref[6] = (float)pow(fabsf(v4f_z(v8f_efgh(i))), v4f_z(v8f_efgh(j)));
+  ref[7] = (float)pow(fabsf(v4f_w(v8f_efgh(i))), v4f_w(v8f_efgh(j)));
+  CHKV8_EPS(k, ref, 1.e-6f);
+}
+
+int
+main(int argc, char** argv)
+{
+  (void)argc, (void)argv;
+
+  test_trigo();
+  test_exp();
+  test_log();
+  test_misc();
+
+  return 0;
+}
+
diff --git a/src/vXf_begin.h b/src/vXf_begin.h
@@ -0,0 +1,57 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+
+/* This file cannot be included again until vXf_end.h has been included */
+#ifdef VXF_BEGIN_H
+  #error "The vXf_begin.h header is already included"
+#endif
+#define VXF_BEGIN_H
+
+/* Check parameter */
+#if !defined(RSIMD_WIDTH__)
+  #error "Undefined RSIMD_WIDTH__ macro"
+#endif
+#if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8
+  #error "Unexpected RSIMD_WIDTH__ value of "STR(RSIMD_WIDTH__)
+#endif
+
+/* Check that internal macros are not already defined */
+#if defined(RSIMD_vXf__)                                                       \
+ || defined(RSIMD_vXf_T__)                                                     \
+ || defined(RSIMD_Sleef__)                                                     \
+ || defined(RSIMD_Sleef_ULP__)                                                 \
+ || defined(RSIMD_Sleef_vecf__)
+ #error "Unexpected macro definition"
+#endif
+
+/* Macros generic to RSIMD_WIDTH__ */
+#define RSIMD_vXf__(Func) \
+  CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func)
+#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T)
+
+/* Sleef macros */
+#define RSIMD_Sleef__(Func) CONCAT(CONCAT(Sleef_, Func), RSIMD_WIDTH__)
+#define RSIMD_Sleef_ULP__(Func, Suffix) \
+   CONCAT(CONCAT(CONCAT(CONCAT(Sleef_, Func), RSIMD_WIDTH__), _), Suffix)
+
+/* Vector types of the Sleef library */
+#if RSIMD_WIDTH__ == 4
+  #define RSIMD_Sleef_vecf__(Dim) CONCAT(Sleef___m128_, Dim)
+#elif RSIMD_WIDTH__ == 8
+  #define RSIMD_Sleef_vecf__(Dim) CONCAT(Sleef___m256_, Dim)
+#endif
+
diff --git a/src/vXf_end.h b/src/vXf_end.h
@@ -0,0 +1,31 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef VXF_BEGIN_H
+  #error "The vXf_begin.h file must be included"
+#endif
+
+/* Undef helper macros */
+#undef RSIMD_vXf__
+#undef RSIMD_vXf_T__
+#undef RSIMD_Sleef__
+#undef RSIMD_Sleef_ULP__
+#undef RSIMD_Sleef_vecf__
+
+/* Undef parameters */
+#undef RSIMD_WIDTH__
+
+#undef VXF_BEGIN_H
+

	rsimd Make SIMD instruction sets easier to use
	git clone git://git.meso-star.fr/rsimd.git
	Log \| Files \| Refs \| README \| LICENSE

M	cmake/CMakeLists.txt	\|	14	++++++++++----
M	src/math.h	\|	141	++++---------------------------------------------------------------------------
A	src/math4.h	\|	41	+++++++++++++++++++++++++++++++++++++++++
A	src/math8.h	\|	24	++++++++++++++++++++++++
A	src/mathX.h	\|	137	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/soaXfY_begin.h	\|	12	+++---------
M	src/soaXfY_end.h	\|	4	++--
D	src/test_math.c	\|	138	-------------------------------------------------------------------------------
A	src/test_math4.c	\|	138	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/test_math8.c	\|	172	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/vXf_begin.h	\|	57	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/vXf_end.h	\|	31	+++++++++++++++++++++++++++++++