Rely on the Sleef library for math functions - rsimd - Make SIMD instruction sets easier to use

commit e93ccab448df745675af5dcba0f3750ff34a5e71
parent 9ede110fda406d3138d015206253f20c9b0c90db
Author: vaplv <vaplv@free.fr>
Date:   Fri, 29 Jan 2021 17:00:29 +0100

Rely on the Sleef library for math functions

Add several math functions

Diffstat:
M cmake/CMakeLists.txt  | 15 +++++++++++----
A cmake/SleefConfig.cmake  | 35 +++++++++++++++++++++++++++++++++++
M src/aosq.h  | 1 +
A src/math.h  | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/sse/ssef.c  | 150 -------------------------------------------------------------------------------
M src/sse/ssef.h  | 48 ------------------------------------------------
A src/test_math.c  | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/test_v4f.c  | 49 +------------------------------------------------

8 files changed, 345 insertions(+), 250 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -13,7 +13,7 @@
 # You should have received a copy of the GNU General Public License
 # along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>.
 
-cmake_minimum_required(VERSION 2.6)
+cmake_minimum_required(VERSION 3.1)
 project(rsimd C)
 cmake_policy(SET CMP0011 NEW)
 enable_testing()
@@ -24,10 +24,15 @@ set(RSIMD_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../src)
 ################################################################################
 # Check dependencies
 ################################################################################
+get_filename_component(_current_source_dir ${CMAKE_CURRENT_LIST_FILE} PATH)
+set(Sleef_DIR ${_current_source_dir}/)
+
 find_package(RCMake REQUIRED)
 find_package(RSys 0.7 REQUIRED)
+find_package(PkgConfig REQUIRED)
+find_package(Sleef REQUIRED)
 
-include_directories(${RSys_INCLUDE_DIR})
+include_directories(${RSys_INCLUDE_DIR} ${Sleef_INCLUDE_DIR})
 set(CMAKE_MODULE_PATH ${RCMAKE_SOURCE_DIR})
 include(rcmake)
 
@@ -52,6 +57,7 @@ set(RSIMD_FILES_INC_LEGACY
   aosf33.h
   aosf44.h
   aosq.h
+  math.h
   rsimd.h
   soaXfY.h
   soaXfY_begin.h
@@ -75,8 +81,7 @@ set(RSIMD_FILES_INC_AVX
   avx/avxi.h)
 set(RSIMD_FILES_SRC
   aosf44.c
-  aosq.c
-  sse/ssef.c)
+  aosq.c)
 set(RSIMD_FILES_DOC COPYING COPYING.LESSER README.md)
 rcmake_prepend_path(RSIMD_FILES_INC_LEGACY ${RSIMD_SOURCE_DIR})
 rcmake_prepend_path(RSIMD_FILES_INC_SSE ${RSIMD_SOURCE_DIR})
@@ -89,6 +94,7 @@ set(RSIMD_FILES_INC
   ${RSIMD_FILES_INC_AVX})
 
 add_library(rsimd SHARED ${RSIMD_FILES_INC} ${RSIMD_FILES_SRC})
+target_link_libraries(rsimd Sleef)
 set_target_properties(rsimd PROPERTIES  DEFINE_SYMBOL RSIMD_SHARED_BUILD)
 
 set_target_properties(rsimd PROPERTIES
@@ -119,6 +125,7 @@ if(NOT NO_TEST)
   new_test(test_aosf33)
   new_test(test_aosf44)
   new_test(test_aosq)
+  new_test(test_math)
   new_test(test_soa4f2)
   new_test(test_soa4f3)
   new_test(test_soa4f4)
diff --git a/cmake/SleefConfig.cmake b/cmake/SleefConfig.cmake
@@ -0,0 +1,35 @@
+# Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+#
+# The RSIMD CMake is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The RSIMD CMake is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>.
+
+cmake_minimum_required(VERSION 3.1)
+
+# Locate the Sleef public header and library; both results are cached
+find_path(Sleef_INCLUDE_DIR sleef.h)
+find_library(Sleef_LIBRARY sleef PATH_SUFFIXES lib64
+  DOC "Path to the sleef library")
+
+# Check the package before anything consumes the search results
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Sleef DEFAULT_MSG
+  Sleef_INCLUDE_DIR Sleef_LIBRARY)
+
+# Create the imported target once, and only from paths actually found
+if(Sleef_INCLUDE_DIR AND Sleef_LIBRARY AND NOT TARGET Sleef)
+  add_library(Sleef UNKNOWN IMPORTED) # shared or static: whatever was found
+  set_target_properties(Sleef PROPERTIES
+    IMPORTED_LOCATION "${Sleef_LIBRARY}"
+    INTERFACE_INCLUDE_DIRECTORIES "${Sleef_INCLUDE_DIR}")
+endif()
+
diff --git a/src/aosq.h b/src/aosq.h
@@ -17,6 +17,7 @@
 #define AOSQ_H
 
 #include "rsimd.h"
+#include "math.h"
 
 /*
  * Functions on AoS quaternion encoded into a v4f_T as { i, j, k, a }
diff --git a/src/math.h b/src/math.h
@@ -0,0 +1,159 @@
+/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH_H
+#define RSIMD_MATH_H
+
+#include "rsimd.h"
+
+#ifdef COMPILER_GCC
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#include <sleef.h>
+
+#ifdef COMPILER_GCC
+  #pragma GCC diagnostic pop
+#endif
+
+static FINLINE v4f_T
+v4f_copysign(const v4f_T x, const v4f_T y)
+{
+  return Sleef_copysignf4(x, y);
+}
+
+static INLINE v4f_T
+v4f_floor(const v4f_T x)
+{
+  return Sleef_floorf4(x);
+}
+
+static INLINE v4f_T
+v4f_pow(const v4f_T x, const v4f_T y)
+{
+  return Sleef_powf4_u10(x, y);
+}
+
+/*******************************************************************************
+ * Exponential functions
+ ******************************************************************************/
+static INLINE v4f_T
+v4f_exp2(const v4f_T x)
+{
+  return Sleef_exp2f4_u10(x);
+}
+
+static INLINE v4f_T
+v4f_exp(const v4f_T x)
+{
+  return Sleef_expf4_u10(x);
+}
+
+static INLINE v4f_T
+v4f_exp10(const v4f_T x)
+{
+  return Sleef_exp10f4_u10(x);
+}
+
+/*******************************************************************************
+ * Log functions
+ ******************************************************************************/
+static INLINE v4f_T
+v4f_log2(const v4f_T x)
+{
+  return Sleef_log2f4_u10(x);
+}
+
+static INLINE v4f_T
+v4f_log(const v4f_T x)
+{
+  return Sleef_logf4_u10(x);
+}
+
+static INLINE v4f_T
+v4f_log10(const v4f_T x)
+{
+  return Sleef_log10f4_u10(x);
+}
+
+/*******************************************************************************
+ * Trigonometric functions
+ ******************************************************************************/
+static INLINE v4f_T
+v4f_sin(const v4f_T v)
+{
+  return Sleef_sinf4_u10(v);
+}
+
+static INLINE v4f_T
+v4f_asin(const v4f_T v)
+{
+  return Sleef_asinf4_u10(v);
+}
+
+static INLINE v4f_T
+v4f_cos(const v4f_T v)
+{
+  return Sleef_cosf4_u10(v);
+}
+
+static INLINE v4f_T
+v4f_acos(const v4f_T v)
+{
+  return Sleef_acosf4_u10(v);
+}
+
+static INLINE void
+v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c)
+{
+  const Sleef___m128_2 r = Sleef_sincosf4_u10(v);
+  *s = r.x;
+  *c = r.y;
+}
+
+static INLINE v4f_T
+v4f_tan(const v4f_T v)
+{
+  return Sleef_tanf4_u10(v);
+}
+
+static INLINE v4f_T
+v4f_atan(const v4f_T v)
+{
+  return Sleef_atanf4_u10(v);
+}
+
+/*******************************************************************************
+ * Miscellaneous
+ ******************************************************************************/
+static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/
+v4f_xyz_to_rthetaphi(const v4f_T v)
+{
+  const v4f_T zero = v4f_zero();
+  const v4f_T len2 = v4f_len2(v);
+  const v4f_T len3 = v4f_len3(v);
+  const v4f_T theta = v4f_sel
+    (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
+  const v4f_T tmp_phi = v4f_sel
+    (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
+  const v4f_T phi = v4f_sel
+    (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero));
+
+  return v4f_xyab(v4f_xayb(len3, theta), phi);
+}
+
+
+#endif /* RSIMD_MATH_H */
diff --git a/src/sse/ssef.c b/src/sse/ssef.c
@@ -1,150 +0,0 @@
-/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
- *
- * The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * The RSIMD library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-
-#include "../rsimd.h"
-
-#define KC0 v4f_set1(0.63661977236f)
-#define KC1 v4f_set1(1.57079625129f)
-#define KC2 v4f_set1(7.54978995489e-8f)
-#define CC0 v4f_set1(-0.0013602249f)
-#define CC1 v4f_set1(0.0416566950f)
-#define CC2 v4f_set1(-0.4999990225f)
-#define SC0 v4f_set1(-0.0001950727f)
-#define SC1 v4f_set1(0.0083320758f)
-#define SC2 v4f_set1(-0.1666665247f)
-#define ONE v4f_set1(1.f)
-
-v4f_T
-v4f_sin(const v4f_T v)
-{
-  const v4i_T zeroi = v4i_zero();
-  const v4i_T onei = v4i_set1(1);
-  const v4i_T twoi = v4i_set1(2);
-  const v4i_T threei = v4i_set1(3);
-
-  const v4f_T x = v4f_mul(v, KC0);
-  const v4i_T q = v4f_to_v4i(x);
-  const v4i_T off = v4i_and(q, threei);
-  const v4f_T qf = v4i_to_v4f(q);
-
-  const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
-  const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
-  const v4f_T xl2 = v4f_mul(xl, xl);
-  const v4f_T xl3 = v4f_mul(xl2, xl);
-
-  const v4f_T cx =
-    v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
-  const v4f_T sx =
-    v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
-  const v4f_T mask0 = (v4f_T) v4i_eq(v4i_and(off, onei), zeroi);
-  const v4f_T mask1 = (v4f_T) v4i_eq(v4i_and(off, twoi), zeroi);
-  const v4f_T res = v4f_sel(cx, sx, mask0);
-  return v4f_sel(v4f_minus(res), res, mask1);
-}
-
-v4f_T
-v4f_cos(const v4f_T v)
-{
-  const v4i_T zeroi = v4i_zero();
-  const v4i_T onei = v4i_set1(1);
-  const v4i_T twoi = v4i_set1(2);
-  const v4i_T threei = v4i_set1(3);
-
-  const v4f_T x = v4f_mul(v, KC0);
-  const v4i_T q = v4f_to_v4i(x);
-  const v4i_T off = v4i_add(v4i_and(q, threei), onei);
-  const v4f_T qf = v4i_to_v4f(q);
-
-  const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
-  const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
-  const v4f_T xl2 = v4f_mul(xl, xl);
-  const v4f_T xl3 = v4f_mul(xl2, xl);
-
-  const v4f_T cx =
-    v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
-  const v4f_T sx =
-    v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
-  const v4f_T mask0 = (v4f_T) v4i_eq(v4i_and(off, onei), zeroi);
-  const v4f_T mask1 = (v4f_T) v4i_eq(v4i_and(off, twoi), zeroi);
-  const v4f_T res = v4f_sel(cx, sx, mask0);
-  return v4f_sel(v4f_minus(res), res, mask1);
-}
-
-void
-v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c)
-{
-  const v4i_T zeroi = v4i_zero();
-  const v4i_T onei = v4i_set1(1);
-  const v4i_T twoi = v4i_set1(2);
-  const v4i_T threei = v4i_set1(3);
-
-  const v4f_T x = v4f_mul(v, KC0);
-  const v4i_T q = v4f_to_v4i(x);
-  const v4i_T soff = v4i_and(q, threei);
-  const v4i_T coff = v4i_add(v4i_and(q, threei), onei);
-  const v4f_T qf = v4i_to_v4f(q);
-
-  const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
-  const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
-  const v4f_T xl2 = v4f_mul(xl, xl);
-  const v4f_T xl3 = v4f_mul(xl2, xl);
-
-  const v4f_T cx =
-    v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
-  const v4f_T sx =
-    v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
-  const v4f_T smask0 = (v4f_T) v4i_eq(v4i_and(soff, onei), zeroi);
-  const v4f_T smask1 = (v4f_T) v4i_eq(v4i_and(soff, twoi), zeroi);
-  const v4f_T sres = v4f_sel(cx, sx, smask0);
-
-  const v4f_T cmask0 = (v4f_T) v4i_eq(v4i_and(coff, onei), zeroi);
-  const v4f_T cmask1 = (v4f_T) v4i_eq(v4i_and(coff, twoi), zeroi);
-  const v4f_T cres = v4f_sel(cx, sx, cmask0);
-
-  *s = v4f_sel(v4f_minus(sres), sres, smask1);
-  *c = v4f_sel(v4f_minus(cres), cres, cmask1);
-}
-
-v4f_T
-v4f_acos(const v4f_T v)
-{
-  const v4f_T absv = v4f_abs(v);
-  const v4f_T t0 = v4f_sqrt(v4f_sub(v4f_set1(1.f), absv));
-  const v4f_T absv2 =v4f_mul(absv, absv);
-  const v4f_T absv4 = v4f_mul(absv2, absv2);
-
-  const v4f_T h0 = v4f_set1(-0.0012624911f);
-  const v4f_T h1 = v4f_set1(0.0066700901f);
-  const v4f_T h2 = v4f_set1(-0.0170881256f);
-  const v4f_T h3 = v4f_set1(0.0308918810f);
-  const v4f_T hi =
-    v4f_madd(v4f_madd(v4f_madd(h0, absv, h1), absv, h2), absv, h3);
-
-  const v4f_T l0 = v4f_set1(-0.0501743046f);
-  const v4f_T l1 = v4f_set1(0.0889789874f);
-  const v4f_T l2 = v4f_set1(-0.2145988016f);
-  const v4f_T l3 = v4f_set1((float)(PI*0.5));
-  const v4f_T lo =
-    v4f_madd(v4f_madd(v4f_madd(l0, absv, l1), absv, l2), absv, l3);
-
-  const v4f_T res = v4f_mul(v4f_madd(hi, absv4, lo), t0);
-  const v4f_T mask = v4f_lt(v, v4f_zero());
-
-  return v4f_sel(res, v4f_set1((float)PI) - res, mask);
-}
-
diff --git a/src/sse/ssef.h b/src/sse/ssef.h
@@ -473,35 +473,6 @@ v4f_normalize3(const v4f_T v)
 }
 
 /*******************************************************************************
- * Trigonometric operations
- ******************************************************************************/
-RSIMD_API v4f_T v4f_sin(const v4f_T v);
-RSIMD_API v4f_T v4f_cos(const v4f_T v);
-RSIMD_API v4f_T v4f_acos(const v4f_T v);
-RSIMD_API void v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c);
-
-static FINLINE v4f_T
-v4f_tan(const v4f_T v)
-{
-  v4f_T s, c;
-  v4f_sincos(v, &s, &c);
-  return v4f_div(s, c);
-}
-
-static FINLINE v4f_T
-v4f_asin(const v4f_T v)
-{
-  return v4f_sub(v4f_set1((float)(PI*0.5)), v4f_acos(v));
-}
-
-static FINLINE v4f_T
-v4f_atan(v4f_T v)
-{
-  const v4f_T tmp = v4f_rsqrt(v4f_madd(v, v, v4f_set1(1.f)));
-  return v4f_asin(v4f_mul(v, tmp));
-}
-
-/*******************************************************************************
  * Comparators
  ******************************************************************************/
 static FINLINE v4f_T
@@ -578,24 +549,5 @@ v4f_clamp(const v4f_T v, const v4f_T vmin, const v4f_T vmax)
   return v4f_min(v4f_max(v, vmin), vmax);
 }
 
-/*******************************************************************************
- * Miscellaneous
- ******************************************************************************/
-static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/
-v4f_xyz_to_rthetaphi(const v4f_T v)
-{
-  const v4f_T zero = v4f_zero();
-  const v4f_T len2 = v4f_len2(v);
-  const v4f_T len3 = v4f_len3(v);
-  const v4f_T theta = v4f_sel
-    (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
-  const v4f_T tmp_phi = v4f_sel
-    (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
-  const v4f_T phi = v4f_sel
-    (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero));
-
-  return v4f_xyab(v4f_xayb(len3, theta), phi);
-}
-
 #endif /* RSIMD_SSEF_H */
 
diff --git a/src/test_math.c b/src/test_math.c
@@ -0,0 +1,138 @@
+/* Copyright (C) 2013-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "rsimd.h"
+#include "math.h"
+
+#include <math.h>
+
+#define LOG2E 1.4426950408889634074 /* log_2 e */
+#define LN10 2.30258509299404568402 /* log_e 10 */
+
+#define CHKV4_EPS(V, Ref, Eps) {                                               \
+  CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps));                          \
+  CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps));                          \
+  CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps));                          \
+  CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps));                          \
+} (void)0
+
+#define CHKV4_FUNC_EPS(V, Func, Eps) {                                         \
+  const v4f_T r__ = v4f_##Func(V);                                             \
+  float ref__[4];                                                              \
+  ref__[0] = (float)Func(v4f_x(V));                                            \
+  ref__[1] = (float)Func(v4f_y(V));                                            \
+  ref__[2] = (float)Func(v4f_z(V));                                            \
+  ref__[3] = (float)Func(v4f_w(V));                                            \
+  CHKV4_EPS(r__, ref__, Eps);                                                  \
+} (void)0
+
+static void
+test_trigo(void)
+{
+  v4f_T i, j, k;
+  float ref[4];
+
+  i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+
+  CHKV4_FUNC_EPS(i, cos, 1.e-6);
+  CHKV4_FUNC_EPS(i, sin, 1.e-6);
+
+  v4f_sincos(i, &k, &j);
+  ref[0] = (float)sin(v4f_x(i));
+  ref[1] = (float)sin(v4f_y(i));
+  ref[2] = (float)sin(v4f_z(i));
+  ref[3] = (float)sin(v4f_w(i));
+  CHKV4_EPS(k, ref, 1.e-6f);
+  ref[0] = (float)cos(v4f_x(i));
+  ref[1] = (float)cos(v4f_y(i));
+  ref[2] = (float)cos(v4f_z(i));
+  ref[3] = (float)cos(v4f_w(i));
+  CHKV4_EPS(j, ref, 1.e-6f);
+
+  i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+  CHKV4_FUNC_EPS(i, tan, 1.e-6);
+  CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6);
+  CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6);
+  CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6);
+}
+
+static void
+test_exp(void)
+{
+  const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f);
+  v4f_T j;
+  float ref[4];
+
+  CHKV4_FUNC_EPS(i, exp, 1.e-6);
+  CHKV4_FUNC_EPS(i, exp2, 1.e-6);
+
+  j = v4f_exp10(i);
+  ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(i));
+  ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i));
+  ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i));
+  ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i));
+  CHKV4_EPS(j, ref, 1.e-6f);
+}
+
+static void
+test_log(void)
+{
+  const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f);
+
+  CHKV4_FUNC_EPS(i, log, 1.e-6);
+  CHKV4_FUNC_EPS(i, log2, 1.e-6);
+  CHKV4_FUNC_EPS(i, log10, 1.e-6);
+}
+
+static void
+test_misc(void)
+{
+  v4f_T i, j, k;
+  float ref[4];
+
+  i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f);
+  j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f);
+  k = v4f_copysign(i, j);
+  ref[0] = (float)copysign(v4f_x(i), v4f_x(j));
+  ref[1] = (float)copysign(v4f_y(i), v4f_y(j));
+  ref[2] = (float)copysign(v4f_z(i), v4f_z(j));
+  ref[3] = (float)copysign(v4f_w(i), v4f_w(j));
+  CHKV4_EPS(k, ref, 1.e-6f);
+
+  CHKV4_FUNC_EPS(i, floor, 1.e-6);
+
+  k = v4f_pow(v4f_abs(i), j);
+  ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j));
+  ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j));
+  ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j));
+  ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j));
+  CHKV4_EPS(k, ref, 1.e-6f);
+}
+
+int
+main(int argc, char** argv)
+{
+  (void)argc, (void)argv;
+
+  test_trigo();
+  test_exp();
+  test_log();
+  test_misc();
+
+  return 0;
+}
+
diff --git a/src/test_v4f.c b/src/test_v4f.c
@@ -14,6 +14,7 @@
  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
 
 #include "rsimd.h"
+#include "math.h"
 
 int
 main(int argc, char** argv)
@@ -378,54 +379,6 @@ main(int argc, char** argv)
   CHK(v4f_y(k) == 7.5f);
   CHK(v4f_z(k) == 7.f);
 
-  i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
-  k = v4f_cos(i);
-  CHK(eq_eps(v4f_x(k), (float)cos(PI/2.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_y(k), (float)cos(PI/3.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_z(k), (float)cos(PI/4.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_w(k), (float)cos(PI/6.0), 1.e-6f) == 1);
-
-  k = v4f_sin(i);
-  CHK(eq_eps(v4f_x(k), (float)sin(PI/2.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_y(k), (float)sin(PI/3.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_z(k), (float)sin(PI/4.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_w(k), (float)sin(PI/6.0), 1.e-6f) == 1);
-
-  v4f_sincos(i, &k, &j);
-  CHK(eq_eps(v4f_x(k), (float)sin(PI/2.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_y(k), (float)sin(PI/3.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_z(k), (float)sin(PI/4.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_w(k), (float)sin(PI/6.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_x(j), (float)cos(PI/2.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_y(j), (float)cos(PI/3.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_z(j), (float)cos(PI/4.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_w(j), (float)cos(PI/6.0), 1.e-6f) == 1);
-
-  i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
-  k = v4f_tan(i);
-  CHK(eq_eps(v4f_x(k), (float)tan(PI/8.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_y(k), (float)tan(PI/3.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_z(k), (float)tan(PI/4.0), 1.e-6f) == 1);
-  CHK(eq_eps(v4f_w(k), (float)tan(PI/6.0), 1.e-6f) == 1);
-
-  k = v4f_acos(v4f_cos(i));
-  CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1);
-
-  k = v4f_asin(v4f_sin(i));
-  CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1);
-
-  k = v4f_atan(v4f_tan(i));
-  CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1);
-  CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1);
-
   i = v4f_set(1.f, 2.f, 3.f, 4.f);
   j = v4f_set(-2.f, -4.f, 3.f, 6.f);
   k = v4f_eq(i, j);

	rsimd Make SIMD instruction sets easier to use
	git clone git://git.meso-star.fr/rsimd.git
	Log \| Files \| Refs \| README \| LICENSE

M	cmake/CMakeLists.txt	\|	15	+++++++++++----
A	cmake/SleefConfig.cmake	\|	35	+++++++++++++++++++++++++++++++++++
M	src/aosq.h	\|	1	+
A	src/math.h	\|	159	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	src/sse/ssef.c	\|	150	-------------------------------------------------------------------------------
M	src/sse/ssef.h	\|	48	------------------------------------------------
A	src/test_math.c	\|	138	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/test_v4f.c	\|	49	+------------------------------------------------