commit e93ccab448df745675af5dcba0f3750ff34a5e71
parent 9ede110fda406d3138d015206253f20c9b0c90db
Author: vaplv <vaplv@free.fr>
Date: Fri, 29 Jan 2021 17:00:29 +0100
Rely on the Sleef library for math functions
Add several math functions
Diffstat:
8 files changed, 345 insertions(+), 250 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>.
-cmake_minimum_required(VERSION 2.6)
+cmake_minimum_required(VERSION 3.1)
project(rsimd C)
cmake_policy(SET CMP0011 NEW)
enable_testing()
@@ -24,10 +24,15 @@ set(RSIMD_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../src)
################################################################################
# Check dependencies
################################################################################
+# Directory that holds this CMakeLists.txt. The SleefConfig.cmake package file
+# added by this commit lives in the same cmake/ directory, so Sleef_DIR is
+# pointed at it to let find_package(Sleef) load that configuration file.
+get_filename_component(_current_source_dir ${CMAKE_CURRENT_LIST_FILE} PATH)
+set(Sleef_DIR ${_current_source_dir}/)
+
find_package(RCMake REQUIRED)
find_package(RSys 0.7 REQUIRED)
+# NOTE(review): no pkg-config command is used in the visible part of this
+# file -- confirm that PkgConfig is really a hard requirement.
+find_package(PkgConfig REQUIRED)
+find_package(Sleef REQUIRED)
-include_directories(${RSys_INCLUDE_DIR})
+include_directories(${RSys_INCLUDE_DIR} ${Sleef_INCLUDE_DIR})
set(CMAKE_MODULE_PATH ${RCMAKE_SOURCE_DIR})
include(rcmake)
@@ -52,6 +57,7 @@ set(RSIMD_FILES_INC_LEGACY
aosf33.h
aosf44.h
aosq.h
+ math.h
rsimd.h
soaXfY.h
soaXfY_begin.h
@@ -75,8 +81,7 @@ set(RSIMD_FILES_INC_AVX
avx/avxi.h)
set(RSIMD_FILES_SRC
aosf44.c
- aosq.c
- sse/ssef.c)
+ aosq.c)
set(RSIMD_FILES_DOC COPYING COPYING.LESSER README.md)
rcmake_prepend_path(RSIMD_FILES_INC_LEGACY ${RSIMD_SOURCE_DIR})
rcmake_prepend_path(RSIMD_FILES_INC_SSE ${RSIMD_SOURCE_DIR})
@@ -89,6 +94,7 @@ set(RSIMD_FILES_INC
${RSIMD_FILES_INC_AVX})
add_library(rsimd SHARED ${RSIMD_FILES_INC} ${RSIMD_FILES_SRC})
+target_link_libraries(rsimd Sleef)
set_target_properties(rsimd PROPERTIES DEFINE_SYMBOL RSIMD_SHARED_BUILD)
set_target_properties(rsimd PROPERTIES
@@ -119,6 +125,7 @@ if(NOT NO_TEST)
new_test(test_aosf33)
new_test(test_aosf44)
new_test(test_aosq)
+ new_test(test_math)
new_test(test_soa4f2)
new_test(test_soa4f3)
new_test(test_soa4f4)
diff --git a/cmake/SleefConfig.cmake b/cmake/SleefConfig.cmake
@@ -0,0 +1,35 @@
+# Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+#
+# The RSIMD CMake is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The RSIMD CMake is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>.
+
+cmake_minimum_required(VERSION 3.1)
+
+# Locate the directory that provides the public Sleef header.
+find_path(Sleef_INCLUDE_DIR sleef.h)
+
+# Locate the Sleef library; the lib64 suffix is searched in addition to the
+# default library directories.
+find_library(Sleef_LIBRARY sleef PATH_SUFFIXES lib64
+  DOC "Path to the sleef library")
+
+# Validate the package *before* declaring the imported target, so that a failed
+# lookup is reported here rather than as a target whose location is
+# "Sleef_LIBRARY-NOTFOUND".
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Sleef DEFAULT_MSG
+  Sleef_INCLUDE_DIR
+  Sleef_LIBRARY)
+
+# Create the imported library target. The TARGET guard keeps this file safe to
+# load several times in the same build tree (add_library would otherwise fail
+# on the duplicated target name). The result variables are tested directly
+# since the name of the *_FOUND variable set by the check above depends on the
+# CMake version.
+if(Sleef_INCLUDE_DIR AND Sleef_LIBRARY AND NOT TARGET Sleef)
+  add_library(Sleef SHARED IMPORTED)
+  set_target_properties(Sleef PROPERTIES
+    IMPORTED_LOCATION "${Sleef_LIBRARY}"
+    INTERFACE_INCLUDE_DIRECTORIES "${Sleef_INCLUDE_DIR}")
+endif()
+
diff --git a/src/aosq.h b/src/aosq.h
@@ -17,6 +17,7 @@
#define AOSQ_H
#include "rsimd.h"
+#include "math.h"
/*
* Functions on AoS quaternion encoded into a v4f_T as { i, j, k, a }
diff --git a/src/math.h b/src/math.h
@@ -0,0 +1,159 @@
+/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH_H
+#define RSIMD_MATH_H
+
+#include "rsimd.h"
+
+#ifdef COMPILER_GCC
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#include <sleef.h>
+
+#ifdef COMPILER_GCC
+ #pragma GCC diagnostic pop
+#endif
+
+/* Return, for each lane, the magnitude of `x' combined with the sign of `y'.
+ * The computation is forwarded to Sleef_copysignf4. */
+static FINLINE v4f_T
+v4f_copysign(const v4f_T x, const v4f_T y)
+{
+  const v4f_T res = Sleef_copysignf4(x, y);
+  return res;
+}
+
+/* Per-lane floor of `x'. The computation is forwarded to Sleef_floorf4. */
+static INLINE v4f_T
+v4f_floor(const v4f_T x)
+{
+  const v4f_T res = Sleef_floorf4(x);
+  return res;
+}
+
+/* Raise each lane of `x' to the power stored in the matching lane of `y'.
+ * Relies on the `_u10' Sleef variant (1.0 ULP error bound according to the
+ * Sleef naming scheme). */
+static INLINE v4f_T
+v4f_pow(const v4f_T x, const v4f_T y)
+{
+  const v4f_T res = Sleef_powf4_u10(x, y);
+  return res;
+}
+
+/*******************************************************************************
+ * Exponential functions
+ ******************************************************************************/
+/* Per-lane base-2 exponential of `x', i.e. 2^x. Relies on the `_u10' Sleef
+ * variant (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_exp2(const v4f_T x)
+{
+  const v4f_T res = Sleef_exp2f4_u10(x);
+  return res;
+}
+
+/* Per-lane natural exponential of `x', i.e. e^x. Relies on the `_u10' Sleef
+ * variant (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_exp(const v4f_T x)
+{
+  const v4f_T res = Sleef_expf4_u10(x);
+  return res;
+}
+
+/* Per-lane base-10 exponential of `x', i.e. 10^x. Relies on the `_u10' Sleef
+ * variant (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_exp10(const v4f_T x)
+{
+  const v4f_T res = Sleef_exp10f4_u10(x);
+  return res;
+}
+
+/*******************************************************************************
+ * Log functions
+ ******************************************************************************/
+/* Per-lane base-2 logarithm of `x'. Relies on the `_u10' Sleef variant
+ * (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_log2(const v4f_T x)
+{
+  const v4f_T res = Sleef_log2f4_u10(x);
+  return res;
+}
+
+/* Per-lane natural logarithm of `x'. Relies on the `_u10' Sleef variant
+ * (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_log(const v4f_T x)
+{
+  const v4f_T res = Sleef_logf4_u10(x);
+  return res;
+}
+
+/* Per-lane base-10 logarithm of `x'. Relies on the `_u10' Sleef variant
+ * (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_log10(const v4f_T x)
+{
+  const v4f_T res = Sleef_log10f4_u10(x);
+  return res;
+}
+
+/*******************************************************************************
+ * Trigonometric functions
+ ******************************************************************************/
+/* Per-lane sine of the angles stored in `v'. Relies on the `_u10' Sleef
+ * variant (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_sin(const v4f_T v)
+{
+  const v4f_T res = Sleef_sinf4_u10(v);
+  return res;
+}
+
+static INLINE v4f_T
+v4f_asin(const v4f_T v)
+{
+ return Sleef_asinf4_u10(v);
+}
+
+/* Per-lane cosine of the angles stored in `v'. Relies on the `_u10' Sleef
+ * variant (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_cos(const v4f_T v)
+{
+  const v4f_T res = Sleef_cosf4_u10(v);
+  return res;
+}
+
+/* Per-lane arc cosine of `v'. Relies on the `_u10' Sleef variant (1.0 ULP
+ * error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_acos(const v4f_T v)
+{
+  const v4f_T res = Sleef_acosf4_u10(v);
+  return res;
+}
+
+/* Compute both the sine and the cosine of each lane of `v' with a single call
+ * to Sleef_sincosf4_u10. The sines are written to `*s' and the cosines to
+ * `*c'. */
+static INLINE void
+v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c)
+{
+  Sleef___m128_2 sin_cos;
+
+  sin_cos = Sleef_sincosf4_u10(v);
+  *s = sin_cos.x; /* 1st member of the pair: stored as the sines */
+  *c = sin_cos.y; /* 2nd member of the pair: stored as the cosines */
+}
+
+/* Per-lane tangent of the angles stored in `v'. Relies on the `_u10' Sleef
+ * variant (1.0 ULP error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_tan(const v4f_T v)
+{
+  const v4f_T res = Sleef_tanf4_u10(v);
+  return res;
+}
+
+/* Per-lane arc tangent of `v'. Relies on the `_u10' Sleef variant (1.0 ULP
+ * error bound according to the Sleef naming scheme). */
+static INLINE v4f_T
+v4f_atan(const v4f_T v)
+{
+  const v4f_T res = Sleef_atanf4_u10(v);
+  return res;
+}
+
+/*******************************************************************************
+ * Miscellaneous
+ ******************************************************************************/
+/* Convert the Cartesian coordinates (xyz) stored in `v' into spherical
+ * coordinates (r, theta, phi):
+ *  - r is v4f_len3(v);
+ *  - theta is acos(z / r);
+ *  - phi is built from asin(y / len2(v)) and is mirrored around PI/2 for the
+ *    lanes whose x is negative.
+ * An angle whose denominator is null is set to 0 rather than being computed
+ * from a division by zero. The result is packed with v4f_xayb/v4f_xyab. */
+static FINLINE v4f_T
+v4f_xyz_to_rthetaphi(const v4f_T v)
+{
+  const v4f_T zero = v4f_zero();
+  const v4f_T pi = v4f_set1((float)PI);
+  const v4f_T r = v4f_len3(v);
+  const v4f_T r_xy = v4f_len2(v);
+
+  /* Raw angles; their value is discarded below when the length is null */
+  const v4f_T acos_z = v4f_acos(v4f_div(v4f_zzzz(v), r));
+  const v4f_T asin_y = v4f_asin(v4f_div(v4f_yyyy(v), r_xy));
+
+  const v4f_T theta = v4f_sel(acos_z, zero, v4f_eq(r, zero));
+  const v4f_T phi_asin = v4f_sel(asin_y, zero, v4f_eq(r_xy, zero));
+
+  /* Keep the asin result when x >= 0, use PI - asin otherwise */
+  const v4f_T is_x_positive = v4f_ge(v4f_xxxx(v), zero);
+  const v4f_T phi = v4f_sel(v4f_sub(pi, phi_asin), phi_asin, is_x_positive);
+
+  return v4f_xyab(v4f_xayb(r, theta), phi);
+}
+
+
+#endif /* RSIMD_MATH_H */
diff --git a/src/sse/ssef.c b/src/sse/ssef.c
@@ -1,150 +0,0 @@
-/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr)
- *
- * The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * The RSIMD library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-
-#include "../rsimd.h"
-
-#define KC0 v4f_set1(0.63661977236f)
-#define KC1 v4f_set1(1.57079625129f)
-#define KC2 v4f_set1(7.54978995489e-8f)
-#define CC0 v4f_set1(-0.0013602249f)
-#define CC1 v4f_set1(0.0416566950f)
-#define CC2 v4f_set1(-0.4999990225f)
-#define SC0 v4f_set1(-0.0001950727f)
-#define SC1 v4f_set1(0.0083320758f)
-#define SC2 v4f_set1(-0.1666665247f)
-#define ONE v4f_set1(1.f)
-
-v4f_T
-v4f_sin(const v4f_T v)
-{
- const v4i_T zeroi = v4i_zero();
- const v4i_T onei = v4i_set1(1);
- const v4i_T twoi = v4i_set1(2);
- const v4i_T threei = v4i_set1(3);
-
- const v4f_T x = v4f_mul(v, KC0);
- const v4i_T q = v4f_to_v4i(x);
- const v4i_T off = v4i_and(q, threei);
- const v4f_T qf = v4i_to_v4f(q);
-
- const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
- const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
- const v4f_T xl2 = v4f_mul(xl, xl);
- const v4f_T xl3 = v4f_mul(xl2, xl);
-
- const v4f_T cx =
- v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
- const v4f_T sx =
- v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
- const v4f_T mask0 = (v4f_T) v4i_eq(v4i_and(off, onei), zeroi);
- const v4f_T mask1 = (v4f_T) v4i_eq(v4i_and(off, twoi), zeroi);
- const v4f_T res = v4f_sel(cx, sx, mask0);
- return v4f_sel(v4f_minus(res), res, mask1);
-}
-
-v4f_T
-v4f_cos(const v4f_T v)
-{
- const v4i_T zeroi = v4i_zero();
- const v4i_T onei = v4i_set1(1);
- const v4i_T twoi = v4i_set1(2);
- const v4i_T threei = v4i_set1(3);
-
- const v4f_T x = v4f_mul(v, KC0);
- const v4i_T q = v4f_to_v4i(x);
- const v4i_T off = v4i_add(v4i_and(q, threei), onei);
- const v4f_T qf = v4i_to_v4f(q);
-
- const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
- const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
- const v4f_T xl2 = v4f_mul(xl, xl);
- const v4f_T xl3 = v4f_mul(xl2, xl);
-
- const v4f_T cx =
- v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
- const v4f_T sx =
- v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
- const v4f_T mask0 = (v4f_T) v4i_eq(v4i_and(off, onei), zeroi);
- const v4f_T mask1 = (v4f_T) v4i_eq(v4i_and(off, twoi), zeroi);
- const v4f_T res = v4f_sel(cx, sx, mask0);
- return v4f_sel(v4f_minus(res), res, mask1);
-}
-
-void
-v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c)
-{
- const v4i_T zeroi = v4i_zero();
- const v4i_T onei = v4i_set1(1);
- const v4i_T twoi = v4i_set1(2);
- const v4i_T threei = v4i_set1(3);
-
- const v4f_T x = v4f_mul(v, KC0);
- const v4i_T q = v4f_to_v4i(x);
- const v4i_T soff = v4i_and(q, threei);
- const v4i_T coff = v4i_add(v4i_and(q, threei), onei);
- const v4f_T qf = v4i_to_v4f(q);
-
- const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
- const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
- const v4f_T xl2 = v4f_mul(xl, xl);
- const v4f_T xl3 = v4f_mul(xl2, xl);
-
- const v4f_T cx =
- v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
- const v4f_T sx =
- v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
- const v4f_T smask0 = (v4f_T) v4i_eq(v4i_and(soff, onei), zeroi);
- const v4f_T smask1 = (v4f_T) v4i_eq(v4i_and(soff, twoi), zeroi);
- const v4f_T sres = v4f_sel(cx, sx, smask0);
-
- const v4f_T cmask0 = (v4f_T) v4i_eq(v4i_and(coff, onei), zeroi);
- const v4f_T cmask1 = (v4f_T) v4i_eq(v4i_and(coff, twoi), zeroi);
- const v4f_T cres = v4f_sel(cx, sx, cmask0);
-
- *s = v4f_sel(v4f_minus(sres), sres, smask1);
- *c = v4f_sel(v4f_minus(cres), cres, cmask1);
-}
-
-v4f_T
-v4f_acos(const v4f_T v)
-{
- const v4f_T absv = v4f_abs(v);
- const v4f_T t0 = v4f_sqrt(v4f_sub(v4f_set1(1.f), absv));
- const v4f_T absv2 =v4f_mul(absv, absv);
- const v4f_T absv4 = v4f_mul(absv2, absv2);
-
- const v4f_T h0 = v4f_set1(-0.0012624911f);
- const v4f_T h1 = v4f_set1(0.0066700901f);
- const v4f_T h2 = v4f_set1(-0.0170881256f);
- const v4f_T h3 = v4f_set1(0.0308918810f);
- const v4f_T hi =
- v4f_madd(v4f_madd(v4f_madd(h0, absv, h1), absv, h2), absv, h3);
-
- const v4f_T l0 = v4f_set1(-0.0501743046f);
- const v4f_T l1 = v4f_set1(0.0889789874f);
- const v4f_T l2 = v4f_set1(-0.2145988016f);
- const v4f_T l3 = v4f_set1((float)(PI*0.5));
- const v4f_T lo =
- v4f_madd(v4f_madd(v4f_madd(l0, absv, l1), absv, l2), absv, l3);
-
- const v4f_T res = v4f_mul(v4f_madd(hi, absv4, lo), t0);
- const v4f_T mask = v4f_lt(v, v4f_zero());
-
- return v4f_sel(res, v4f_set1((float)PI) - res, mask);
-}
-
diff --git a/src/sse/ssef.h b/src/sse/ssef.h
@@ -473,35 +473,6 @@ v4f_normalize3(const v4f_T v)
}
/*******************************************************************************
- * Trigonometric operations
- ******************************************************************************/
-RSIMD_API v4f_T v4f_sin(const v4f_T v);
-RSIMD_API v4f_T v4f_cos(const v4f_T v);
-RSIMD_API v4f_T v4f_acos(const v4f_T v);
-RSIMD_API void v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c);
-
-static FINLINE v4f_T
-v4f_tan(const v4f_T v)
-{
- v4f_T s, c;
- v4f_sincos(v, &s, &c);
- return v4f_div(s, c);
-}
-
-static FINLINE v4f_T
-v4f_asin(const v4f_T v)
-{
- return v4f_sub(v4f_set1((float)(PI*0.5)), v4f_acos(v));
-}
-
-static FINLINE v4f_T
-v4f_atan(v4f_T v)
-{
- const v4f_T tmp = v4f_rsqrt(v4f_madd(v, v, v4f_set1(1.f)));
- return v4f_asin(v4f_mul(v, tmp));
-}
-
-/*******************************************************************************
* Comparators
******************************************************************************/
static FINLINE v4f_T
@@ -578,24 +549,5 @@ v4f_clamp(const v4f_T v, const v4f_T vmin, const v4f_T vmax)
return v4f_min(v4f_max(v, vmin), vmax);
}
-/*******************************************************************************
- * Miscellaneous
- ******************************************************************************/
-static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/
-v4f_xyz_to_rthetaphi(const v4f_T v)
-{
- const v4f_T zero = v4f_zero();
- const v4f_T len2 = v4f_len2(v);
- const v4f_T len3 = v4f_len3(v);
- const v4f_T theta = v4f_sel
- (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
- const v4f_T tmp_phi = v4f_sel
- (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
- const v4f_T phi = v4f_sel
- (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero));
-
- return v4f_xyab(v4f_xayb(len3, theta), phi);
-}
-
#endif /* RSIMD_SSEF_H */
diff --git a/src/test_math.c b/src/test_math.c
@@ -0,0 +1,138 @@
+/* Copyright (C) 2013-2019 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "rsimd.h"
+#include "math.h"
+
+#include <math.h>
+
+#define LOG2E 1.4426950408889634074 /* log_2 e */
+#define LN10 2.30258509299404568402 /* log_e 10 */
+
+/* Check that each lane of the vector `V' matches the corresponding entry of
+ * the 4-float array `Ref'. `Eps' is a relative tolerance: the absolute
+ * tolerance used for a lane is |Ref[lane]| * Eps. The `Ref' and `Eps'
+ * arguments are parenthesized so that compound expressions (e.g. `a + b') can
+ * be submitted without changing the meaning of the checks. */
+#define CHKV4_EPS(V, Ref, Eps) { \
+  CHK(eq_eps(v4f_x(V), (Ref)[0], fabsf((Ref)[0]) * (Eps))); \
+  CHK(eq_eps(v4f_y(V), (Ref)[1], fabsf((Ref)[1]) * (Eps))); \
+  CHK(eq_eps(v4f_z(V), (Ref)[2], fabsf((Ref)[2]) * (Eps))); \
+  CHK(eq_eps(v4f_w(V), (Ref)[3], fabsf((Ref)[3]) * (Eps))); \
+} (void)0
+
+/* Check the vector function v4f_<Func> against the scalar function <Func>:
+ * v4f_<Func>(V) is compared, lane by lane, to <Func> applied on each lane of
+ * `V', with the relative tolerance `Eps' (see CHKV4_EPS). `V' is evaluated
+ * once and stored in a local variable, so that an expression such as
+ * `v4f_cos(i)' is not recomputed for each of its uses. */
+#define CHKV4_FUNC_EPS(V, Func, Eps) { \
+  const v4f_T v__ = (V); \
+  const v4f_T r__ = v4f_##Func(v__); \
+  float ref__[4]; \
+  ref__[0] = (float)Func(v4f_x(v__)); \
+  ref__[1] = (float)Func(v4f_y(v__)); \
+  ref__[2] = (float)Func(v4f_z(v__)); \
+  ref__[3] = (float)Func(v4f_w(v__)); \
+  CHKV4_EPS(r__, ref__, (Eps)); \
+} (void)0
+
+/* Compare the trigonometric functions of math.h to the scalar functions of
+ * the standard C library, on a few fractions of PI. */
+static void
+test_trigo(void)
+{
+  v4f_T ang, sines, cosines;
+  float expected[4];
+
+  ang = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+
+  CHKV4_FUNC_EPS(ang, cos, 1.e-6);
+  CHKV4_FUNC_EPS(ang, sin, 1.e-6);
+
+  /* The 2nd argument of sincos receives the sines, the 3rd one the cosines */
+  v4f_sincos(ang, &sines, &cosines);
+
+  expected[0] = (float)sin(v4f_x(ang));
+  expected[1] = (float)sin(v4f_y(ang));
+  expected[2] = (float)sin(v4f_z(ang));
+  expected[3] = (float)sin(v4f_w(ang));
+  CHKV4_EPS(sines, expected, 1.e-6f);
+
+  expected[0] = (float)cos(v4f_x(ang));
+  expected[1] = (float)cos(v4f_y(ang));
+  expected[2] = (float)cos(v4f_z(ang));
+  expected[3] = (float)cos(v4f_w(ang));
+  CHKV4_EPS(cosines, expected, 1.e-6f);
+
+  /* Tangent, then the inverse functions applied to the direct ones */
+  ang = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+  CHKV4_FUNC_EPS(ang, tan, 1.e-6);
+  CHKV4_FUNC_EPS(v4f_cos(ang), acos, 1.e-6);
+  CHKV4_FUNC_EPS(v4f_sin(ang), asin, 1.e-6);
+  CHKV4_FUNC_EPS(v4f_tan(ang), atan, 1.e-6);
+}
+
+/* Compare the exponential functions of math.h to the scalar functions of the
+ * standard C library. */
+static void
+test_exp(void)
+{
+  const v4f_T x = v4f_set(1.f, -1.234f, 0.f, 3.14156f);
+  v4f_T res;
+  float expected[4];
+
+  CHKV4_FUNC_EPS(x, exp, 1.e-6);
+  CHKV4_FUNC_EPS(x, exp2, 1.e-6);
+
+  /* The reference value of 10^x is computed as 2^(log2(e) * ln(10) * x) */
+  res = v4f_exp10(x);
+  expected[0] = (float)exp2(LOG2E * LN10 * v4f_x(x));
+  expected[1] = (float)exp2(LOG2E * LN10 * v4f_y(x));
+  expected[2] = (float)exp2(LOG2E * LN10 * v4f_z(x));
+  expected[3] = (float)exp2(LOG2E * LN10 * v4f_w(x));
+  CHKV4_EPS(res, expected, 1.e-6f);
+}
+
+/* Compare the logarithm functions of math.h to the scalar functions of the
+ * standard C library, on strictly positive inputs. */
+static void
+test_log(void)
+{
+  const v4f_T x = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f);
+
+  CHKV4_FUNC_EPS(x, log, 1.e-6);
+  CHKV4_FUNC_EPS(x, log2, 1.e-6);
+  CHKV4_FUNC_EPS(x, log10, 1.e-6);
+}
+
+/* Compare v4f_copysign, v4f_floor and v4f_pow to the scalar functions of the
+ * standard C library. */
+static void
+test_misc(void)
+{
+  v4f_T a, b, res;
+  float expected[4];
+
+  a = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f);
+  b = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f);
+
+  /* Magnitude of `a' with the sign of `b' */
+  res = v4f_copysign(a, b);
+  expected[0] = (float)copysign(v4f_x(a), v4f_x(b));
+  expected[1] = (float)copysign(v4f_y(a), v4f_y(b));
+  expected[2] = (float)copysign(v4f_z(a), v4f_z(b));
+  expected[3] = (float)copysign(v4f_w(a), v4f_w(b));
+  CHKV4_EPS(res, expected, 1.e-6f);
+
+  CHKV4_FUNC_EPS(a, floor, 1.e-6);
+
+  /* The absolute value of `a' is used as the base of the power function */
+  res = v4f_pow(v4f_abs(a), b);
+  expected[0] = (float)pow(fabsf(v4f_x(a)), v4f_x(b));
+  expected[1] = (float)pow(fabsf(v4f_y(a)), v4f_y(b));
+  expected[2] = (float)pow(fabsf(v4f_z(a)), v4f_z(b));
+  expected[3] = (float)pow(fabsf(v4f_w(a)), v4f_w(b));
+  CHKV4_EPS(res, expected, 1.e-6f);
+}
+
+/* Entry point of the test: run each group of checks in turn. The command line
+ * arguments are not used. */
+int
+main(int argc, char** argv)
+{
+  (void)argc;
+  (void)argv;
+
+  test_trigo();
+  test_exp();
+  test_log();
+  test_misc();
+
+  return 0;
+}
+
diff --git a/src/test_v4f.c b/src/test_v4f.c
@@ -14,6 +14,7 @@
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "rsimd.h"
+#include "math.h"
int
main(int argc, char** argv)
@@ -378,54 +379,6 @@ main(int argc, char** argv)
CHK(v4f_y(k) == 7.5f);
CHK(v4f_z(k) == 7.f);
- i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
- k = v4f_cos(i);
- CHK(eq_eps(v4f_x(k), (float)cos(PI/2.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_y(k), (float)cos(PI/3.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_z(k), (float)cos(PI/4.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_w(k), (float)cos(PI/6.0), 1.e-6f) == 1);
-
- k = v4f_sin(i);
- CHK(eq_eps(v4f_x(k), (float)sin(PI/2.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_y(k), (float)sin(PI/3.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_z(k), (float)sin(PI/4.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_w(k), (float)sin(PI/6.0), 1.e-6f) == 1);
-
- v4f_sincos(i, &k, &j);
- CHK(eq_eps(v4f_x(k), (float)sin(PI/2.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_y(k), (float)sin(PI/3.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_z(k), (float)sin(PI/4.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_w(k), (float)sin(PI/6.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_x(j), (float)cos(PI/2.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_y(j), (float)cos(PI/3.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_z(j), (float)cos(PI/4.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_w(j), (float)cos(PI/6.0), 1.e-6f) == 1);
-
- i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
- k = v4f_tan(i);
- CHK(eq_eps(v4f_x(k), (float)tan(PI/8.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_y(k), (float)tan(PI/3.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_z(k), (float)tan(PI/4.0), 1.e-6f) == 1);
- CHK(eq_eps(v4f_w(k), (float)tan(PI/6.0), 1.e-6f) == 1);
-
- k = v4f_acos(v4f_cos(i));
- CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1);
-
- k = v4f_asin(v4f_sin(i));
- CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1);
-
- k = v4f_atan(v4f_tan(i));
- CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1);
- CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1);
-
i = v4f_set(1.f, 2.f, 3.f, 4.f);
j = v4f_set(-2.f, -4.f, 3.f, 6.f);
k = v4f_eq(i, j);