rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit e93ccab448df745675af5dcba0f3750ff34a5e71
parent 9ede110fda406d3138d015206253f20c9b0c90db
Author: vaplv <vaplv@free.fr>
Date:   Fri, 29 Jan 2021 17:00:29 +0100

Rely on the Sleef library for math functions

Add several math functions

Diffstat:
M cmake/CMakeLists.txt | 15 +++++++++++----
A cmake/SleefConfig.cmake | 35 +++++++++++++++++++++++++++++++++++
M src/aosq.h | 1 +
A src/math.h | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/sse/ssef.c | 150 -------------------------------------------------------------------------------
M src/sse/ssef.h | 48 ------------------------------------------------
A src/test_math.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/test_v4f.c | 49 +------------------------------------------------
8 files changed, 345 insertions(+), 250 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>. -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 3.1) project(rsimd C) cmake_policy(SET CMP0011 NEW) enable_testing() @@ -24,10 +24,15 @@ set(RSIMD_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../src) ################################################################################ # Check dependencies ################################################################################ +get_filename_component(_current_source_dir ${CMAKE_CURRENT_LIST_FILE} PATH) +set(Sleef_DIR ${_current_source_dir}/) + find_package(RCMake REQUIRED) find_package(RSys 0.7 REQUIRED) +find_package(PkgConfig REQUIRED) +find_package(Sleef REQUIRED) -include_directories(${RSys_INCLUDE_DIR}) +include_directories(${RSys_INCLUDE_DIR} ${Sleef_INCLUDE_DIR}) set(CMAKE_MODULE_PATH ${RCMAKE_SOURCE_DIR}) include(rcmake) @@ -52,6 +57,7 @@ set(RSIMD_FILES_INC_LEGACY aosf33.h aosf44.h aosq.h + math.h rsimd.h soaXfY.h soaXfY_begin.h @@ -75,8 +81,7 @@ set(RSIMD_FILES_INC_AVX avx/avxi.h) set(RSIMD_FILES_SRC aosf44.c - aosq.c - sse/ssef.c) + aosq.c) set(RSIMD_FILES_DOC COPYING COPYING.LESSER README.md) rcmake_prepend_path(RSIMD_FILES_INC_LEGACY ${RSIMD_SOURCE_DIR}) rcmake_prepend_path(RSIMD_FILES_INC_SSE ${RSIMD_SOURCE_DIR}) @@ -89,6 +94,7 @@ set(RSIMD_FILES_INC ${RSIMD_FILES_INC_AVX}) add_library(rsimd SHARED ${RSIMD_FILES_INC} ${RSIMD_FILES_SRC}) +target_link_libraries(rsimd Sleef) set_target_properties(rsimd PROPERTIES DEFINE_SYMBOL RSIMD_SHARED_BUILD) set_target_properties(rsimd PROPERTIES @@ -119,6 +125,7 @@ if(NOT NO_TEST) new_test(test_aosf33) new_test(test_aosf44) new_test(test_aosq) + new_test(test_math) new_test(test_soa4f2) new_test(test_soa4f3) new_test(test_soa4f4) diff --git a/cmake/SleefConfig.cmake b/cmake/SleefConfig.cmake @@ -0,0 +1,35 @@ +# Copyright (C) 2014-2019 
Vincent Forest (vaplv@free.fr) +# +# The RSIMD CMake is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The RSIMD CMake is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>. + +cmake_minimum_required(VERSION 3.1) + +# Look for library header +find_path(Sleef_INCLUDE_DIR sleef.h) + +find_library(Sleef_LIBRARY sleef PATH_SUFFIXES lib64 + DOC "Path the the sleef library") + +# Create the imported library target +add_library(Sleef SHARED IMPORTED) +set_target_properties(Sleef PROPERTIES + IMPORTED_LOCATION ${Sleef_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${Sleef_INCLUDE_DIR}) + +# Check the package +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(Sleef DEFAULT_MSG + Sleef_INCLUDE_DIR + Sleef_LIBRARY) + diff --git a/src/aosq.h b/src/aosq.h @@ -17,6 +17,7 @@ #define AOSQ_H #include "rsimd.h" +#include "math.h" /* * Functions on AoS quaternion encoded into a v4f_T as { i, j, k, a } diff --git a/src/math.h b/src/math.h @@ -0,0 +1,159 @@ +/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef RSIMD_MATH_H +#define RSIMD_MATH_H + +#include "rsimd.h" + +#ifdef COMPILER_GCC + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wignored-qualifiers" +#endif + +#include <sleef.h> + +#ifdef COMPILER_GCC + #pragma GCC diagnostic pop +#endif + +static FINLINE v4f_T +v4f_copysign(const v4f_T x, const v4f_T y) +{ + return Sleef_copysignf4(x, y); +} + +static INLINE v4f_T +v4f_floor(const v4f_T x) +{ + return Sleef_floorf4(x); +} + +static INLINE v4f_T +v4f_pow(const v4f_T x, const v4f_T y) +{ + return Sleef_powf4_u10(x, y); +} + +/******************************************************************************* + * Exponentatial functions + ******************************************************************************/ +static INLINE v4f_T +v4f_exp2(const v4f_T x) +{ + return Sleef_exp2f4_u10(x); +} + +static INLINE v4f_T +v4f_exp(const v4f_T x) +{ + return Sleef_expf4_u10(x); +} + +static INLINE v4f_T +v4f_exp10(const v4f_T x) +{ + return Sleef_exp10f4_u10(x); +} + +/******************************************************************************* + * Log functions + ******************************************************************************/ +static INLINE v4f_T +v4f_log2(const v4f_T x) +{ + return Sleef_log2f4_u10(x); +} + +static INLINE v4f_T +v4f_log(const v4f_T x) +{ + return Sleef_logf4_u10(x); +} + +static INLINE v4f_T +v4f_log10(const v4f_T x) +{ + return Sleef_log10f4_u10(x); +} + +/******************************************************************************* + * Trigonometric functions + 
******************************************************************************/ +static INLINE v4f_T +v4f_sin(const v4f_T v) +{ + return Sleef_sinf4_u10(v); +} + +static INLINE v4f_T +v4f_asin(const v4f_T v) +{ + return Sleef_asinf4_u10(v); +} + +static INLINE v4f_T +v4f_cos(const v4f_T v) +{ + return Sleef_cosf4_u10(v); +} + +static INLINE v4f_T +v4f_acos(const v4f_T v) +{ + return Sleef_acosf4_u10(v); +} + +static INLINE void +v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c) +{ + const Sleef___m128_2 r = Sleef_sincosf4_u10(v); + *s = r.x; + *c = r.y; +} + +static INLINE v4f_T +v4f_tan(const v4f_T v) +{ + return Sleef_tanf4_u10(v); +} + +static INLINE v4f_T +v4f_atan(const v4f_T v) +{ + return Sleef_atanf4_u10(v); +} + +/******************************************************************************* + * Miscellaneous + ******************************************************************************/ +static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/ +v4f_xyz_to_rthetaphi(const v4f_T v) +{ + const v4f_T zero = v4f_zero(); + const v4f_T len2 = v4f_len2(v); + const v4f_T len3 = v4f_len3(v); + const v4f_T theta = v4f_sel + (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero)); + const v4f_T tmp_phi = v4f_sel + (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero)); + const v4f_T phi = v4f_sel + (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero)); + + return v4f_xyab(v4f_xayb(len3, theta), phi); +} + + +#endif /* RSIMD_MATH_H */ diff --git a/src/sse/ssef.c b/src/sse/ssef.c @@ -1,150 +0,0 @@ -/* Copyright (C) 2014-2019 Vincent Forest (vaplv@free.fr) - * - * The RSIMD library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * The RSIMD library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ - -#include "../rsimd.h" - -#define KC0 v4f_set1(0.63661977236f) -#define KC1 v4f_set1(1.57079625129f) -#define KC2 v4f_set1(7.54978995489e-8f) -#define CC0 v4f_set1(-0.0013602249f) -#define CC1 v4f_set1(0.0416566950f) -#define CC2 v4f_set1(-0.4999990225f) -#define SC0 v4f_set1(-0.0001950727f) -#define SC1 v4f_set1(0.0083320758f) -#define SC2 v4f_set1(-0.1666665247f) -#define ONE v4f_set1(1.f) - -v4f_T -v4f_sin(const v4f_T v) -{ - const v4i_T zeroi = v4i_zero(); - const v4i_T onei = v4i_set1(1); - const v4i_T twoi = v4i_set1(2); - const v4i_T threei = v4i_set1(3); - - const v4f_T x = v4f_mul(v, KC0); - const v4i_T q = v4f_to_v4i(x); - const v4i_T off = v4i_and(q, threei); - const v4f_T qf = v4i_to_v4f(q); - - const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1)); - const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2)); - const v4f_T xl2 = v4f_mul(xl, xl); - const v4f_T xl3 = v4f_mul(xl2, xl); - - const v4f_T cx = - v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE); - const v4f_T sx = - v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl); - - const v4f_T mask0 = (v4f_T) v4i_eq(v4i_and(off, onei), zeroi); - const v4f_T mask1 = (v4f_T) v4i_eq(v4i_and(off, twoi), zeroi); - const v4f_T res = v4f_sel(cx, sx, mask0); - return v4f_sel(v4f_minus(res), res, mask1); -} - -v4f_T -v4f_cos(const v4f_T v) -{ - const v4i_T zeroi = v4i_zero(); - const v4i_T onei = v4i_set1(1); - const v4i_T twoi = v4i_set1(2); - const v4i_T threei = v4i_set1(3); - - const v4f_T x = v4f_mul(v, KC0); - const v4i_T q = v4f_to_v4i(x); - const v4i_T off = 
v4i_add(v4i_and(q, threei), onei); - const v4f_T qf = v4i_to_v4f(q); - - const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1)); - const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2)); - const v4f_T xl2 = v4f_mul(xl, xl); - const v4f_T xl3 = v4f_mul(xl2, xl); - - const v4f_T cx = - v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE); - const v4f_T sx = - v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl); - - const v4f_T mask0 = (v4f_T) v4i_eq(v4i_and(off, onei), zeroi); - const v4f_T mask1 = (v4f_T) v4i_eq(v4i_and(off, twoi), zeroi); - const v4f_T res = v4f_sel(cx, sx, mask0); - return v4f_sel(v4f_minus(res), res, mask1); -} - -void -v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c) -{ - const v4i_T zeroi = v4i_zero(); - const v4i_T onei = v4i_set1(1); - const v4i_T twoi = v4i_set1(2); - const v4i_T threei = v4i_set1(3); - - const v4f_T x = v4f_mul(v, KC0); - const v4i_T q = v4f_to_v4i(x); - const v4i_T soff = v4i_and(q, threei); - const v4i_T coff = v4i_add(v4i_and(q, threei), onei); - const v4f_T qf = v4i_to_v4f(q); - - const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1)); - const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2)); - const v4f_T xl2 = v4f_mul(xl, xl); - const v4f_T xl3 = v4f_mul(xl2, xl); - - const v4f_T cx = - v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE); - const v4f_T sx = - v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl); - - const v4f_T smask0 = (v4f_T) v4i_eq(v4i_and(soff, onei), zeroi); - const v4f_T smask1 = (v4f_T) v4i_eq(v4i_and(soff, twoi), zeroi); - const v4f_T sres = v4f_sel(cx, sx, smask0); - - const v4f_T cmask0 = (v4f_T) v4i_eq(v4i_and(coff, onei), zeroi); - const v4f_T cmask1 = (v4f_T) v4i_eq(v4i_and(coff, twoi), zeroi); - const v4f_T cres = v4f_sel(cx, sx, cmask0); - - *s = v4f_sel(v4f_minus(sres), sres, smask1); - *c = v4f_sel(v4f_minus(cres), cres, cmask1); -} - -v4f_T -v4f_acos(const v4f_T v) -{ - const v4f_T absv = v4f_abs(v); - const v4f_T t0 = v4f_sqrt(v4f_sub(v4f_set1(1.f), 
absv)); - const v4f_T absv2 =v4f_mul(absv, absv); - const v4f_T absv4 = v4f_mul(absv2, absv2); - - const v4f_T h0 = v4f_set1(-0.0012624911f); - const v4f_T h1 = v4f_set1(0.0066700901f); - const v4f_T h2 = v4f_set1(-0.0170881256f); - const v4f_T h3 = v4f_set1(0.0308918810f); - const v4f_T hi = - v4f_madd(v4f_madd(v4f_madd(h0, absv, h1), absv, h2), absv, h3); - - const v4f_T l0 = v4f_set1(-0.0501743046f); - const v4f_T l1 = v4f_set1(0.0889789874f); - const v4f_T l2 = v4f_set1(-0.2145988016f); - const v4f_T l3 = v4f_set1((float)(PI*0.5)); - const v4f_T lo = - v4f_madd(v4f_madd(v4f_madd(l0, absv, l1), absv, l2), absv, l3); - - const v4f_T res = v4f_mul(v4f_madd(hi, absv4, lo), t0); - const v4f_T mask = v4f_lt(v, v4f_zero()); - - return v4f_sel(res, v4f_set1((float)PI) - res, mask); -} - diff --git a/src/sse/ssef.h b/src/sse/ssef.h @@ -473,35 +473,6 @@ v4f_normalize3(const v4f_T v) } /******************************************************************************* - * Trigonometric operations - ******************************************************************************/ -RSIMD_API v4f_T v4f_sin(const v4f_T v); -RSIMD_API v4f_T v4f_cos(const v4f_T v); -RSIMD_API v4f_T v4f_acos(const v4f_T v); -RSIMD_API void v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c); - -static FINLINE v4f_T -v4f_tan(const v4f_T v) -{ - v4f_T s, c; - v4f_sincos(v, &s, &c); - return v4f_div(s, c); -} - -static FINLINE v4f_T -v4f_asin(const v4f_T v) -{ - return v4f_sub(v4f_set1((float)(PI*0.5)), v4f_acos(v)); -} - -static FINLINE v4f_T -v4f_atan(v4f_T v) -{ - const v4f_T tmp = v4f_rsqrt(v4f_madd(v, v, v4f_set1(1.f))); - return v4f_asin(v4f_mul(v, tmp)); -} - -/******************************************************************************* * Comparators ******************************************************************************/ static FINLINE v4f_T @@ -578,24 +549,5 @@ v4f_clamp(const v4f_T v, const v4f_T vmin, const v4f_T vmax) return v4f_min(v4f_max(v, vmin), vmax); } 
-/******************************************************************************* - * Miscellaneous - ******************************************************************************/ -static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/ -v4f_xyz_to_rthetaphi(const v4f_T v) -{ - const v4f_T zero = v4f_zero(); - const v4f_T len2 = v4f_len2(v); - const v4f_T len3 = v4f_len3(v); - const v4f_T theta = v4f_sel - (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero)); - const v4f_T tmp_phi = v4f_sel - (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero)); - const v4f_T phi = v4f_sel - (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero)); - - return v4f_xyab(v4f_xayb(len3, theta), phi); -} - #endif /* RSIMD_SSEF_H */ diff --git a/src/test_math.c b/src/test_math.c @@ -0,0 +1,138 @@ +/* Copyright (C) 2013-2019 Vincent Forest (vaplv@free.fr) + * + * The RSIMD library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The RSIMD library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#define _POSIX_C_SOURCE 200112L + +#include "rsimd.h" +#include "math.h" + +#include <math.h> + +#define LOG2E 1.4426950408889634074 /* log_2 e */ +#define LN10 2.30258509299404568402 /* log_e 10 */ + +#define CHKV4_EPS(V, Ref, Eps) { \ + CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps)); \ + CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps)); \ + CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps)); \ + CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps)); \ +} (void)0 + +#define CHKV4_FUNC_EPS(V, Func, Eps) { \ + const v4f_T r__ = v4f_##Func(V); \ + float ref__[4]; \ + ref__[0] = (float)Func(v4f_x(V)); \ + ref__[1] = (float)Func(v4f_y(V)); \ + ref__[2] = (float)Func(v4f_z(V)); \ + ref__[3] = (float)Func(v4f_w(V)); \ + CHKV4_EPS(r__, ref__, Eps); \ +} (void)0 + +static void +test_trigo(void) +{ + v4f_T i, j, k; + float ref[4]; + + i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f); + + CHKV4_FUNC_EPS(i, cos, 1.e-6); + CHKV4_FUNC_EPS(i, sin, 1.e-6); + + v4f_sincos(i, &k, &j); + ref[0] = (float)sin(v4f_x(i)); + ref[1] = (float)sin(v4f_y(i)); + ref[2] = (float)sin(v4f_z(i)); + ref[3] = (float)sin(v4f_w(i)); + CHKV4_EPS(k, ref, 1.e-6f); + ref[0] = (float)cos(v4f_x(i)); + ref[1] = (float)cos(v4f_y(i)); + ref[2] = (float)cos(v4f_z(i)); + ref[3] = (float)cos(v4f_w(i)); + CHKV4_EPS(j, ref, 1.e-6f); + + i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f); + CHKV4_FUNC_EPS(i, tan, 1.e-6); + CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6); + CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6); + CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6); +} + +static void +test_exp(void) +{ + const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f); + v4f_T j; + float ref[4]; + + CHKV4_FUNC_EPS(i, exp, 1.e-6); + CHKV4_FUNC_EPS(i, exp2, 1.e-6); + + j = v4f_exp10(i); + ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(i)); + ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i)); + ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i)); + ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i)); + CHKV4_EPS(j, 
ref, 1.e-6f); +} + +static void +test_log(void) +{ + const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f); + + CHKV4_FUNC_EPS(i, log, 1.e-6); + CHKV4_FUNC_EPS(i, log2, 1.e-6); + CHKV4_FUNC_EPS(i, log10, 1.e-6); +} + +static void +test_misc(void) +{ + v4f_T i, j, k; + float ref[4]; + + i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f); + j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f); + k = v4f_copysign(i, j); + ref[0] = (float)copysign(v4f_x(i), v4f_x(j)); + ref[1] = (float)copysign(v4f_y(i), v4f_y(j)); + ref[2] = (float)copysign(v4f_z(i), v4f_z(j)); + ref[3] = (float)copysign(v4f_w(i), v4f_w(j)); + CHKV4_EPS(k, ref, 1.e-6f); + + CHKV4_FUNC_EPS(i, floor, 1.e-6); + + k = v4f_pow(v4f_abs(i), j); + ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j)); + ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j)); + ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j)); + ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j)); + CHKV4_EPS(k, ref, 1.e-6f); +} + +int +main(int argc, char** argv) +{ + (void)argc, (void)argv; + + test_trigo(); + test_exp(); + test_log(); + test_misc(); + + return 0; +} + diff --git a/src/test_v4f.c b/src/test_v4f.c @@ -14,6 +14,7 @@ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. 
*/ #include "rsimd.h" +#include "math.h" int main(int argc, char** argv) @@ -378,54 +379,6 @@ main(int argc, char** argv) CHK(v4f_y(k) == 7.5f); CHK(v4f_z(k) == 7.f); - i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f); - k = v4f_cos(i); - CHK(eq_eps(v4f_x(k), (float)cos(PI/2.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_y(k), (float)cos(PI/3.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_z(k), (float)cos(PI/4.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_w(k), (float)cos(PI/6.0), 1.e-6f) == 1); - - k = v4f_sin(i); - CHK(eq_eps(v4f_x(k), (float)sin(PI/2.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_y(k), (float)sin(PI/3.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_z(k), (float)sin(PI/4.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_w(k), (float)sin(PI/6.0), 1.e-6f) == 1); - - v4f_sincos(i, &k, &j); - CHK(eq_eps(v4f_x(k), (float)sin(PI/2.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_y(k), (float)sin(PI/3.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_z(k), (float)sin(PI/4.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_w(k), (float)sin(PI/6.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_x(j), (float)cos(PI/2.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_y(j), (float)cos(PI/3.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_z(j), (float)cos(PI/4.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_w(j), (float)cos(PI/6.0), 1.e-6f) == 1); - - i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f); - k = v4f_tan(i); - CHK(eq_eps(v4f_x(k), (float)tan(PI/8.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_y(k), (float)tan(PI/3.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_z(k), (float)tan(PI/4.0), 1.e-6f) == 1); - CHK(eq_eps(v4f_w(k), (float)tan(PI/6.0), 1.e-6f) == 1); - - k = v4f_acos(v4f_cos(i)); - CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1); - - k = v4f_asin(v4f_sin(i)); - CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1); - - k = 
v4f_atan(v4f_tan(i)); - CHK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f) == 1); - CHK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f) == 1); - i = v4f_set(1.f, 2.f, 3.f, 4.f); j = v4f_set(-2.f, -4.f, 3.f, 6.f); k = v4f_eq(i, j);