commit 16f4dd73fbd19643a8cab92f9e2af0d0efbe0d9a
parent 73009ee225c3142b746eeb020c2f2142e2d4e4aa
Author: vaplv <vaplv@free.fr>
Date: Wed, 16 Jun 2021 09:14:49 +0200
Merge branch 'release_0.3'
Diffstat:
56 files changed, 4180 insertions(+), 2140 deletions(-)
diff --git a/COPYING.LESSER b/COPYING.LESSER
@@ -1,165 +0,0 @@
- GNU LESSER GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-
- This version of the GNU Lesser General Public License incorporates
-the terms and conditions of version 3 of the GNU General Public
-License, supplemented by the additional permissions listed below.
-
- 0. Additional Definitions.
-
- As used herein, "this License" refers to version 3 of the GNU Lesser
-General Public License, and the "GNU GPL" refers to version 3 of the GNU
-General Public License.
-
- "The Library" refers to a covered work governed by this License,
-other than an Application or a Combined Work as defined below.
-
- An "Application" is any work that makes use of an interface provided
-by the Library, but which is not otherwise based on the Library.
-Defining a subclass of a class defined by the Library is deemed a mode
-of using an interface provided by the Library.
-
- A "Combined Work" is a work produced by combining or linking an
-Application with the Library. The particular version of the Library
-with which the Combined Work was made is also called the "Linked
-Version".
-
- The "Minimal Corresponding Source" for a Combined Work means the
-Corresponding Source for the Combined Work, excluding any source code
-for portions of the Combined Work that, considered in isolation, are
-based on the Application, and not on the Linked Version.
-
- The "Corresponding Application Code" for a Combined Work means the
-object code and/or source code for the Application, including any data
-and utility programs needed for reproducing the Combined Work from the
-Application, but excluding the System Libraries of the Combined Work.
-
- 1. Exception to Section 3 of the GNU GPL.
-
- You may convey a covered work under sections 3 and 4 of this License
-without being bound by section 3 of the GNU GPL.
-
- 2. Conveying Modified Versions.
-
- If you modify a copy of the Library, and, in your modifications, a
-facility refers to a function or data to be supplied by an Application
-that uses the facility (other than as an argument passed when the
-facility is invoked), then you may convey a copy of the modified
-version:
-
- a) under this License, provided that you make a good faith effort to
- ensure that, in the event an Application does not supply the
- function or data, the facility still operates, and performs
- whatever part of its purpose remains meaningful, or
-
- b) under the GNU GPL, with none of the additional permissions of
- this License applicable to that copy.
-
- 3. Object Code Incorporating Material from Library Header Files.
-
- The object code form of an Application may incorporate material from
-a header file that is part of the Library. You may convey such object
-code under terms of your choice, provided that, if the incorporated
-material is not limited to numerical parameters, data structure
-layouts and accessors, or small macros, inline functions and templates
-(ten or fewer lines in length), you do both of the following:
-
- a) Give prominent notice with each copy of the object code that the
- Library is used in it and that the Library and its use are
- covered by this License.
-
- b) Accompany the object code with a copy of the GNU GPL and this license
- document.
-
- 4. Combined Works.
-
- You may convey a Combined Work under terms of your choice that,
-taken together, effectively do not restrict modification of the
-portions of the Library contained in the Combined Work and reverse
-engineering for debugging such modifications, if you also do each of
-the following:
-
- a) Give prominent notice with each copy of the Combined Work that
- the Library is used in it and that the Library and its use are
- covered by this License.
-
- b) Accompany the Combined Work with a copy of the GNU GPL and this license
- document.
-
- c) For a Combined Work that displays copyright notices during
- execution, include the copyright notice for the Library among
- these notices, as well as a reference directing the user to the
- copies of the GNU GPL and this license document.
-
- d) Do one of the following:
-
- 0) Convey the Minimal Corresponding Source under the terms of this
- License, and the Corresponding Application Code in a form
- suitable for, and under terms that permit, the user to
- recombine or relink the Application with a modified version of
- the Linked Version to produce a modified Combined Work, in the
- manner specified by section 6 of the GNU GPL for conveying
- Corresponding Source.
-
- 1) Use a suitable shared library mechanism for linking with the
- Library. A suitable mechanism is one that (a) uses at run time
- a copy of the Library already present on the user's computer
- system, and (b) will operate properly with a modified version
- of the Library that is interface-compatible with the Linked
- Version.
-
- e) Provide Installation Information, but only if you would otherwise
- be required to provide such information under section 6 of the
- GNU GPL, and only to the extent that such information is
- necessary to install and execute a modified version of the
- Combined Work produced by recombining or relinking the
- Application with a modified version of the Linked Version. (If
- you use option 4d0, the Installation Information must accompany
- the Minimal Corresponding Source and Corresponding Application
- Code. If you use option 4d1, you must provide the Installation
- Information in the manner specified by section 6 of the GNU GPL
- for conveying Corresponding Source.)
-
- 5. Combined Libraries.
-
- You may place library facilities that are a work based on the
-Library side by side in a single library together with other library
-facilities that are not Applications and are not covered by this
-License, and convey such a combined library under terms of your
-choice, if you do both of the following:
-
- a) Accompany the combined library with a copy of the same work based
- on the Library, uncombined with any other library facilities,
- conveyed under the terms of this License.
-
- b) Give prominent notice with the combined library that part of it
- is a work based on the Library, and explaining where to find the
- accompanying uncombined form of the same work.
-
- 6. Revised Versions of the GNU Lesser General Public License.
-
- The Free Software Foundation may publish revised and/or new versions
-of the GNU Lesser General Public License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Library as you received it specifies that a certain numbered version
-of the GNU Lesser General Public License "or any later version"
-applies to it, you have the option of following the terms and
-conditions either of that published version or of any later version
-published by the Free Software Foundation. If the Library as you
-received it does not specify a version number of the GNU Lesser
-General Public License, you may choose any version of the GNU Lesser
-General Public License ever published by the Free Software Foundation.
-
- If the Library as you received it specifies that a proxy can decide
-whether future versions of the GNU Lesser General Public License shall
-apply, that proxy's public statement of acceptance of any version is
-permanent authorization for you to choose that version for the
-Library.
diff --git a/README.md b/README.md
@@ -4,24 +4,42 @@ This C89 library defines an interface that encapsulates and make easier the
manipulation of SIMD instruction sets. It also provides a SIMD implementation
of linear algebra operations for 3x3 and 4x4 matrices as well as quaternions
arranged in an `Array of Structures` SIMD layout. Linear algebra functions on
-`Structure of Arrays` 2/3/4 dimensions vectors are also implemented.
-
-Note that currently only the SSE2 instruction set is supported.
+`Structure of Arrays` 2/3/4 dimensions vectors are also implemented. Finally it
+exposes a vectorized version of some math functions by relying on the
+[Sleef](https://sleef.org/) library.
## How to build
The library uses [CMake](http://www.cmake.org) and the
[RCMake](https://gitlab.com/vaplv/rcmake/) package to build. It also depends on
-the [RSys](https://gitlab.com/vaplv/rsys/) library. First, install the RCMake
-package and the RSys library. Then, generate the project from the
-cmake/CMakeLists.txt file by appending the RCMake and RSys install directories
-to the `CMAKE_PREFIX_PATH` variable. The resulting project can be edited,
- built, tested and installed as any CMake project.
+the [RSys](https://gitlab.com/vaplv/rsys/) and the [Sleef](https://sleef.org)
+library. First, install the RCMake package, the RSys and the Sleef libraries.
+Then, generate the project from the cmake/CMakeLists.txt file by appending the
+RCMake, RSys and Sleef install directories to the `CMAKE_PREFIX_PATH` variable.
+The resulting project can be edited, built, tested and installed as any CMake
+project.
-## License
+## Release notes
+
+### Version 0.3
+
+- Add 8-way vector API for the float and int32 types.
+- Add the `v<4|8>i_[reduce_]<min|max>` functions.
+- Add the `v4i_minus` function.
+- Rely on the [Sleef](https://sleef.org) library to replace the hand-crafted
+ implementation of the trigonometric functions.
+- Add math functions for both 4-way and 8-way vectors. Provided math functions are:
+ copysign, floor, pow, exp[<2|10>] and log[<2|10>].
-RSIMD is Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr). It is a free
-software released under the [OSI](https://opensource.org)-approved LGPL v3+
-license. You are welcome to redistribute it under certain conditions; refer to
-the COPYING files for details.
+### Version 0.2.1
+
+- If supported by the compiler, use the SSE4.1 blendv instruction in the
+ `v4f_sel` function
+- Turn the RSIMD library into a shared library.
+
+## License
+Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr). RSIMD is free software
+released under the GPL v3+ license: GNU GPL version 3 or later. You are welcome
+to redistribute it under certain conditions; refer to the COPYING file for
+details.
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+# Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
#
# The RSIMD CMake is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -13,78 +13,149 @@
# You should have received a copy of the GNU General Public License
# along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>.
-cmake_minimum_required(VERSION 2.6)
+cmake_minimum_required(VERSION 3.1)
project(rsimd C)
-cmake_policy(SET CMP0011 NEW)
enable_testing()
-option(BUILD_STATIC "Build RSIMD as a static library" ON)
option(NO_TEST "Disable the tests" OFF)
set(RSIMD_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../src)
################################################################################
# Check dependencies
################################################################################
+set(Sleef_DIR ${PROJECT_SOURCE_DIR})
+
find_package(RCMake REQUIRED)
-find_package(RSys REQUIRED)
+find_package(RSys 0.12 REQUIRED)
+find_package(PkgConfig REQUIRED)
+find_package(Sleef REQUIRED)
-include_directories(${RSys_INCLUDE_DIR})
+include_directories(${RSys_INCLUDE_DIR} ${Sleef_INCLUDE_DIR})
+include(CheckCCompilerFlag)
set(CMAKE_MODULE_PATH ${RCMAKE_SOURCE_DIR})
include(rcmake)
################################################################################
# Check compiler features
################################################################################
-if(CMAKE_COMPILER_IS_GNUCC)
- include(CheckCCompilerFlag)
+if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux"
+OR NOT CMAKE_COMPILER_IS_GNUCC)
+ message(STATUS ${CMAKE_SYSTEM_NAME})
+ message(FATAL_ERROR "Unsupported platform")
+endif()
+
+execute_process(COMMAND cat "/proc/cpuinfo"
+ OUTPUT_VARIABLE CPUINFO_OUT
+ ERROR_VARIABLE CPUINFO_ERR
+ RESULT_VARIABLE CPUINFO_RES)
+if(NOT CPUINFO_RES EQUAL 0)
+ message(FATAL_ERROR "${CPUINFO_ERR}")
+endif()
+
+string(REGEX MATCH "[ \t\r\n]+sse2[ \t\r\n]+" SSE2 ${CPUINFO_OUT})
+string(REGEX MATCH "[ \t\r\n]+sse4_1[ \t\r\n]+" SSE4_1 ${CPUINFO_OUT})
+string(REGEX MATCH "[ \t\r\n]+avx[ \t\r\n]+" AVX ${CPUINFO_OUT})
+string(REGEX MATCH "[ \t\r\n]+fma[ \t\r\n]+" FMA ${CPUINFO_OUT})
+
+if(SSE2)
+ unset(SSE2)
+ CHECK_C_COMPILER_FLAG("-msse2" SSE2)
+ message(STATUS "Use the SSE2 instruction set ")
+else()
+ message(FATAL_ERROR "The SSE2 instruction set must be supported.")
+endif()
+if(SSE4_1)
+ unset(SSE4_1)
CHECK_C_COMPILER_FLAG("-msse4.1" SSE4_1)
-endif(CMAKE_COMPILER_IS_GNUCC)
+ message(STATUS "Use the SSE4.1 instruction set")
+endif()
+if(AVX)
+ unset(AVX)
+ CHECK_C_COMPILER_FLAG("-mavx" AVX)
+ message(STATUS "Use the AVX instruction set")
+endif()
+if(FMA)
+ unset(FMA)
+ CHECK_C_COMPILER_FLAG("-mfma" FMA)
+ message(STATUS "Use the FMA instruction set")
+endif()
################################################################################
# Configure and define targets
################################################################################
set(VERSION_MAJOR 0)
-set(VERSION_MINOR 2)
+set(VERSION_MINOR 3)
set(VERSION_PATCH 0)
set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
+set(RSIMD_SSE2 ${SSE2})
+set(RSIMD_SSE4_1 ${SSE4_1})
+set(RSIMD_AVX ${AVX})
+set(RSIMD_FMA ${FMA})
+
+# Configure the files generic to the RSIMD version
+configure_file(${RSIMD_SOURCE_DIR}/rsimd_version.h.in
+ ${CMAKE_CURRENT_BINARY_DIR}/rsimd_version.h @ONLY)
+configure_file(${PROJECT_SOURCE_DIR}/RSIMDConfigVersion.cmake.in
+ ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake @ONLY)
+configure_file(${PROJECT_SOURCE_DIR}/RSIMDConfig.cmake.in
+ ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfig.cmake @ONLY)
+
set(RSIMD_FILES_INC_LEGACY
aosf33.h
aosf44.h
aosq.h
+ math.h
+ mathX.h
+ math4.h
+ math8.h
rsimd.h
- soa4fX.h
+ soaXfY.h
+ soaXfY_begin.h
+ soaXfY_end.h
+ soaXf2.h
+ soaXf3.h
soa4f2.h
soa4f3.h
- soa4f4.h)
+ soa4f4.h
+ soa8f2.h
+ soa8f3.h
+ soa8f4.h
+ vXf_begin.h
+ vXf_end.h)
set(RSIMD_FILES_INC_SSE
sse/sse.h
sse/ssef.h
sse/ssei.h
sse/sse_swz.h)
+set(RSIMD_FILES_INC_AVX
+ avx/avx.h
+ avx/avxf.h
+ avx/avxi.h)
set(RSIMD_FILES_SRC
aosf44.c
- aosq.c
- sse/ssef.c)
+ aosq.c)
-set(RSIMD_FILES_DOC COPYING COPYING.LESSER README.md)
+# Files installed in share/doc/rsimd; every entry must exist in the source tree root
+set(RSIMD_FILES_DOC COPYING README.md)
+set(RSIMD_FILES_CMAKE
+ RSIMDConfig.cmake
+ RSIMDConfigVersion.cmake)
rcmake_prepend_path(RSIMD_FILES_INC_LEGACY ${RSIMD_SOURCE_DIR})
rcmake_prepend_path(RSIMD_FILES_INC_SSE ${RSIMD_SOURCE_DIR})
+rcmake_prepend_path(RSIMD_FILES_INC_AVX ${RSIMD_SOURCE_DIR})
rcmake_prepend_path(RSIMD_FILES_SRC ${RSIMD_SOURCE_DIR})
rcmake_prepend_path(RSIMD_FILES_DOC ${PROJECT_SOURCE_DIR}/../)
-set(RSIMD_FILES_INC ${RSIMD_FILES_INC_LEGACY} ${RSIMD_FILES_INC_SSE})
-
-if(BUILD_STATIC)
- add_library(rsimd STATIC ${RSIMD_FILES_INC} ${RSIMD_FILES_SRC})
- set_target_properties(rsimd PROPERTIES DEFINE_SYMBOL RSIMD_STATIC)
-else(BUILD_STATIC)
- add_library(rsimd SHARED ${RSIMD_FILES_INC} ${RSIMD_FILES_SRC})
- set_target_properties(rsimd PROPERTIES
- DEFINE_SYMBOL RSIMD_SHARED_BUILD
- VERSION ${VERSION}
- SOVERSION ${VERSION_MAJOR})
-endif(BUILD_STATIC)
-
-rcmake_setup_devel(rsimd RSIMD ${VERSION} rsimd/rsimd_version.h)
+rcmake_prepend_path(RSIMD_FILES_CMAKE ${PROJECT_SOURCE_DIR}/)
+set(RSIMD_FILES_INC
+ ${RSIMD_FILES_INC_LEGACY}
+ ${RSIMD_FILES_INC_SSE}
+ ${RSIMD_FILES_INC_AVX})
+
+add_library(rsimd SHARED ${RSIMD_FILES_INC} ${RSIMD_FILES_SRC})
+target_link_libraries(rsimd Sleef)
+set_target_properties(rsimd PROPERTIES DEFINE_SYMBOL RSIMD_SHARED_BUILD)
+set_target_properties(rsimd PROPERTIES
+ VERSION ${VERSION}
+ SOVERSION ${VERSION_MAJOR})
################################################################################
# Add tests
@@ -96,18 +167,19 @@ if(NOT NO_TEST)
add_test(${_name} ${_name})
if(NOT "${ARGN}" STREQUAL "")
set_target_properties(${_name} PROPERTIES COMPILE_FLAGS ${ARGN})
- endif(NOT "${ARGN}" STREQUAL "")
- endfunction(new_test_named)
+ endif()
+ endfunction()
function(new_test _name)
new_test_named(${_name} ${_name} ${ARGN})
- endfunction(new_test)
+ endfunction()
new_test(test_v4f)
new_test(test_v4i)
new_test(test_aosf33)
new_test(test_aosf44)
new_test(test_aosq)
+ new_test(test_math4)
new_test(test_soa4f2)
new_test(test_soa4f3)
new_test(test_soa4f4)
@@ -115,7 +187,23 @@ if(NOT NO_TEST)
if(SSE4_1 AND CMAKE_COMPILER_IS_GNUCC)
new_test_named(test_v4f_sse4_1 test_v4f "-msse4.1")
new_test_named(test_v4i_sse4_1 test_v4i "-msse4.1")
- endif(SSE4_1 AND CMAKE_COMPILER_IS_GNUCC)
+ endif()
+
+ if(FMA AND CMAKE_COMPILER_IS_GNUCC)
+ new_test_named(test_v4f_fma test_v4f "-mfma")
+ new_test_named(test_soa8f2_fma test_soa8f2 "-mfma")
+ new_test_named(test_soa8f3_fma test_soa8f3 "-mfma")
+ new_test_named(test_soa8f4_fma test_soa8f4 "-mfma")
+ endif()
+
+ if(AVX AND CMAKE_COMPILER_IS_GNUCC)
+ # 8-way vector tests: compiled with -mavx, registered only when AVX is enabled
+ new_test(test_math8 "-mavx")
+ new_test(test_v8f "-mavx")
+ new_test(test_v8i "-mavx")
+ new_test(test_soa8f2 "-mavx")
+ new_test(test_soa8f3 "-mavx")
+ new_test(test_soa8f4 "-mavx")
+ endif()
endif(NOT NO_TEST)
@@ -127,6 +215,14 @@ install(TARGETS rsimd
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin)
install(FILES ${RSIMD_FILES_INC_LEGACY} DESTINATION include/rsimd)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/rsimd_version.h DESTINATION include/rsimd)
install(FILES ${RSIMD_FILES_INC_SSE} DESTINATION include/rsimd/sse)
+install(FILES ${RSIMD_FILES_INC_AVX} DESTINATION include/rsimd/avx)
install(FILES ${RSIMD_FILES_DOC} DESTINATION share/doc/rsimd)
+install(FILES ${Sleef_DIR}/SleefConfig.cmake DESTINATION lib/cmake/Sleef/)
+
+install(FILES
+ ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfig.cmake
+ ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake
+ DESTINATION lib/cmake/RSIMD)
diff --git a/cmake/RSIMDConfig.cmake.in b/cmake/RSIMDConfig.cmake.in
@@ -0,0 +1,133 @@
+# Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+cmake_minimum_required(VERSION 3.1)
+
+# Check dependencies
+find_package(Sleef REQUIRED)
+
+set(RSIMD_SSE2 @RSIMD_SSE2@)
+set(RSIMD_SSE4_1 @RSIMD_SSE4_1@)
+set(RSIMD_AVX @RSIMD_AVX@)
+set(RSIMD_FMA @RSIMD_FMA@)
+
+# Check compiler features
+if(CMAKE_COMPILER_IS_GNUCC)
+ include(CheckCCompilerFlag)
+ if(RSIMD_SSE2)
+ CHECK_C_COMPILER_FLAG("-msse2" SSE2)
+ if(SSE2)
+ list(APPEND _compile_flags -msse2)
+ endif()
+ endif()
+ if(RSIMD_SSE4_1)
+ CHECK_C_COMPILER_FLAG("-msse4.1" SSE4_1)
+ if(SSE4_1)
+ list(APPEND _compile_flags -msse4.1)
+ endif()
+ endif()
+ if(RSIMD_AVX)
+ CHECK_C_COMPILER_FLAG("-mavx" AVX)
+ if(AVX)
+ list(APPEND _compile_flags -mavx)
+ endif()
+ endif()
+ if(RSIMD_FMA)
+ CHECK_C_COMPILER_FLAG("-mfma" FMA)
+ if(FMA)
+ list(APPEND _compile_flags -mfma)
+ endif()
+ endif()
+endif()
+
+# Try to find the RSIMD devel. Once done this will define:
+# - RSIMD_FOUND: system has RSIMD
+# - RSIMD_INCLUDE_DIR: the include directory
+# - RSIMD Target: Link this to use rsimd
+
+# Look for library header
+find_path(RSIMD_INCLUDE_DIR rsimd/rsimd_version.h)
+
+# Look for Release, Debug, RelWithDebInfo and MinSizeRel libraries
+unset(RSIMD_LIBRARY CACHE)
+unset(RSIMD_LIBRARY_RELEASE CACHE)
+unset(RSIMD_LIBRARY_DEBUG CACHE)
+unset(RSIMD_LIBRARY_RELWITHDEBINFO CACHE)
+unset(RSIMD_LIBRARY_MINSIZEREL CACHE)
+
+# Find per configuration type libraries
+find_library(RSIMD_LIBRARY_RELEASE
+ rsimd
+ PATH_SUFFIXES bin Bin BIN
+ DOC "Path to the library rsimd used during release builds.")
+find_library(RSIMD_LIBRARY_DEBUG
+ rsimd-dbg
+ PATH_SUFFIXES bin Bin BIN
+ DOC "Path to the library rsimd used during debug builds.")
+find_library(RSIMD_LIBRARY_RELWITHDEBINFO
+ rsimd-rdbg
+ PATH_SUFFIXES bin Bin BIN
+ DOC "Path to the library rsimd used during release with debug info builds.")
+find_library(RSIMD_LIBRARY_MINSIZEREL
+ rsimd-mszr
+ PATH_SUFFIXES bin Bin BIN
+ DOC "Path to the library rsimd used during minsize builds.")
+
+# Define the generic rsimd library
+if(RSIMD_LIBRARY_RELEASE)
+ set(RSIMD_LIBRARY ${RSIMD_LIBRARY_RELEASE})
+elseif(RSIMD_LIBRARY_RELWITHDEBINFO)
+ set(RSIMD_LIBRARY ${RSIMD_LIBRARY_RELWITHDEBINFO})
+elseif(RSIMD_LIBRARY_MINSIZEREL)
+ set(RSIMD_LIBRARY ${RSIMD_LIBRARY_MINSIZEREL})
+elseif(RSIMD_LIBRARY_DEBUG)
+ set(RSIMD_LIBRARY ${RSIMD_LIBRARY_DEBUG})
+endif()
+
+# Define the per configuration library fallback when not found
+set(_configs RELEASE DEBUG RELWITHDEBINFO MINSIZEREL)
+foreach(_cfg ${_configs})
+ if(NOT RSIMD_LIBRARY_${_cfg})
+ get_property(_doc CACHE RSIMD_LIBRARY_${_cfg} PROPERTY HELPSTRING)
+ set(RSIMD_LIBRARY_${_cfg}
+ ${RSIMD_LIBRARY} CACHE PATH ${_doc} FORCE)
+ endif()
+endforeach()
+
+# Create the imported library target. The guard keeps this file loadable more
+# than once in the same scope: add_library fails if the target already exists
+if(NOT TARGET RSIMD)
+ add_library(RSIMD SHARED IMPORTED)
+endif()
+
+# Setup the properties of the imported target
+if(CMAKE_HOST_WIN32)
+ set(_import_prop IMPORTED_IMPLIB)
+else()
+ set(_import_prop IMPORTED_LOCATION)
+endif()
+set_target_properties(RSIMD PROPERTIES
+ ${_import_prop} ${RSIMD_LIBRARY}
+ ${_import_prop}_RELEASE ${RSIMD_LIBRARY_RELEASE}
+ ${_import_prop}_DEBUG ${RSIMD_LIBRARY_DEBUG}
+ ${_import_prop}_RELWITHDEBINFO ${RSIMD_LIBRARY_RELWITHDEBINFO}
+ ${_import_prop}_MINSIZEREL ${RSIMD_LIBRARY_MINSIZEREL}
+ INTERFACE_INCLUDE_DIRECTORIES ${RSIMD_INCLUDE_DIR}
+ INTERFACE_LINK_LIBRARIES Sleef
+ INTERFACE_COMPILE_OPTIONS "${_compile_flags}")
+
+# Check the package
+include(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(RSIMD DEFAULT_MSG
+ RSIMD_INCLUDE_DIR
+ RSIMD_LIBRARY)
+
diff --git a/cmake/RSIMDConfigVersion.cmake.in b/cmake/RSIMDConfigVersion.cmake.in
@@ -0,0 +1,54 @@
+# Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+set(VERSION_MAJOR @VERSION_MAJOR@)
+set(VERSION_MINOR @VERSION_MINOR@)
+set(VERSION_PATCH @VERSION_PATCH@)
+set(PACKAGE_VERSION "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
+
+if(NOT PACKAGE_FIND_VERSION
+ OR PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+ return()
+endif()
+
+if(NOT VERSION_MAJOR VERSION_EQUAL PACKAGE_FIND_VERSION_MAJOR)
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+ set(PACKAGE_VERSION_EXACT FALSE)
+ set(PACKAGE_VERSION_UNSUITABLE TRUE)
+ return()
+endif()
+
+if(VERSION_MINOR VERSION_LESS PACKAGE_FIND_VERSION_MINOR)
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+ set(PACKAGE_VERSION_EXACT FALSE)
+ set(PACKAGE_VERSION_UNSUITABLE TRUE)
+ return()
+endif()
+
+if(VERSION_MINOR VERSION_EQUAL PACKAGE_FIND_VERSION_MINOR)
+ if(VERSION_PATCH VERSION_LESS PACKAGE_FIND_VERSION_PATCH)
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+ set(PACKAGE_VERSION_EXACT FALSE)
+ set(PACKAGE_VERSION_UNSUITABLE TRUE)
+ return()
+ endif()
+endif()
+
+set(PACKAGE_VERSION_COMPATIBLE TRUE)
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_UNSUITABLE FALSE)
diff --git a/cmake/SleefConfig.cmake b/cmake/SleefConfig.cmake
@@ -0,0 +1,35 @@
+# Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+#
+# The RSIMD CMake is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The RSIMD CMake is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>.
+
+cmake_minimum_required(VERSION 3.1)
+
+# Look for library header
+find_path(Sleef_INCLUDE_DIR sleef.h)
+
+find_library(Sleef_LIBRARY sleef PATH_SUFFIXES lib64
+ DOC "Path to the sleef library")
+
+# Create the imported library target. The guard keeps this file loadable more
+# than once in the same scope: add_library fails if the target already exists
+if(NOT TARGET Sleef)
+ add_library(Sleef SHARED IMPORTED)
+ set_target_properties(Sleef PROPERTIES
+ IMPORTED_LOCATION ${Sleef_LIBRARY}
+ INTERFACE_INCLUDE_DIRECTORIES ${Sleef_INCLUDE_DIR})
+endif()
+
+# Check the package
+include(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(Sleef DEFAULT_MSG
+ Sleef_INCLUDE_DIR
+ Sleef_LIBRARY)
+
diff --git a/src/aosf33.h b/src/aosf33.h
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef AOSF33_H
diff --git a/src/aosf44.c b/src/aosf44.c
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "aosf44.h"
diff --git a/src/aosf44.h b/src/aosf44.h
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef AOSF44_H
diff --git a/src/aosq.c b/src/aosq.c
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "aosq.h"
diff --git a/src/aosq.h b/src/aosq.h
@@ -1,22 +1,23 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef AOSQ_H
#define AOSQ_H
#include "rsimd.h"
+#include "math.h"
/*
* Functions on AoS quaternion encoded into a v4f_T as { i, j, k, a }
diff --git a/src/avx/avx.h b/src/avx/avx.h
@@ -0,0 +1,26 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_AVX_H
+#define RSIMD_AVX_H
+
+#include "avxf.h"
+#include "avxi.h"
+
+/* Reinterpret casts: the 256 bits are kept as is, no value conversion */
+static FINLINE v8i_T v8f_rcast_v8i(const v8f_T v) {return _mm256_castps_si256(v);}
+static FINLINE v8f_T v8i_rcast_v8f(const v8i_T v) {return _mm256_castsi256_ps(v);}
+
+#endif /* RSIMD_AVX_H */
diff --git a/src/avx/avxf.h b/src/avx/avxf.h
@@ -0,0 +1,330 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_AVXF_H
+#define RSIMD_AVXF_H
+
+/*
+ * 8 packed single precision floating-point values
+ */
+
+#include "avx.h"
+
+#include <rsys/math.h>
+#include <immintrin.h>
+
+typedef __m256 v8f_T;
+
+/*******************************************************************************
+ * Set operations
+ ******************************************************************************/
+static FINLINE float* /* Write the 8 lanes of v into dst and return dst */
+v8f_store(float dst[8], v8f_T v)
+{
+ ASSERT(dst && IS_ALIGNED(dst, 32)); /* Aligned store: 32-byte boundary */
+ _mm256_store_ps(dst, v);
+ return dst;
+}
+
+static FINLINE v8f_T /* Read 8 floats from a 32-byte aligned address */
+v8f_load(const float src[8])
+{
+ ASSERT(src && IS_ALIGNED(src, 32));
+ return _mm256_load_ps(src);
+}
+
+static FINLINE v8f_T /* Read 8 floats from f; no alignment requirement */
+v8f_loadu(const float f[8])
+{
+ ASSERT(f);
+ return _mm256_loadu_ps(f); /* One unaligned load; f[0] is the lowest lane */
+}
+
+static FINLINE v8f_T /* Broadcast x to the 8 lanes */
+v8f_set1(const float x)
+{
+ return _mm256_set1_ps(x);
+}
+
+static FINLINE v8f_T /* Build { a, b, c, d, e, f, g, h }; a is the lowest lane */
+v8f_set
+ (const float a, const float b, const float c, const float d,
+ const float e, const float f, const float g, const float h)
+{
+ return _mm256_set_ps(h, g, f, e, d, c, b, a); /* Highest lane comes first */
+}
+
+static FINLINE v8f_T /* Vector whose 256 bits are all cleared */
+v8f_zero(void)
+{
+ return _mm256_setzero_ps();
+}
+
+static FINLINE v8f_T /* Lanes hold the raw bits of a..h; a is the lowest lane */
+v8f_mask
+ (const int32_t a, const int32_t b, const int32_t c, const int32_t d,
+ const int32_t e, const int32_t f, const int32_t g, const int32_t h)
+{
+ return _mm256_castsi256_ps(_mm256_set_epi32(h, g, f, e, d, c, b, a));
+}
+
+static FINLINE v8f_T /* Raw bits of x replicated in the 8 lanes */
+v8f_mask1(const int32_t x)
+{
+ return _mm256_castsi256_ps(_mm256_set1_epi32(x));
+}
+
+static FINLINE v8f_T /* Mask with every bit set in every lane */
+v8f_true(void)
+{
+ return _mm256_castsi256_ps(_mm256_set1_epi32(~0));
+}
+
+static FINLINE v8f_T /* Mask with every bit cleared: same value as v8f_zero */
+v8f_false(void)
+{
+ return v8f_zero();
+}
+
+/*******************************************************************************
+ * Extract components
+ ******************************************************************************/
+static FINLINE v4f_T /* Lower 128 bits of v, i.e. its first 4 lanes */
+v8f_abcd(const v8f_T v)
+{
+ return _mm256_extractf128_ps(v, 0);
+}
+
+static FINLINE v4f_T /* Upper 128 bits of v, i.e. its last 4 lanes */
+v8f_efgh(const v8f_T v)
+{
+ return _mm256_extractf128_ps(v, 1);
+}
+
+static FINLINE int /* Gather the sign bits: bit i is the sign bit of lane i */
+v8f_movemask(const v8f_T v)
+{
+ return _mm256_movemask_ps(v);
+}
+
+/*******************************************************************************
+ * Bitwise operations
+ ******************************************************************************/
+static FINLINE v8f_T /* Bitwise v0 | v1 */
+v8f_or(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_or_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Bitwise v0 & v1 */
+v8f_and(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_and_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Bitwise (~v0) & v1: v0 is the complemented operand */
+v8f_andnot(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_andnot_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Bitwise v0 ^ v1 */
+v8f_xor(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_xor_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Per lane: vtrue if the sign bit of vcond is set, else vfalse */
+v8f_sel(const v8f_T vfalse, const v8f_T vtrue, const v8f_T vcond)
+{
+ return _mm256_blendv_ps(vfalse, vtrue, vcond);
+}
+
+/*******************************************************************************
+ * Arithmetic operations
+ ******************************************************************************/
+static FINLINE v8f_T /* Negate each lane by flipping its sign bit */
+v8f_minus(const v8f_T v)
+{
+ return v8f_xor(v8f_set1(-0.f), v); /* -0.f: only the sign bit is set */
+}
+
+static FINLINE v8f_T /* Per-lane v0 + v1 */
+v8f_add(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_add_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Per-lane v0 - v1 */
+v8f_sub(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_sub_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Per-lane v0 * v1 */
+v8f_mul(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_mul_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Per-lane v0 / v1 */
+v8f_div(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_div_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Per-lane v0*v1 + v2 */
+v8f_madd(const v8f_T v0, const v8f_T v1, const v8f_T v2)
+{
+ return _mm256_add_ps(_mm256_mul_ps(v0, v1), v2); /* mul then add, no FMA intrinsic */
+}
+
+static FINLINE v8f_T /* Per-lane absolute value, obtained by clearing the sign bit */
+v8f_abs(const v8f_T v)
+{
+ const union { int32_t i; float f; } mask = { 0x7fffffff }; /* All bits but the sign */
+ return v8f_and(v, v8f_set1(mask.f));
+}
+
+static FINLINE v8f_T /* Per-lane square root */
+v8f_sqrt(const v8f_T v)
+{
+ return _mm256_sqrt_ps(v);
+}
+
+static FINLINE v8f_T /* Per-lane hardware estimate of 1/sqrt(v): approximate result */
+v8f_rsqrte(const v8f_T v)
+{
+ return _mm256_rsqrt_ps(v);
+}
+
+static FINLINE v8f_T /* 1/sqrt(v): estimate refined by one Newton-Raphson step */
+v8f_rsqrt(const v8f_T v)
+{
+ const v8f_T est = v8f_rsqrte(v);
+ const v8f_T half_v_est2 = v8f_mul(v8f_mul(v8f_mul(est, est), v), v8f_set1(0.5f));
+ /* est * (1.5 - 0.5*v*est^2) */
+ return v8f_mul(v8f_sub(v8f_set1(1.5f), half_v_est2), est);
+}
+
+static FINLINE v8f_T /* Per-lane hardware estimate of 1/v: approximate result */
+v8f_rcpe(const v8f_T v)
+{
+ return _mm256_rcp_ps(v);
+}
+
+static FINLINE v8f_T /* 1/v: estimate refined by one Newton-Raphson step */
+v8f_rcp(const v8f_T v)
+{
+ const v8f_T est = v8f_rcpe(v);
+ /* est * (2 - est*v) */
+ return v8f_mul(v8f_sub(v8f_set1(2.f), v8f_mul(est, v)), est);
+}
+
+static FINLINE v8f_T /* Per-lane linear interpolation: from + (to - from)*param */
+v8f_lerp(const v8f_T from, const v8f_T to, const v8f_T param)
+{
+ return v8f_madd(v8f_sub(to, from), param, from);
+}
+
+/*******************************************************************************
+ * Comparators
+ ******************************************************************************/
+static FINLINE v8f_T /* Per-lane v0 == v1: all bits set where true, 0 elsewhere */
+v8f_eq(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_cmp_ps(v0, v1, _CMP_EQ_OS); /* Ordered: false if a lane is NaN */
+}
+
+static FINLINE v8f_T /* Per-lane v0 != v1: all bits set where true, 0 elsewhere */
+v8f_neq(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_cmp_ps(v0, v1, _CMP_NEQ_UQ); /* Unordered: true on NaN, like C's != */
+}
+
+static FINLINE v8f_T /* Per-lane v0 >= v1: all bits set where true, 0 elsewhere */
+v8f_ge(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_cmp_ps(v0, v1, _CMP_GE_OS); /* Ordered: false if a lane is NaN */
+}
+
+static FINLINE v8f_T /* Per-lane v0 <= v1: all bits set where true, 0 elsewhere */
+v8f_le(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_cmp_ps(v0, v1, _CMP_LE_OS); /* Ordered: false if a lane is NaN */
+}
+
+static FINLINE v8f_T /* Per-lane v0 > v1: all bits set where true, 0 elsewhere */
+v8f_gt(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_cmp_ps(v0, v1, _CMP_GT_OS); /* Ordered: false if a lane is NaN */
+}
+
+static FINLINE v8f_T /* Per-lane v0 < v1: all bits set where true, 0 elsewhere */
+v8f_lt(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_cmp_ps(v0, v1, _CMP_LT_OS); /* Ordered: false if a lane is NaN */
+}
+
+static FINLINE v8f_T /* Per-lane |v0 - v1| <= eps, returned as a mask */
+v8f_eq_eps(const v8f_T v0, const v8f_T v1, const v8f_T eps)
+{
+ return v8f_le(v8f_abs(v8f_sub(v0, v1)), eps);
+}
+
+static FINLINE v8f_T /* Per-lane minimum of v0 and v1 */
+v8f_min(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_min_ps(v0, v1);
+}
+
+static FINLINE v8f_T /* Per-lane maximum of v0 and v1 */
+v8f_max(const v8f_T v0, const v8f_T v1)
+{
+ return _mm256_max_ps(v0, v1);
+}
+
+static FINLINE float /* Smallest of the 8 lanes of v0 */
+v8f_reduce_min(const v8f_T v0)
+{
+ ALIGN(32) float lanes[8];
+ const v8f_T pairs = _mm256_min_ps /* Fold lanes 2 apart in each 128-bit half */
+ (v0, _mm256_permute_ps(v0, _MM_SHUFFLE(1, 0, 3, 2)));
+ const v8f_T halves = _mm256_min_ps /* Then fold the adjacent lanes */
+ (pairs, _mm256_permute_ps(pairs, _MM_SHUFFLE(2, 3, 0, 1)));
+ _mm256_store_ps(lanes, halves); /* [0] and [4]: min of each 128-bit half */
+ return MMIN(lanes[0], lanes[4]);
+}
+
+static FINLINE float /* Largest of the 8 lanes of v0 */
+v8f_reduce_max(const v8f_T v0)
+{
+ ALIGN(32) float lanes[8];
+ const v8f_T pairs = _mm256_max_ps /* Fold lanes 2 apart in each 128-bit half */
+ (v0, _mm256_permute_ps(v0, _MM_SHUFFLE(1, 0, 3, 2)));
+ const v8f_T halves = _mm256_max_ps /* Then fold the adjacent lanes */
+ (pairs, _mm256_permute_ps(pairs, _MM_SHUFFLE(2, 3, 0, 1)));
+ _mm256_store_ps(lanes, halves); /* [0] and [4]: max of each 128-bit half */
+ return MMAX(lanes[0], lanes[4]);
+}
+
+static FINLINE v8f_T /* Per-lane min(max(v, vmin), vmax) */
+v8f_clamp(const v8f_T v, const v8f_T vmin, const v8f_T vmax)
+{
+ return v8f_min(v8f_max(v, vmin), vmax);
+}
+
+#endif /* RSIMD_AVXF_H */
+
diff --git a/src/avx/avxi.h b/src/avx/avxi.h
@@ -0,0 +1,204 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_AVXI_H
+#define RSIMD_AVXI_H
+
+/*
+ * 8 packed signed integers
+ */
+
+#include <rsys/math.h>
+#include <immintrin.h>
+
+typedef __m256i v8i_T;
+
+/*******************************************************************************
+ * Set operations
+ ******************************************************************************/
+static FINLINE int32_t* /* Write the 8 lanes of v into dst and return dst */
+v8i_store(int32_t dst[8], v8i_T v)
+{
+ ASSERT(dst && IS_ALIGNED(dst, 32)); /* Aligned store: 32-byte boundary */
+ _mm256_store_si256((v8i_T*)dst, v);
+ return dst;
+}
+
+static FINLINE v8i_T /* Read 8 int32_t from a 32-byte aligned address */
+v8i_load(const int32_t src[8])
+{
+ ASSERT(src && IS_ALIGNED(src, 32));
+ return _mm256_load_si256((const v8i_T*)src);
+}
+
+static FINLINE v8i_T /* Broadcast i to the 8 lanes */
+v8i_set1(const int32_t i)
+{
+ return _mm256_set1_epi32(i);
+}
+
+static FINLINE v8i_T /* Build { a, b, c, d, e, f, g, h }; a is the lowest lane */
+v8i_set
+ (const int32_t a, const int32_t b, const int32_t c, const int32_t d,
+ const int32_t e, const int32_t f, const int32_t g, const int32_t h)
+{
+ return _mm256_set_epi32(h, g, f, e, d, c, b, a); /* Highest lane comes first */
+}
+
+static FINLINE v8i_T /* Vector whose 8 lanes are 0 */
+v8i_zero(void)
+{
+ return _mm256_setzero_si256();
+}
+
+static FINLINE v8i_T /* Concatenate: abcd -> lower 128 bits, efgh -> upper 128 bits */
+v8i_set_v4i(const v4i_T abcd, const v4i_T efgh)
+{
+ const v8i_T zero = v8i_zero();
+ const v8i_T lower = _mm256_insertf128_si256(zero, abcd, 0);
+ const v8i_T both = _mm256_insertf128_si256(lower, efgh, 1);
+ return both;
+}
+
+/*******************************************************************************
+ * Extract components
+ ******************************************************************************/
+static FINLINE v4i_T /* Lower 128 bits of v, i.e. its first 4 lanes */
+v8i_abcd(const v8i_T v)
+{
+ return _mm256_extractf128_si256(v, 0);
+}
+
+static FINLINE v4i_T /* Upper 128 bits of v, i.e. its last 4 lanes */
+v8i_efgh(const v8i_T v)
+{
+ return _mm256_extractf128_si256(v, 1);
+}
+
+/*******************************************************************************
+ * Bitwise operators
+ ******************************************************************************/
+static FINLINE v8i_T /* Bitwise v0 | v1 */
+v8i_or(const v8i_T v0, const v8i_T v1)
+{
+ /* NOTE(review): float-domain op, presumably as integer logic needs AVX2 */
+ const v8f_T lhs = _mm256_castsi256_ps(v0);
+ const v8f_T rhs = _mm256_castsi256_ps(v1);
+ return _mm256_castps_si256(_mm256_or_ps(lhs, rhs));
+}
+
+static FINLINE v8i_T /* Bitwise v0 & v1 */
+v8i_and(const v8i_T v0, const v8i_T v1)
+{
+ /* NOTE(review): float-domain op, presumably as integer logic needs AVX2 */
+ const v8f_T lhs = _mm256_castsi256_ps(v0);
+ const v8f_T rhs = _mm256_castsi256_ps(v1);
+ return _mm256_castps_si256(_mm256_and_ps(lhs, rhs));
+}
+
+static FINLINE v8i_T /* Bitwise (~v0) & v1: v0 is the complemented operand */
+v8i_andnot(const v8i_T v0, const v8i_T v1)
+{
+ /* NOTE(review): float-domain op, presumably as integer logic needs AVX2 */
+ const v8f_T negated = _mm256_castsi256_ps(v0);
+ const v8f_T kept = _mm256_castsi256_ps(v1);
+ return _mm256_castps_si256(_mm256_andnot_ps(negated, kept));
+}
+
+static FINLINE v8i_T /* Bitwise v0 ^ v1 */
+v8i_xor(const v8i_T v0, const v8i_T v1)
+{
+ /* NOTE(review): float-domain op, presumably as integer logic needs AVX2 */
+ const v8f_T lhs = _mm256_castsi256_ps(v0);
+ const v8f_T rhs = _mm256_castsi256_ps(v1);
+ return _mm256_castps_si256(_mm256_xor_ps(lhs, rhs));
+}
+
+/*******************************************************************************
+ * Comparators
+ ******************************************************************************/
+static FINLINE v8i_T /* Per-lane v0 == v1, evaluated on each 128-bit half with v4i_eq */
+v8i_eq(const v8i_T v0, const v8i_T v1)
+{
+ /* NOTE(review): presumably split since 256-bit integer compares need AVX2 */
+ const v4i_T lo0 = v8i_abcd(v0);
+ const v4i_T lo1 = v8i_abcd(v1);
+ const v4i_T hi0 = v8i_efgh(v0);
+ const v4i_T hi1 = v8i_efgh(v1);
+ const v4i_T lo = v4i_eq(lo0, lo1);
+ return v8i_set_v4i(lo, v4i_eq(hi0, hi1));
+}
+
+static FINLINE v8i_T /* Per-lane v0 != v1, evaluated on each 128-bit half with v4i_neq */
+v8i_neq(const v8i_T v0, const v8i_T v1)
+{
+ /* NOTE(review): presumably split since 256-bit integer compares need AVX2 */
+ const v4i_T lo0 = v8i_abcd(v0);
+ const v4i_T lo1 = v8i_abcd(v1);
+ const v4i_T hi0 = v8i_efgh(v0);
+ const v4i_T hi1 = v8i_efgh(v1);
+ const v4i_T lo = v4i_neq(lo0, lo1);
+ return v8i_set_v4i(lo, v4i_neq(hi0, hi1));
+}
+
+static FINLINE v8i_T /* Per lane: vtrue if the sign bit of vcond is set, else vfalse */
+v8i_sel(const v8i_T vfalse, const v8i_T vtrue, const v8i_T vcond)
+{
+ const v8f_T if_clear = _mm256_castsi256_ps(vfalse);
+ const v8f_T if_set = _mm256_castsi256_ps(vtrue);
+ const v8f_T picked = _mm256_blendv_ps(if_clear, if_set, _mm256_castsi256_ps(vcond));
+ return _mm256_castps_si256(picked);
+}
+
+static FINLINE v8i_T /* Per-lane minimum, evaluated on each 128-bit half with v4i_min */
+v8i_min(const v8i_T v0, const v8i_T v1)
+{
+ /* NOTE(review): presumably split since 256-bit integer min needs AVX2 */
+ const v4i_T lo0 = v8i_abcd(v0);
+ const v4i_T lo1 = v8i_abcd(v1);
+ const v4i_T hi0 = v8i_efgh(v0);
+ const v4i_T hi1 = v8i_efgh(v1);
+ const v4i_T lo = v4i_min(lo0, lo1);
+ return v8i_set_v4i(lo, v4i_min(hi0, hi1));
+}
+
+static FINLINE v8i_T /* Per-lane maximum, evaluated on each 128-bit half with v4i_max */
+v8i_max(const v8i_T v0, const v8i_T v1)
+{
+ /* NOTE(review): presumably split since 256-bit integer max needs AVX2 */
+ const v4i_T lo0 = v8i_abcd(v0);
+ const v4i_T lo1 = v8i_abcd(v1);
+ const v4i_T hi0 = v8i_efgh(v0);
+ const v4i_T hi1 = v8i_efgh(v1);
+ const v4i_T lo = v4i_max(lo0, lo1);
+ return v8i_set_v4i(lo, v4i_max(hi0, hi1));
+}
+
+static FINLINE int32_t /* Minimum over the 8 lanes of v, returned as a scalar */
+v8i_reduce_min_i32(const v8i_T v)
+{
+ const v4i_T tmp = v4i_min(v8i_abcd(v), v8i_efgh(v)); /* 8 lanes -> 4 lanes */
+ return v4i_x(v4i_reduce_min(tmp)); /* Presumably v4i_x reads the first lane */
+}
+
+static FINLINE int32_t /* Maximum over the 8 lanes of v, returned as a scalar */
+v8i_reduce_max_i32(const v8i_T v)
+{
+ const v4i_T tmp = v4i_max(v8i_abcd(v), v8i_efgh(v)); /* 8 lanes -> 4 lanes */
+ return v4i_x(v4i_reduce_max(tmp)); /* Presumably v4i_x reads the first lane */
+}
+
+#endif /* RSIMD_AVXI_H */
+
diff --git a/src/math.h b/src/math.h
@@ -0,0 +1,29 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH_H
+#define RSIMD_MATH_H
+
+#include <rsys/rsys.h>
+
+#ifdef SIMD_SSE2
+ #include "math4.h"
+#endif
+#ifdef SIMD_AVX
+ #include "math8.h"
+#endif
+
+#endif /* RSIMD_MATH_H */
+
diff --git a/src/math4.h b/src/math4.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH4_H
+#define RSIMD_MATH4_H
+
+#define RSIMD_WIDTH__ 4
+#include "vXf_begin.h"
+#include "mathX.h"
+#include "vXf_end.h"
+
+/*******************************************************************************
+ * Miscellaneous
+ ******************************************************************************/
+static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi) */
+v4f_xyz_to_rthetaphi(const v4f_T v)
+{
+ const v4f_T zero = v4f_zero();
+ const v4f_T len2 = v4f_len2(v); /* Presumably the length of (x, y) - TODO confirm */
+ const v4f_T len3 = v4f_len3(v); /* Presumably the length of (x, y, z) - TODO confirm */
+ const v4f_T theta = v4f_sel /* Assuming v4f_sel(vfalse, vtrue, vcond): 0 if len3 == 0 */
+ (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
+ const v4f_T tmp_phi = v4f_sel /* Same guard against a division by len2 == 0 */
+ (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
+ const v4f_T phi = v4f_sel /* x >= 0 ? tmp_phi : PI - tmp_phi, under the same assumption */
+ (v4f_sub(v4f_set1((float)PI), tmp_phi),tmp_phi, v4f_ge(v4f_xxxx(v), zero));
+ return v4f_xyab(v4f_xayb(len3, theta), phi); /* Presumably { r, theta, phi, phi } */
+}
+#endif /* RSIMD_MATH4_H */
diff --git a/src/math8.h b/src/math8.h
@@ -0,0 +1,24 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_MATH8_H
+#define RSIMD_MATH8_H
+
+#define RSIMD_WIDTH__ 8
+#include "vXf_begin.h"
+#include "mathX.h"
+#include "vXf_end.h"
+
+#endif /* RSIMD_MATH8_H */
diff --git a/src/mathX.h b/src/mathX.h
@@ -0,0 +1,137 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+
+#ifdef COMPILER_GCC
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#include <sleef.h>
+
+#ifdef COMPILER_GCC
+ #pragma GCC diagnostic pop
+#endif
+
+static FINLINE RSIMD_vXf_T__ /* Per lane: magnitude of x with the sign of y */
+RSIMD_vXf__(copysign)(const RSIMD_vXf_T__ x, const RSIMD_vXf_T__ y)
+{
+ return RSIMD_Sleef__(copysignf)(x, y); /* SLEEF routine picked by the macro */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane floor(x) */
+RSIMD_vXf__(floor)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef__(floorf)(x); /* SLEEF routine picked by the macro */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane x raised to the power y */
+RSIMD_vXf__(pow)(const RSIMD_vXf_T__ x, const RSIMD_vXf_T__ y)
+{
+ return RSIMD_Sleef_ULP__(powf, u10)(x, y); /* u10: SLEEF 1.0 ULP variant */
+}
+
+/*******************************************************************************
+ * Exponential functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__ /* Per-lane 2^x */
+RSIMD_vXf__(exp2)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(exp2f, u10)(x); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane e^x */
+RSIMD_vXf__(exp)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(expf, u10)(x); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane 10^x */
+RSIMD_vXf__(exp10)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(exp10f, u10)(x); /* u10: SLEEF 1.0 ULP variant */
+}
+
+/*******************************************************************************
+ * Log functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__ /* Per-lane base-2 logarithm of x */
+RSIMD_vXf__(log2)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(log2f, u10)(x); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane natural logarithm of x */
+RSIMD_vXf__(log)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(logf, u10)(x); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane base-10 logarithm of x */
+RSIMD_vXf__(log10)(const RSIMD_vXf_T__ x)
+{
+ return RSIMD_Sleef_ULP__(log10f, u10)(x); /* u10: SLEEF 1.0 ULP variant */
+}
+
+/*******************************************************************************
+ * Trigonometric functions
+ ******************************************************************************/
+static INLINE RSIMD_vXf_T__ /* Per-lane sine of v */
+RSIMD_vXf__(sin)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(sinf, u10)(v); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane arc sine of v */
+RSIMD_vXf__(asin)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(asinf, u10)(v); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane cosine of v */
+RSIMD_vXf__(cos)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(cosf, u10)(v); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane arc cosine of v */
+RSIMD_vXf__(acos)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(acosf, u10)(v); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE void /* Per lane: *s <- sin(v) and *c <- cos(v), in a single call */
+RSIMD_vXf__(sincos)
+ (const RSIMD_vXf_T__ v, RSIMD_vXf_T__* RESTRICT s, RSIMD_vXf_T__* RESTRICT c)
+{
+ const RSIMD_Sleef_vecf__(2) r = RSIMD_Sleef_ULP__(sincosf, u10)(v);
+ *s = r.x; /* SLEEF returns the sine in x ... */
+ *c = r.y; /* ... and the cosine in y; s and c are not checked against NULL */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane tangent of v */
+RSIMD_vXf__(tan)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(tanf, u10)(v); /* u10: SLEEF 1.0 ULP variant */
+}
+
+static INLINE RSIMD_vXf_T__ /* Per-lane arc tangent of v */
+RSIMD_vXf__(atan)(const RSIMD_vXf_T__ v)
+{
+ return RSIMD_Sleef_ULP__(atanf, u10)(v); /* u10: SLEEF 1.0 ULP variant */
+}
+
+
diff --git a/src/rsimd.h b/src/rsimd.h
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef RSIMD_H
@@ -20,7 +20,7 @@
#if defined(RSIMD_SHARED_BUILD)
#define RSIMD_API extern EXPORT_SYM
-#elif defined(RSIMD_STATIC)
+#elif defined(RSIMD_STATIC_BUILD)
#define RSIMD_API extern LOCAL_SYM
#else
#define RSIMD_API extern IMPORT_SYM
@@ -28,8 +28,9 @@
#ifdef SIMD_SSE2
#include "sse/sse.h"
-#else
- #error Unsupported_Platform
+#endif
+#ifdef SIMD_AVX
+ #include "avx/avx.h"
#endif
#endif /* RSIMD_H */
diff --git a/src/rsimd_version.h.in b/src/rsimd_version.h.in
@@ -0,0 +1,23 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_VERSION_H
+#define RSIMD_VERSION_H
+
+#define RSIMD_VERSION_MAJOR @VERSION_MAJOR@
+#define RSIMD_VERSION_MINOR @VERSION_MINOR@
+#define RSIMD_VERSION_PATCH @VERSION_PATCH@
+
+#endif /* RSIMD_VERSION_H */
diff --git a/src/soa4f2.h b/src/soa4f2.h
@@ -1,30 +1,22 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef SOA4F2_H
#define SOA4F2_H
-/* Generate the common soa4fX funcs */
-#define SOA4FX_DIMENSION__ 2
-#include "soa4fX.h"
-
-static FINLINE v4f_T
-soa4f2_cross(const v4f_T a[2], const v4f_T b[2])
-{
- ASSERT(a && b);
- return v4f_sub(v4f_mul(a[0], b[1]), v4f_mul(a[1], b[0]));
-}
+#define RSIMD_WIDTH__ 4
+#include "soaXf2.h"
#endif /* SOA4F2_H */
diff --git a/src/soa4f3.h b/src/soa4f3.h
@@ -1,34 +1,22 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef SOA4F3_H
#define SOA4F3_H
-/* Generate the common soa4fX functions */
-#define SOA4FX_DIMENSION__ 3
-#include "soa4fX.h"
-
-static FINLINE v4f_T*
-soa4f3_cross(v4f_T dst[3], const v4f_T a[3], const v4f_T b[3])
-{
- v4f_T tmp[3];
- ASSERT(dst && a && b);
- tmp[0] = v4f_sub(v4f_mul(a[1], b[2]), v4f_mul(a[2], b[1]));
- tmp[1] = v4f_sub(v4f_mul(a[2], b[0]), v4f_mul(a[0], b[2]));
- tmp[2] = v4f_sub(v4f_mul(a[0], b[1]), v4f_mul(a[1], b[0]));
- return soa4f3_set__(dst, tmp);
-}
+#define RSIMD_WIDTH__ 4
+#include "soaXf3.h"
#endif /* SOA4F3_H */
diff --git a/src/soa4f4.h b/src/soa4f4.h
@@ -1,24 +1,27 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef SOA4F4_H
#define SOA4F4_H
-/* Generate the common soa4fX functions */
-#define SOA4FX_DIMENSION__ 4
-#include "soa4fX.h"
+/* Generate the common soa4f4 functions */
+#define RSIMD_WIDTH__ 4
+#define RSIMD_SOA_DIMENSION__ 4
+#include "soaXfY_begin.h"
+#include "soaXfY.h"
+#include "soaXfY_end.h"
#endif /* SOA4F4_H */
diff --git a/src/soa4fX.h b/src/soa4fX.h
@@ -1,352 +0,0 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
- *
- * The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * The RSIMD library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-
-/*
- * Header used to generate funcs on SoA SIMD float vectors of X dimensions
- */
-#if !defined(SOA4FX_DIMENSION__)
- #error Missing arguments
-#endif
-
-#if defined(SOA4FX_FUNC__)
- #error Unexpected SOA4FX_FUNC__ macro defintion
-#endif
-
-#include "rsimd.h"
-
-#ifdef COMPILER_GCC
- #pragma GCC push_options
- #pragma GCC optimize("unroll-loops")
-#endif
-
-STATIC_ASSERT(SOA4FX_DIMENSION__ > 1, Unexpected_value);
-
-#define SOA4FX_FUNC__(Func) \
- CONCAT(CONCAT(CONCAT(soa4f, SOA4FX_DIMENSION__), _), Func)
-
-/* Helper macro */
-#define SIZEOF_SOA4FX__ sizeof(v4f_T[SOA4FX_DIMENSION__])
-
-#if SOA4FX_DIMENSION__ <= 4
-static FINLINE v4f_T*
-CONCAT(soa4f, SOA4FX_DIMENSION__)
- (v4f_T* dst
- ,const v4f_T x
- ,const v4f_T y
-#if SOA4FX_DIMENSION__ > 2
- ,const v4f_T z
-#endif
-#if SOA4FX_DIMENSION__ > 3
- ,const v4f_T w
-#endif
- )
-{
- ASSERT(dst);
- dst[0] = x;
- dst[1] = y;
-#if SOA4FX_DIMENSION__ > 2
- dst[2] = z;
-#endif
-#if SOA4FX_DIMENSION__ > 3
- dst[3] = w;
-#endif
- return dst;
-}
-#endif
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(splat)(v4f_T* dst, const v4f_T val)
-{
- int i;
- ASSERT(dst);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- dst[i] = val;
- return dst;
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(set__)(v4f_T* dst, const v4f_T* src)
-{
- int i;
- ASSERT(dst && src);
- ASSERT(!MEM_AREA_OVERLAP(dst, SIZEOF_SOA4FX__, src, SIZEOF_SOA4FX__));
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- dst[i] = src[i];
- return dst;
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(set)(v4f_T* dst, const v4f_T* src)
-{
- ASSERT(dst && src);
- if(!MEM_AREA_OVERLAP(dst, SIZEOF_SOA4FX__, src, SIZEOF_SOA4FX__)) {
- return SOA4FX_FUNC__(set__)(dst, src);
- } else {
- v4f_T tmp[SOA4FX_DIMENSION__];
- return SOA4FX_FUNC__(set__)(dst, SOA4FX_FUNC__(set__)(tmp, src));
- }
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(dot)(const v4f_T* a, const v4f_T* b)
-{
- v4f_T dot;
- int i;
- ASSERT(a && b);
- dot = v4f_mul(a[0], b[0]);
- FOR_EACH(i, 1, SOA4FX_DIMENSION__) {
- dot = v4f_add(dot, v4f_mul(a[i], b[i]));
- }
- return dot;
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(len)(const v4f_T* a)
-{
- ASSERT(a);
- return v4f_sqrt(SOA4FX_FUNC__(dot)(a, a));
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(normalize)(v4f_T* dst, const v4f_T* a)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- v4f_T sqr_len, rcp_len;
- v4f_T mask;
- int i;
- ASSERT(dst && a);
-
- sqr_len = SOA4FX_FUNC__(dot)(a, a);
- mask = v4f_neq(sqr_len, v4f_zero());
- rcp_len = v4f_rsqrt(sqr_len);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_and(mask, v4f_mul(a[i], rcp_len));
- SOA4FX_FUNC__(set__)(dst, tmp);
- return v4f_mul(sqr_len, rcp_len);
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(is_normalized)(const v4f_T* a)
-{
- return v4f_eq_eps(SOA4FX_FUNC__(len)(a), v4f_set1(1.f), v4f_set1(1.e-6f));
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(add)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_add(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(addf)(v4f_T* dst, const v4f_T* a, const v4f_T f)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_add(a[i], f);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(sub)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_sub(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(subf)(v4f_T* dst, const v4f_T* a, const v4f_T f)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_sub(a[i], f);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(mul)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_mul(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(mulf)(v4f_T* dst, const v4f_T* a, const v4f_T f)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_mul(a[i], f);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(div)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_div(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(divf)(v4f_T* dst, const v4f_T* a, const v4f_T f)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_div(a[i], f);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(minus)(v4f_T* dst, const v4f_T* a)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_minus(a[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(sum)(const v4f_T* a)
-{
- v4f_T f;
- int i = 0;
- ASSERT(a);
- f = a[i];
- FOR_EACH(i, 1, SOA4FX_DIMENSION__)
- f = v4f_add(f, a[i]);
- return f;
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(lerp)
- (v4f_T* dst,
- const v4f_T* from,
- const v4f_T* to,
- const v4f_T t)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- v4f_T t_adjusted;
- int i;
- ASSERT(dst && from && to);
- t_adjusted = v4f_min(v4f_max(t, v4f_zero()), v4f_set1(1.f));
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_add(from[i], v4f_mul(t_adjusted, v4f_sub(to[i], from[i])));
- SOA4FX_FUNC__(set__)(dst, tmp);
- return dst;
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(eq)(const v4f_T* a, const v4f_T* b)
-{
- v4f_T is_eq;
- int i = 0;
- ASSERT(a && b);
- is_eq = v4f_eq(a[0], b[0]);
- FOR_EACH(i, 1, SOA4FX_DIMENSION__)
- is_eq = v4f_and(is_eq, v4f_eq(a[i], b[i]));
- return is_eq;
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(eq_eps)(const v4f_T* a, const v4f_T* b, const v4f_T eps)
-{
- v4f_T is_eq;
- int i = 0;
- ASSERT(a && b);
- is_eq = v4f_eq_eps(a[0], b[0], eps);
- FOR_EACH(i, 1, SOA4FX_DIMENSION__)
- is_eq = v4f_and(is_eq, v4f_eq_eps(a[i], b[i], eps));
- return is_eq;
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(max)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_max(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(min)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_min(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(sel)
- (v4f_T* dst, const v4f_T* vfalse, const v4f_T* vtrue, const v4f_T cond)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && vfalse && vtrue);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_sel(vfalse[i], vtrue[i], cond);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(selv)
- (v4f_T* dst, const v4f_T* vfalse, const v4f_T* vtrue, const v4f_T* vcond)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && vfalse && vtrue);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_sel(vfalse[i], vtrue[i], vcond[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-#undef SIZEOF_SOA4FX__
-#undef SOA4FX_DIMENSION__
-#undef SOA4FX_FUNC__
-
-#ifdef COMPILER_GCC
- #pragma GCC pop_options
-#endif
-
diff --git a/src/soa8f2.h b/src/soa8f2.h
@@ -0,0 +1,22 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SOA8F2_H
+#define SOA8F2_H
+
+#define RSIMD_WIDTH__ 8
+#include "soaXf2.h"
+
+#endif /* SOA8F2_H */
diff --git a/src/soa8f3.h b/src/soa8f3.h
@@ -0,0 +1,22 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SOA8F3_H
+#define SOA8F3_H
+
+#define RSIMD_WIDTH__ 8
+#include "soaXf3.h"
+
+#endif /* SOA8F3_H */
diff --git a/src/soa8f4.h b/src/soa8f4.h
@@ -0,0 +1,27 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SOA8F4_H
+#define SOA8F4_H
+
+/* Generate the common soa8f4 functions */
+#define RSIMD_WIDTH__ 8
+#define RSIMD_SOA_DIMENSION__ 4
+#include "soaXfY_begin.h"
+#include "soaXfY.h"
+#include "soaXfY_end.h"
+
+#endif /* SOA8F4_H */
+
diff --git a/src/soaXf2.h b/src/soaXf2.h
@@ -0,0 +1,33 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_WIDTH__
+ #error "Undefined RSIMD_WIDTH__ macro"
+#endif
+
+#define RSIMD_SOA_DIMENSION__ 2
+#include "soaXfY_begin.h"
+#include "soaXfY.h"
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(cross)(const RSIMD_vXf_T__ a[2], const RSIMD_vXf_T__ b[2])
+{
+ ASSERT(a && b);
+ return RSIMD_vXf__(sub)
+ (RSIMD_vXf__(mul)(a[0], b[1]),
+ RSIMD_vXf__(mul)(a[1], b[0]));
+}
+
+#include "soaXfY_end.h"
diff --git a/src/soaXf3.h b/src/soaXf3.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef RSIMD_WIDTH__
+ #error "Undefined RSIMD_WIDTH__ macro"
+#endif
+
+#define RSIMD_SOA_DIMENSION__ 3
+#include "soaXfY_begin.h"
+#include "soaXfY.h"
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(cross)
+ (RSIMD_vXf_T__ dst[3],
+ const RSIMD_vXf_T__ a[3],
+ const RSIMD_vXf_T__ b[3])
+{
+ RSIMD_vXf_T__ tmp[3];
+ ASSERT(dst && a && b);
+ tmp[0] = RSIMD_vXf__(sub)(RSIMD_vXf__(mul)(a[1], b[2]), RSIMD_vXf__(mul)(a[2], b[1]));
+ tmp[1] = RSIMD_vXf__(sub)(RSIMD_vXf__(mul)(a[2], b[0]), RSIMD_vXf__(mul)(a[0], b[2]));
+ tmp[2] = RSIMD_vXf__(sub)(RSIMD_vXf__(mul)(a[0], b[1]), RSIMD_vXf__(mul)(a[1], b[0]));
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+#include "soaXfY_end.h"
diff --git a/src/soaXfY.h b/src/soaXfY.h
@@ -0,0 +1,356 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+/*
+ * Header used to generate funcs on SoA SIMD float vectors of Y dimensions
+ */
+
+#ifndef SOAXFY_BEGIN_H
+ #error "The soaXfY_begin.h header must be included first"
+#endif
+
+/* Force GCC to unroll the loops */
+#ifdef COMPILER_GCC
+ #pragma GCC push_options
+ #pragma GCC optimize("unroll-loops")
+#endif
+
+#if RSIMD_SOA_DIMENSION__ <= 4
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY_PREFIX__
+ (RSIMD_vXf_T__* dst
+ ,const RSIMD_vXf_T__ x
+ ,const RSIMD_vXf_T__ y
+#if RSIMD_SOA_DIMENSION__ > 2
+ ,const RSIMD_vXf_T__ z
+#endif
+#if RSIMD_SOA_DIMENSION__ > 3
+ ,const RSIMD_vXf_T__ w
+#endif
+ )
+{
+ ASSERT(dst);
+ dst[0] = x;
+ dst[1] = y;
+#if RSIMD_SOA_DIMENSION__ > 2
+ dst[2] = z;
+#endif
+#if RSIMD_SOA_DIMENSION__ > 3
+ dst[3] = w;
+#endif
+ return dst;
+}
+#endif
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(splat)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__ val)
+{
+ int i;
+ ASSERT(dst);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ dst[i] = val;
+ return dst;
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(set__)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src)
+{
+ int i;
+ ASSERT(dst && src);
+ ASSERT(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__));
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ dst[i] = src[i];
+ return dst;
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(set)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src)
+{
+ ASSERT(dst && src);
+ if(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__)) {
+ return RSIMD_soaXfY__(set__)(dst, src);
+ } else {
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ return RSIMD_soaXfY__(set__)(dst, RSIMD_soaXfY__(set__)(tmp, src));
+ }
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(dot)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ dot;
+ int i;
+ ASSERT(a && b);
+ dot = RSIMD_vXf__(mul)(a[0], b[0]);
+ FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) {
+ dot = RSIMD_vXf__(madd)(a[i], b[i], dot);
+ }
+ return dot;
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(len)(const RSIMD_vXf_T__* a)
+{
+ ASSERT(a);
+ return RSIMD_vXf__(sqrt)(RSIMD_soaXfY__(dot)(a, a));
+}
+
+/* Normalize each lane of the SoA vector `a', store the result in `dst' and
+ * return the per-lane length of `a'. Lanes whose squared length is zero
+ * produce a null vector in `dst' and a null returned length. The result is
+ * built in a temporary before being copied, so `dst' may be equal to `a'. */
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(normalize)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a)
+{
+  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+  RSIMD_vXf_T__ sqr_len, rcp_len;
+  RSIMD_vXf_T__ mask;
+  int i;
+  ASSERT(dst && a);
+
+  sqr_len = RSIMD_soaXfY__(dot)(a, a);
+  /* Lanes with a non null squared length; used to discard the other lanes */
+  mask = RSIMD_vXf__(neq)(sqr_len, RSIMD_vXf__(zero)());
+  rcp_len = RSIMD_vXf__(rsqrt)(sqr_len);
+  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+    tmp[i] = RSIMD_vXf__(and)(mask, RSIMD_vXf__(mul)(a[i], rcp_len));
+  RSIMD_soaXfY__(set__)(dst, tmp);
+
+  /* len = sqr_len * 1/sqrt(sqr_len). For a null lane this product is
+   * 0 * rsqrt(0), i.e. not a number as long as rsqrt(0) is not finite
+   * (NOTE(review): presumed from the masking above; confirm against the
+   * rsqrt implementation). Mask it like the components so that the returned
+   * length of a null vector is 0. */
+  return RSIMD_vXf__(and)(mask, RSIMD_vXf__(mul)(sqr_len, rcp_len));
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(is_normalized)(const RSIMD_vXf_T__* a)
+{
+ return RSIMD_vXf__(eq_eps)
+ (RSIMD_soaXfY__(len)(a),
+ RSIMD_vXf__(set1)(1.f),
+ RSIMD_vXf__(set1)(1.e-6f));
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(add)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(add)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(addf)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(add)(a[i], f);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(sub)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(sub)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(subf)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(sub)(a[i], f);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(mul)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(mul)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(mulf)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(mul)(a[i], f);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(div)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(div)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(divf)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(div)(a[i], f);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(minus)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(minus)(a[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(sum)(const RSIMD_vXf_T__* a)
+{
+ RSIMD_vXf_T__ f;
+ int i = 0;
+ ASSERT(a);
+ f = a[i];
+ FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
+ f = RSIMD_vXf__(add)(f, a[i]);
+ return f;
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(lerp)
+ (RSIMD_vXf_T__* dst,
+ const RSIMD_vXf_T__* from,
+ const RSIMD_vXf_T__* to,
+ const RSIMD_vXf_T__ t)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && from && to);
+
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(lerp)(from[i], to[i], t);
+ RSIMD_soaXfY__(set__)(dst, tmp);
+ return dst;
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(eq)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ is_eq;
+ int i = 0;
+ ASSERT(a && b);
+ is_eq = RSIMD_vXf__(eq)(a[0], b[0]);
+ FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
+ is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq)(a[i], b[i]));
+ return is_eq;
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(eq_eps)
+ (const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b, const RSIMD_vXf_T__ eps)
+{
+ RSIMD_vXf_T__ is_eq;
+ int i = 0;
+ ASSERT(a && b);
+ is_eq = RSIMD_vXf__(eq_eps)(a[0], b[0], eps);
+ FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
+ is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq_eps)(a[i], b[i], eps));
+ return is_eq;
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(max)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(max)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(min)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(min)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(sel)
+ (RSIMD_vXf_T__* dst,
+ const RSIMD_vXf_T__* vfalse,
+ const RSIMD_vXf_T__* vtrue,
+ const RSIMD_vXf_T__ cond)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && vfalse && vtrue);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], cond);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+/* Component-wise selection: for each component `i', store in `dst[i]' the
+ * result of the SIMD `sel' primitive applied to `vfalse[i]', `vtrue[i]' and
+ * the per-component condition `vcond[i]'. The result is built in a temporary
+ * before being copied, so `dst' may be equal to one of the inputs. Return
+ * `dst'. */
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(selv)
+  (RSIMD_vXf_T__* dst,
+   const RSIMD_vXf_T__* vfalse,
+   const RSIMD_vXf_T__* vtrue,
+   const RSIMD_vXf_T__* vcond)
+{
+  RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+  int i;
+  /* `vcond' is dereferenced below and must be checked like the others */
+  ASSERT(dst && vfalse && vtrue && vcond);
+  FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+    tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], vcond[i]);
+  return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+/* Restore compilation parameters */
+#ifdef COMPILER_GCC
+ #pragma GCC pop_options
+#endif
+
diff --git a/src/soaXfY_begin.h b/src/soaXfY_begin.h
@@ -0,0 +1,51 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+#include "vXf_begin.h"
+
+/* This file can be included once */
+#ifdef SOAXFY_BEGIN_H
+ #error "The soaXfY_begin.h header is already included"
+#endif
+#define SOAXFY_BEGIN_H
+
+/* Check parameters */
+#if !defined(RSIMD_SOA_DIMENSION__)
+ #error "Undefined RSIMD_SOA_DIMENSION__ macro"
+#endif
+#if !defined(RSIMD_WIDTH__)
+ #error "Undefined RSIMD_WIDTH__ macro"
+#endif
+#if RSIMD_SOA_DIMENSION__ < 1 || RSIMD_SOA_DIMENSION__ > 4
+ #error "Unexpected RSIMD_SOA_DIMENSION__ value"
+#endif
+#if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8
+ #error "Unexpected RSIMD_WIDTH__ value of "STR(RSIMD_WIDTH__)
+#endif
+
+/* Check that internal macros are not already defined */
+#if defined(RSIMD_soaXfY_PREFIX__) \
+ || defined(RSIMD_soaXfY__) \
+ || defined(SIZEOF_RSIMD_soaXfY__)
+ #error "Unexpected macro definition"
+#endif
+
+/* Macros generic to RSIMD_WIDTH__ and RSIMD_SOA_DIMENSION__ */
+#define RSIMD_soaXfY_PREFIX__ \
+ CONCAT(CONCAT(CONCAT(soa, RSIMD_WIDTH__), f), RSIMD_SOA_DIMENSION__)
+#define RSIMD_soaXfY__(Func) CONCAT(CONCAT(RSIMD_soaXfY_PREFIX__, _), Func)
+#define SIZEOF_RSIMD_soaXfY__ sizeof(RSIMD_vXf_T__[RSIMD_SOA_DIMENSION__])
+
diff --git a/src/soaXfY_end.h b/src/soaXfY_end.h
@@ -0,0 +1,31 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SOAXFY_BEGIN_H
+ #error "The soaXfY_begin.h file must be included"
+#endif
+
+/* Undef helper macros */
+#undef RSIMD_soaXfY_PREFIX__
+#undef RSIMD_soaXfY__
+#undef SIZEOF_RSIMD_soaXfY__
+
+/* Undef parameters */
+#undef RSIMD_SOA_DIMENSION__
+#undef RSIMD_WIDTH__
+
+#undef SOAXFY_BEGIN_H
+
+#include "vXf_end.h"
diff --git a/src/sse/sse.h b/src/sse/sse.h
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef RSIMD_SSE_H
diff --git a/src/sse/sse_swz.h b/src/sse/sse_swz.h
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef RSIMD_SSE_SWZ_H
diff --git a/src/sse/ssef.c b/src/sse/ssef.c
@@ -1,150 +0,0 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
- *
- * The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * The RSIMD library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-
-#include "../rsimd.h"
-
-#define KC0 v4f_set1(0.63661977236f)
-#define KC1 v4f_set1(1.57079625129f)
-#define KC2 v4f_set1(7.54978995489e-8f)
-#define CC0 v4f_set1(-0.0013602249f)
-#define CC1 v4f_set1(0.0416566950f)
-#define CC2 v4f_set1(-0.4999990225f)
-#define SC0 v4f_set1(-0.0001950727f)
-#define SC1 v4f_set1(0.0083320758f)
-#define SC2 v4f_set1(-0.1666665247f)
-#define ONE v4f_set1(1.f)
-
-v4f_T
-v4f_sin(const v4f_T v)
-{
- const v4i_T zeroi = v4i_zero();
- const v4i_T onei = v4i_set1(1);
- const v4i_T twoi = v4i_set1(2);
- const v4i_T threei = v4i_set1(3);
-
- const v4f_T x = v4f_mul(v, KC0);
- const v4i_T q = v4f_to_v4i(x);
- const v4i_T off = v4i_and(q, threei);
- const v4f_T qf = v4i_to_v4f(q);
-
- const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
- const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
- const v4f_T xl2 = v4f_mul(xl, xl);
- const v4f_T xl3 = v4f_mul(xl2, xl);
-
- const v4f_T cx =
- v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
- const v4f_T sx =
- v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
- const v4f_T mask0 = (v4f_T) v4i_eq(v4i_and(off, onei), zeroi);
- const v4f_T mask1 = (v4f_T) v4i_eq(v4i_and(off, twoi), zeroi);
- const v4f_T res = v4f_sel(cx, sx, mask0);
- return v4f_sel(v4f_minus(res), res, mask1);
-}
-
-v4f_T
-v4f_cos(const v4f_T v)
-{
- const v4i_T zeroi = v4i_zero();
- const v4i_T onei = v4i_set1(1);
- const v4i_T twoi = v4i_set1(2);
- const v4i_T threei = v4i_set1(3);
-
- const v4f_T x = v4f_mul(v, KC0);
- const v4i_T q = v4f_to_v4i(x);
- const v4i_T off = v4i_add(v4i_and(q, threei), onei);
- const v4f_T qf = v4i_to_v4f(q);
-
- const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
- const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
- const v4f_T xl2 = v4f_mul(xl, xl);
- const v4f_T xl3 = v4f_mul(xl2, xl);
-
- const v4f_T cx =
- v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
- const v4f_T sx =
- v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
- const v4f_T mask0 = (v4f_T) v4i_eq(v4i_and(off, onei), zeroi);
- const v4f_T mask1 = (v4f_T) v4i_eq(v4i_and(off, twoi), zeroi);
- const v4f_T res = v4f_sel(cx, sx, mask0);
- return v4f_sel(v4f_minus(res), res, mask1);
-}
-
-void
-v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c)
-{
- const v4i_T zeroi = v4i_zero();
- const v4i_T onei = v4i_set1(1);
- const v4i_T twoi = v4i_set1(2);
- const v4i_T threei = v4i_set1(3);
-
- const v4f_T x = v4f_mul(v, KC0);
- const v4i_T q = v4f_to_v4i(x);
- const v4i_T soff = v4i_and(q, threei);
- const v4i_T coff = v4i_add(v4i_and(q, threei), onei);
- const v4f_T qf = v4i_to_v4f(q);
-
- const v4f_T tmp = v4f_sub(v, v4f_mul(qf, KC1));
- const v4f_T xl = v4f_sub(tmp, v4f_mul(qf, KC2));
- const v4f_T xl2 = v4f_mul(xl, xl);
- const v4f_T xl3 = v4f_mul(xl2, xl);
-
- const v4f_T cx =
- v4f_madd(v4f_madd(v4f_madd(CC0, xl2, CC1), xl2, CC2), xl2, ONE);
- const v4f_T sx =
- v4f_madd(v4f_madd(v4f_madd(SC0, xl2, SC1), xl2, SC2), xl3, xl);
-
- const v4f_T smask0 = (v4f_T) v4i_eq(v4i_and(soff, onei), zeroi);
- const v4f_T smask1 = (v4f_T) v4i_eq(v4i_and(soff, twoi), zeroi);
- const v4f_T sres = v4f_sel(cx, sx, smask0);
-
- const v4f_T cmask0 = (v4f_T) v4i_eq(v4i_and(coff, onei), zeroi);
- const v4f_T cmask1 = (v4f_T) v4i_eq(v4i_and(coff, twoi), zeroi);
- const v4f_T cres = v4f_sel(cx, sx, cmask0);
-
- *s = v4f_sel(v4f_minus(sres), sres, smask1);
- *c = v4f_sel(v4f_minus(cres), cres, cmask1);
-}
-
-v4f_T
-v4f_acos(const v4f_T v)
-{
- const v4f_T absv = v4f_abs(v);
- const v4f_T t0 = v4f_sqrt(v4f_sub(v4f_set1(1.f), absv));
- const v4f_T absv2 =v4f_mul(absv, absv);
- const v4f_T absv4 = v4f_mul(absv2, absv2);
-
- const v4f_T h0 = v4f_set1(-0.0012624911f);
- const v4f_T h1 = v4f_set1(0.0066700901f);
- const v4f_T h2 = v4f_set1(-0.0170881256f);
- const v4f_T h3 = v4f_set1(0.0308918810f);
- const v4f_T hi =
- v4f_madd(v4f_madd(v4f_madd(h0, absv, h1), absv, h2), absv, h3);
-
- const v4f_T l0 = v4f_set1(-0.0501743046f);
- const v4f_T l1 = v4f_set1(0.0889789874f);
- const v4f_T l2 = v4f_set1(-0.2145988016f);
- const v4f_T l3 = v4f_set1((float)(PI*0.5));
- const v4f_T lo =
- v4f_madd(v4f_madd(v4f_madd(l0, absv, l1), absv, l2), absv, l3);
-
- const v4f_T res = v4f_mul(v4f_madd(hi, absv4, lo), t0);
- const v4f_T mask = v4f_lt(v, v4f_zero());
-
- return v4f_sel(res, v4f_set1((float)PI) - res, mask);
-}
-
diff --git a/src/sse/ssef.h b/src/sse/ssef.h
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef RSIMD_SSEF_H
@@ -28,6 +28,9 @@
#ifdef SIMD_SSE4_1
#include <smmintrin.h>
#endif
+#ifdef FMADD
+ #include <immintrin.h>
+#endif
typedef __m128 v4f_T;
#define V4F_AT__(Vec, Id) __builtin_ia32_vec_ext_v4sf(Vec, Id)
@@ -69,7 +72,7 @@ v4f_loadu3(const float src[3])
}
static FINLINE v4f_T /* Build a vector whose four float lanes all hold x */
-v4f_set1(float x)
+v4f_set1(const float x)
{
return _mm_set1_ps(x); /* SSE broadcast of one scalar to every lane */
}
@@ -315,7 +318,11 @@ v4f_div(const v4f_T v0, const v4f_T v1)
static FINLINE v4f_T /* Per-lane multiply-add: returns v0*v1 + v2 */
v4f_madd(const v4f_T v0, const v4f_T v1, const v4f_T v2)
{
+#ifdef FMADD /* Same macro that guards the <immintrin.h> include of this header */
+ return _mm_fmadd_ps(v0, v1, v2); /* Fused form: the product is not rounded before the add */
+#else /* No FMA: multiply then add, each rounded; low bits may differ from the fused path */
return _mm_add_ps(_mm_mul_ps(v0, v1), v2);
+#endif
}
static FINLINE v4f_T
@@ -473,35 +480,6 @@ v4f_normalize3(const v4f_T v)
}
/*******************************************************************************
- * Trigonometric operations
- ******************************************************************************/
-RSIMD_API v4f_T v4f_sin(const v4f_T v);
-RSIMD_API v4f_T v4f_cos(const v4f_T v);
-RSIMD_API v4f_T v4f_acos(const v4f_T v);
-RSIMD_API void v4f_sincos(const v4f_T v, v4f_T* RESTRICT s, v4f_T* RESTRICT c);
-
-static FINLINE v4f_T
-v4f_tan(const v4f_T v)
-{
- v4f_T s, c;
- v4f_sincos(v, &s, &c);
- return v4f_div(s, c);
-}
-
-static FINLINE v4f_T
-v4f_asin(const v4f_T v)
-{
- return v4f_sub(v4f_set1((float)(PI*0.5)), v4f_acos(v));
-}
-
-static FINLINE v4f_T
-v4f_atan(v4f_T v)
-{
- const v4f_T tmp = v4f_rsqrt(v4f_madd(v, v, v4f_set1(1.f)));
- return v4f_asin(v4f_mul(v, tmp));
-}
-
-/*******************************************************************************
* Comparators
******************************************************************************/
static FINLINE v4f_T
@@ -578,24 +556,5 @@ v4f_clamp(const v4f_T v, const v4f_T vmin, const v4f_T vmax)
return v4f_min(v4f_max(v, vmin), vmax);
}
-/*******************************************************************************
- * Miscellaneous
- ******************************************************************************/
-static FINLINE v4f_T /* Cartesian (xyz) to spherical (r, theta, phi)*/
-v4f_xyz_to_rthetaphi(const v4f_T v)
-{
- const v4f_T zero = v4f_zero();
- const v4f_T len2 = v4f_len2(v);
- const v4f_T len3 = v4f_len3(v);
- const v4f_T theta = v4f_sel
- (v4f_acos(v4f_div(v4f_zzzz(v), len3)), zero, v4f_eq(len3, zero));
- const v4f_T tmp_phi = v4f_sel
- (v4f_asin(v4f_div(v4f_yyyy(v), len2)), zero, v4f_eq(len2, zero));
- const v4f_T phi = v4f_sel
- (v4f_sub(v4f_set1((float)PI), tmp_phi), tmp_phi, v4f_ge(v4f_xxxx(v), zero));
-
- return v4f_xyab(v4f_xayb(len3, theta), phi);
-}
-
#endif /* RSIMD_SSEF_H */
diff --git a/src/sse/ssei.h b/src/sse/ssei.h
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#ifndef RSIMD_SSEI_H
@@ -155,6 +155,12 @@ v4i_sub(const v4i_T v0, const v4i_T v1)
return _mm_sub_epi32(v0, v1);
}
+static FINLINE v4i_T /* Per-lane integer negation: returns -v */
+v4i_minus(const v4i_T v)
+{
+ return v4i_add(v4i_not(v), v4i_set1(1)); /* Two's complement, -v == ~v + 1, assuming v4i_not is a bitwise NOT */
+}
+
/*******************************************************************************
* Comparators
******************************************************************************/
@@ -204,5 +210,79 @@ v4i_sel(const v4i_T vfalse, const v4i_T vtrue, const v4i_T vcond)
#endif
}
+static FINLINE v4i_T /* Per-lane minimum of two vectors of four int32 */
+v4i_min(const v4i_T v0, const v4i_T v1)
+{
+#ifdef SIMD_SSE4_1 /* _mm_min_epi32 (signed 32-bit minimum) is an SSE4.1 intrinsic */
+ return _mm_min_epi32(v0, v1);
+#else /* Fallback: spill both vectors to memory and compare lane by lane */
+ ALIGN(16) int32_t a[4]; /* 16-byte aligned, presumably as v4i_store requires */
+ ALIGN(16) int32_t b[4];
+ v4i_store(a, v0);
+ v4i_store(b, v1);
+ return v4i_set /* MMIN is presumably the project's scalar min macro */
+ (MMIN(a[0], b[0]),
+ MMIN(a[1], b[1]),
+ MMIN(a[2], b[2]),
+ MMIN(a[3], b[3]));
+#endif
+}
+
+static FINLINE v4i_T /* Per-lane maximum of two vectors of four int32 */
+v4i_max(const v4i_T v0, const v4i_T v1)
+{
+#ifdef SIMD_SSE4_1 /* _mm_max_epi32 (signed 32-bit maximum) is an SSE4.1 intrinsic */
+ return _mm_max_epi32(v0, v1);
+#else /* Fallback: spill both vectors to memory and compare lane by lane */
+ ALIGN(16) int32_t a[4]; /* 16-byte aligned, presumably as v4i_store requires */
+ ALIGN(16) int32_t b[4];
+ v4i_store(a, v0);
+ v4i_store(b, v1);
+ return v4i_set /* MMAX is presumably the project's scalar max macro */
+ (MMAX(a[0], b[0]),
+ MMAX(a[1], b[1]),
+ MMAX(a[2], b[2]),
+ MMAX(a[3], b[3]));
+#endif
+}
+
+static FINLINE v4i_T /* Horizontal minimum of the four lanes of v, returned as a vector */
+v4i_reduce_min(const v4i_T v)
+{
+#ifdef SIMD_SSE4_1 /* Without SSE4.1, v4i_min itself goes through memory, so reduce in scalar instead */
+ const v4i_T tmp = v4i_min(v4i_yxwz(v), v); /* Pairwise step: min(x,y) and min(z,w), assuming the swizzle reorders lanes as its name spells */
+ return v4i_min(v4i_zwxy(tmp), tmp); /* Combine the two pair results; presumably every lane then holds the overall minimum */
+#else
+ ALIGN(16) int32_t a[4]; /* 16-byte aligned, presumably as v4i_store requires */
+ v4i_store(a, v);
+ return v4i_set1(MMIN(MMIN(a[0], a[1]), MMIN(a[2], a[3]))); /* Scalar reduction, then the result is set in all lanes */
+#endif
+}
+
+static FINLINE v4i_T /* Horizontal maximum of the four lanes of v, returned as a vector */
+v4i_reduce_max(const v4i_T v)
+{
+#ifdef SIMD_SSE4_1 /* Without SSE4.1, v4i_max itself goes through memory, so reduce in scalar instead */
+ const v4i_T tmp = v4i_max(v4i_yxwz(v), v); /* Pairwise step: max(x,y) and max(z,w), assuming the swizzle reorders lanes as its name spells */
+ return v4i_max(v4i_zwxy(tmp), tmp); /* Combine the two pair results; presumably every lane then holds the overall maximum */
+#else
+ ALIGN(16) int32_t a[4]; /* 16-byte aligned, presumably as v4i_store requires */
+ v4i_store(a, v);
+ return v4i_set1(MMAX(MMAX(a[0], a[1]), MMAX(a[2], a[3]))); /* Scalar reduction, then the result is set in all lanes */
+#endif
+}
+
+static FINLINE int32_t /* Scalar variant of v4i_reduce_min */
+v4i_reduce_min_i32(const v4i_T v)
+{
+ return v4i_x(v4i_reduce_min(v)); /* Reads the reduced vector through v4i_x (presumably its x lane) */
+}
+
+static FINLINE int32_t /* Scalar variant of v4i_reduce_max */
+v4i_reduce_max_i32(const v4i_T v)
+{
+ return v4i_x(v4i_reduce_max(v)); /* Reads the reduced vector through v4i_x (presumably its x lane) */
+}
+
#endif /* RSIMD_SSEI_H */
diff --git a/src/test_aosf33.c b/src/test_aosf33.c
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "aosf33.h"
@@ -22,7 +22,7 @@
b[0] = (A); b[1] = (B); b[2] = (C); \
b[3] = (D); b[4] = (E); b[5] = (F); \
b[6] = (G); b[7] = (H); b[8] = (I); \
- CHECK(f33_eq_eps(aosf33_store(a, (M)), b, Eps), 1); \
+ CHK(f33_eq_eps(aosf33_store(a, (M)), b, Eps) == 1); \
} (void)0
#define AOSF33_EQ(M, A, B, C, D, E, F, G, H, I) \
AOSF33_EQ_EPS(M, A, B, C, D, E, F, G, H, I, 0.f)
@@ -34,100 +34,100 @@ main(int argc, char** argv)
v4f_T m[3], n[3], o[3], v;
(void)argc, (void)argv;
- CHECK(aosf33_set(m,
+ CHK(aosf33_set(m,
v4f_set(0.f, 1.f, 2.f, 0.f),
v4f_set(3.f, 4.f, 5.f, 0.f),
- v4f_set(6.f, 7.f, 8.f, 0.f)), m);
- CHECK(aosf33_store(tmp, m), tmp);
- CHECK(tmp[0], 0.f);
- CHECK(tmp[1], 1.f);
- CHECK(tmp[2], 2.f);
- CHECK(tmp[3], 3.f);
- CHECK(tmp[4], 4.f);
- CHECK(tmp[5], 5.f);
- CHECK(tmp[6], 6.f);
- CHECK(tmp[7], 7.f);
- CHECK(tmp[8], 8.f);
+ v4f_set(6.f, 7.f, 8.f, 0.f)) == m);
+ CHK(aosf33_store(tmp, m) == tmp);
+ CHK(tmp[0] == 0.f);
+ CHK(tmp[1] == 1.f);
+ CHK(tmp[2] == 2.f);
+ CHK(tmp[3] == 3.f);
+ CHK(tmp[4] == 4.f);
+ CHK(tmp[5] == 5.f);
+ CHK(tmp[6] == 6.f);
+ CHK(tmp[7] == 7.f);
+ CHK(tmp[8] == 8.f);
AOSF33_EQ(m, 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f);
- CHECK(aosf33_identity(m), m);
+ CHK(aosf33_identity(m) == m);
AOSF33_EQ(m, 1.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 1.f);
- CHECK(aosf33_zero(m), m);
+ CHK(aosf33_zero(m) == m);
AOSF33_EQ(m, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
f3(tmp+0, -1.f, -2.f, -3.f);
f3(tmp+3, -4.f, -5.f, -6.f);
f3(tmp+6, -7.f, -8.f, -9.f);
- CHECK(aosf33_load(m, tmp), m);
+ CHK(aosf33_load(m, tmp) == m);
AOSF33_EQ(m, -1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f, -9.f);
- CHECK(aosf33_zero(m), m);
- CHECK(aosf33_set_row0(m, v4f_set(0.f, 1.f, 2.f, 9.f)), m);
+ CHK(aosf33_zero(m) == m);
+ CHK(aosf33_set_row0(m, v4f_set(0.f, 1.f, 2.f, 9.f)) == m);
AOSF33_EQ(m, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 2.f, 0.f, 0.f);
- CHECK(aosf33_set_row1(m, v4f_set(3.f, 4.f, 5.f, 10.f)), m);
+ CHK(aosf33_set_row1(m, v4f_set(3.f, 4.f, 5.f, 10.f)) == m);
AOSF33_EQ(m, 0.f, 3.f, 0.f, 1.f, 4.f, 0.f, 2.f, 5.f, 0.f);
- CHECK(aosf33_set_row2(m, v4f_set(6.f, 7.f, 8.f, 11.f)), m);
+ CHK(aosf33_set_row2(m, v4f_set(6.f, 7.f, 8.f, 11.f)) == m);
AOSF33_EQ(m, 0.f, 3.f, 6.f, 1.f, 4.f, 7.f, 2.f, 5.f, 8.f);
- CHECK(aosf33_zero(m), m);
- CHECK(aosf33_set_row(m, v4f_set(0.f, 1.f, 2.f, 9.f), 0), m);
+ CHK(aosf33_zero(m) == m);
+ CHK(aosf33_set_row(m, v4f_set(0.f, 1.f, 2.f, 9.f), 0) == m);
AOSF33_EQ(m, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 2.f, 0.f, 0.f);
- CHECK(aosf33_set_row(m, v4f_set(3.f, 4.f, 5.f, 10.f), 1), m);
+ CHK(aosf33_set_row(m, v4f_set(3.f, 4.f, 5.f, 10.f), 1) == m);
AOSF33_EQ(m, 0.f, 3.f, 0.f, 1.f, 4.f, 0.f, 2.f, 5.f, 0.f);
- CHECK(aosf33_set_row(m, v4f_set(6.f, 7.f, 8.f, 11.f), 2), m);
+ CHK(aosf33_set_row(m, v4f_set(6.f, 7.f, 8.f, 11.f), 2) == m);
AOSF33_EQ(m, 0.f, 3.f, 6.f, 1.f, 4.f, 7.f, 2.f, 5.f, 8.f);
- CHECK(aosf33_zero(m), m);
- CHECK(aosf33_set_col(m, v4f_set(0.f, 1.f, 2.f, 9.f), 0), m);
+ CHK(aosf33_zero(m) == m);
+ CHK(aosf33_set_col(m, v4f_set(0.f, 1.f, 2.f, 9.f), 0) == m);
AOSF33_EQ(m, 0.f, 1.f, 2.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
- CHECK(aosf33_set_col(m, v4f_set(3.f, 4.f, 5.f, 10.f), 1), m);
+ CHK(aosf33_set_col(m, v4f_set(3.f, 4.f, 5.f, 10.f), 1) == m);
AOSF33_EQ(m, 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 0.f, 0.f, 0.f);
- CHECK(aosf33_set_col(m, v4f_set(6.f, 7.f, 8.f, 11.f), 2), m);
+ CHK(aosf33_set_col(m, v4f_set(6.f, 7.f, 8.f, 11.f), 2) == m);
AOSF33_EQ(m, 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f);
v = aosf33_row0(m);
- CHECK(v4f_x(v), 0.f);
- CHECK(v4f_y(v), 3.f);
- CHECK(v4f_z(v), 6.f);
+ CHK(v4f_x(v) == 0.f);
+ CHK(v4f_y(v) == 3.f);
+ CHK(v4f_z(v) == 6.f);
v = aosf33_row1(m);
- CHECK(v4f_x(v), 1.f);
- CHECK(v4f_y(v), 4.f);
- CHECK(v4f_z(v), 7.f);
+ CHK(v4f_x(v) == 1.f);
+ CHK(v4f_y(v) == 4.f);
+ CHK(v4f_z(v) == 7.f);
v = aosf33_row2(m);
- CHECK(v4f_x(v), 2.f);
- CHECK(v4f_y(v), 5.f);
- CHECK(v4f_z(v), 8.f);
+ CHK(v4f_x(v) == 2.f);
+ CHK(v4f_y(v) == 5.f);
+ CHK(v4f_z(v) == 8.f);
v = aosf33_row(m, 0);
- CHECK(v4f_x(v), 0.f);
- CHECK(v4f_y(v), 3.f);
- CHECK(v4f_z(v), 6.f);
+ CHK(v4f_x(v) == 0.f);
+ CHK(v4f_y(v) == 3.f);
+ CHK(v4f_z(v) == 6.f);
v = aosf33_row(m, 1);
- CHECK(v4f_x(v), 1.f);
- CHECK(v4f_y(v), 4.f);
- CHECK(v4f_z(v), 7.f);
+ CHK(v4f_x(v) == 1.f);
+ CHK(v4f_y(v) == 4.f);
+ CHK(v4f_z(v) == 7.f);
v = aosf33_row(m, 2);
- CHECK(v4f_x(v), 2.f);
- CHECK(v4f_y(v), 5.f);
- CHECK(v4f_z(v), 8.f);
+ CHK(v4f_x(v) == 2.f);
+ CHK(v4f_y(v) == 5.f);
+ CHK(v4f_z(v) == 8.f);
v = aosf33_col(m, 0);
- CHECK(v4f_x(v), 0.f);
- CHECK(v4f_y(v), 1.f);
- CHECK(v4f_z(v), 2.f);
+ CHK(v4f_x(v) == 0.f);
+ CHK(v4f_y(v) == 1.f);
+ CHK(v4f_z(v) == 2.f);
v = aosf33_col(m, 1);
- CHECK(v4f_x(v), 3.f);
- CHECK(v4f_y(v), 4.f);
- CHECK(v4f_z(v), 5.f);
+ CHK(v4f_x(v) == 3.f);
+ CHK(v4f_y(v) == 4.f);
+ CHK(v4f_z(v) == 5.f);
v = aosf33_col(m, 2);
- CHECK(v4f_x(v), 6.f);
- CHECK(v4f_y(v), 7.f);
- CHECK(v4f_z(v), 8.f);
+ CHK(v4f_x(v) == 6.f);
+ CHK(v4f_y(v) == 7.f);
+ CHK(v4f_z(v) == 8.f);
aosf33_set(m,
v4f_set(0.f, 1.f, 2.f, 0.f),
@@ -137,19 +137,19 @@ main(int argc, char** argv)
v4f_set(1.f, 2.f, 3.f, 0.f),
v4f_set(4.f, 5.f, 6.f, 0.f),
v4f_set(7.f, 8.f, 9.f, 0.f));
- CHECK(aosf33_add(o, m, n), o);
+ CHK(aosf33_add(o, m, n) == o);
AOSF33_EQ(o, 1.f, 3.f, 5.f, 7.f, 9.f, 11.f, 13.f, 15.f, 17.f);
- CHECK(aosf33_sub(o, o, n), o);
+ CHK(aosf33_sub(o, o, n) == o);
AOSF33_EQ(o, 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f);
aosf33_set(m,
v4f_set(1.f, 2.f, -3.f, 0.f),
v4f_set(-4.f, -5.f, 6.f, 0.f),
v4f_set(7.f, -8.f, 9.f, 0.f));
- CHECK(aosf33_minus(m, m), m);
+ CHK(aosf33_minus(m, m) == m);
AOSF33_EQ(m, -1.f, -2.f, 3.f, 4.f, 5.f, -6.f, -7.f, 8.f, -9.f);
- CHECK(aosf33_mul(o, m, v4f_set1(2.f)), o);
+ CHK(aosf33_mul(o, m, v4f_set1(2.f)) == o);
AOSF33_EQ(o, -2.f, -4.f, 6.f, 8.f, 10.f, -12.f, -14.f, 16.f, -18.f);
aosf33_set(m,
@@ -157,21 +157,21 @@ main(int argc, char** argv)
v4f_set(4.f, 5.f, 6.f, 0.f),
v4f_set(7.f, 8.f, 9.f, 0.f));
v = aosf33_mulf3(m, v4f_set(1.f, 2.f, 3.f, 0.f));
- CHECK(v4f_x(v), 30.f);
- CHECK(v4f_y(v), 36.f);
- CHECK(v4f_z(v), 42.f);
+ CHK(v4f_x(v) == 30.f);
+ CHK(v4f_y(v) == 36.f);
+ CHK(v4f_z(v) == 42.f);
v = aosf3_mulf33(v4f_set(1.f, 2.f, 3.f, 0.f), m);
- CHECK(v4f_x(v), 14.f);
- CHECK(v4f_y(v), 32.f);
- CHECK(v4f_z(v), 50.f);
+ CHK(v4f_x(v) == 14.f);
+ CHK(v4f_y(v) == 32.f);
+ CHK(v4f_z(v) == 50.f);
aosf33_set(n,
v4f_set(2.f, 9.f, 8.f, 0.f),
v4f_set(1.f, -2.f, 2.f, 0.f),
v4f_set(1.f, -8.f, -4.f, 0.f));
- CHECK(aosf33_mulf33(o, m, n), o);
+ CHK(aosf33_mulf33(o, m, n) == o);
AOSF33_EQ(o, 94.f, 113.f, 132.f, 7.f, 8.f, 9.f, -59.f, -70.f, -81.f);
- CHECK(aosf33_transpose(o, m), o);
+ CHK(aosf33_transpose(o, m) == o);
AOSF33_EQ(o, 1.f, 4.f, 7.f, 2.f, 5.f, 8.f, 3.f, 6.f, 9.f);
aosf33_set(m,
@@ -179,24 +179,24 @@ main(int argc, char** argv)
v4f_set(4.f, 5.f, 6.f, 0.f),
v4f_set(3.f, -4.f, 9.f, 0.f));
v = aosf33_det(m);
- CHECK(v4f_x(v), -60.f);
- CHECK(v4f_y(v), -60.f);
- CHECK(v4f_z(v), -60.f);
- CHECK(v4f_w(v), -60.f);
+ CHK(v4f_x(v) == -60.f);
+ CHK(v4f_y(v) == -60.f);
+ CHK(v4f_z(v) == -60.f);
+ CHK(v4f_w(v) == -60.f);
v = aosf33_inverse(n, m);
- CHECK(v4f_x(v), -60.f);
- CHECK(v4f_y(v), -60.f);
- CHECK(v4f_z(v), -60.f);
- CHECK(v4f_w(v), -60.f);
+ CHK(v4f_x(v) == -60.f);
+ CHK(v4f_y(v) == -60.f);
+ CHK(v4f_z(v) == -60.f);
+ CHK(v4f_w(v) == -60.f);
aosf33_mulf33(o, m, n);
AOSF33_EQ_EPS(o, 1.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 1.f, 1.e-6f);
v = aosf33_invtrans(o, m);
- CHECK(v4f_x(v), -60.f);
- CHECK(v4f_y(v), -60.f);
- CHECK(v4f_z(v), -60.f);
- CHECK(v4f_w(v), -60.f);
+ CHK(v4f_x(v) == -60.f);
+ CHK(v4f_y(v) == -60.f);
+ CHK(v4f_z(v) == -60.f);
+ CHK(v4f_w(v) == -60.f);
AOSF33_EQ(o,
v4f_x(n[0]), v4f_x(n[1]), v4f_x(n[2]),
v4f_y(n[0]), v4f_y(n[1]), v4f_y(n[2]),
diff --git a/src/test_aosf44.c b/src/test_aosf44.c
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "aosf44.h"
@@ -23,7 +23,7 @@
b[4] = (E); b[5] = (F); b[6] = (G); b[7] = (H); \
b[8] = (I); b[9] = (J); b[10]= (K); b[11]= (L); \
b[12]= (M); b[13]= (N); b[14]= (O); b[15]= (P); \
- CHECK(f44_eq_eps(aosf44_store(a, (Mat)), b, Eps), 1); \
+ CHK(f44_eq_eps(aosf44_store(a, (Mat)), b, Eps) == 1); \
} (void)0
#define AOSF44_EQ(Mat, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
AOSF44_EQ_EPS(Mat, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, 0.f)
@@ -35,131 +35,131 @@ main(int argc, char** argv)
ALIGN(16) float tmp[16];
(void)argc, (void)argv;
- CHECK(aosf44_set(m,
+ CHK(aosf44_set(m,
v4f_set(0.f, 1.f, 2.f, 3.f),
v4f_set(4.f, 5.f, 6.f, 7.f),
v4f_set(8.f, 9.f, 10.f, 11.f),
- v4f_set(12.f, 13.f, 14.f, 15.f)), m);
+ v4f_set(12.f, 13.f, 14.f, 15.f)) == m);
AOSF44_EQ(m,
0.f, 1.f, 2.f, 3.f,
4.f, 5.f, 6.f, 7.f,
8.f, 9.f, 10.f, 11.f,
12.f, 13.f, 14.f, 15.f);
- CHECK(aosf44_store(tmp, m), tmp);
- CHECK(tmp[0], 0.f);
- CHECK(tmp[1], 1.f);
- CHECK(tmp[2], 2.f);
- CHECK(tmp[3], 3.f);
- CHECK(tmp[4], 4.f);
- CHECK(tmp[5], 5.f);
- CHECK(tmp[6], 6.f);
- CHECK(tmp[7], 7.f);
- CHECK(tmp[8], 8.f);
- CHECK(tmp[9], 9.f);
- CHECK(tmp[10], 10.f);
- CHECK(tmp[11], 11.f);
- CHECK(tmp[12], 12.f);
- CHECK(tmp[13], 13.f);
- CHECK(tmp[14], 14.f);
- CHECK(tmp[15], 15.f);
+ CHK(aosf44_store(tmp, m) == tmp);
+ CHK(tmp[0] == 0.f);
+ CHK(tmp[1] == 1.f);
+ CHK(tmp[2] == 2.f);
+ CHK(tmp[3] == 3.f);
+ CHK(tmp[4] == 4.f);
+ CHK(tmp[5] == 5.f);
+ CHK(tmp[6] == 6.f);
+ CHK(tmp[7] == 7.f);
+ CHK(tmp[8] == 8.f);
+ CHK(tmp[9] == 9.f);
+ CHK(tmp[10] == 10.f);
+ CHK(tmp[11] == 11.f);
+ CHK(tmp[12] == 12.f);
+ CHK(tmp[13] == 13.f);
+ CHK(tmp[14] == 14.f);
+ CHK(tmp[15] == 15.f);
tmp[0] = 0.f; tmp[1] = 2.f; tmp[2] = 4.f; tmp[3] = 6.f;
tmp[4] = 8.f; tmp[5] = 10.f; tmp[6] = 12.f; tmp[7] = 14.f;
tmp[8] = 16.f; tmp[9] = 18.f; tmp[10] = 20.f; tmp[11] = 22.f;
tmp[12] = 24.f; tmp[13] = 26.f; tmp[14] = 28.f; tmp[15] = 30.f;
- CHECK(aosf44_load(m, tmp), m);
+ CHK(aosf44_load(m, tmp) == m);
AOSF44_EQ(m,
0.f, 2.f, 4.f, 6.f,
8.f, 10.f, 12.f, 14.f,
16.f, 18.f, 20.f, 22.f,
24.f, 26.f, 28.f, 30.f);
- CHECK(aosf44_identity(m), m);
+ CHK(aosf44_identity(m) == m);
AOSF44_EQ(m,
1.f, 0.f, 0.f, 0.f,
0.f, 1.f, 0.f, 0.f,
0.f, 0.f, 1.f, 0.f,
0.f, 0.f, 0.f, 1.f);
- CHECK(aosf44_zero(m), m);
+ CHK(aosf44_zero(m) == m);
AOSF44_EQ(m,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f);
- CHECK(aosf44_set_row0(m, v4f_set(0.f, 1.f, 2.f, 3.f)), m);
+ CHK(aosf44_set_row0(m, v4f_set(0.f, 1.f, 2.f, 3.f)) == m);
AOSF44_EQ(m,
0.f, 0.f, 0.f, 0.f,
1.f, 0.f, 0.f, 0.f,
2.f, 0.f, 0.f, 0.f,
3.f, 0.f, 0.f, 0.f);
- CHECK(aosf44_set_row1(m, v4f_set(4.f, 5.f, 6.f, 7.f)), m);
+ CHK(aosf44_set_row1(m, v4f_set(4.f, 5.f, 6.f, 7.f)) == m);
AOSF44_EQ(m,
0.f, 4.f, 0.f, 0.f,
1.f, 5.f, 0.f, 0.f,
2.f, 6.f, 0.f, 0.f,
3.f, 7.f, 0.f, 0.f);
- CHECK(aosf44_set_row2(m, v4f_set(8.f, 9.f, 10.f, 11.f)), m);
+ CHK(aosf44_set_row2(m, v4f_set(8.f, 9.f, 10.f, 11.f)) == m);
AOSF44_EQ(m,
0.f, 4.f, 8.f, 0.f,
1.f, 5.f, 9.f, 0.f,
2.f, 6.f, 10.f, 0.f,
3.f, 7.f, 11.f, 0.f);
- CHECK(aosf44_set_row3(m, v4f_set(12.f, 13.f, 14.f, 15.f)), m);
+ CHK(aosf44_set_row3(m, v4f_set(12.f, 13.f, 14.f, 15.f)) == m);
AOSF44_EQ(m,
0.f, 4.f, 8.f, 12.f,
1.f, 5.f, 9.f, 13.f,
2.f, 6.f, 10.f, 14.f,
3.f, 7.f, 11.f, 15.f);
- CHECK(aosf44_zero(m), m);
- CHECK(aosf44_set_row(m, v4f_set(0.f, 1.f, 2.f, 3.f), 0), m);
+ CHK(aosf44_zero(m) == m);
+ CHK(aosf44_set_row(m, v4f_set(0.f, 1.f, 2.f, 3.f), 0) == m);
AOSF44_EQ(m,
0.f, 0.f, 0.f, 0.f,
1.f, 0.f, 0.f, 0.f,
2.f, 0.f, 0.f, 0.f,
3.f, 0.f, 0.f, 0.f);
- CHECK(aosf44_set_row(m, v4f_set(4.f, 5.f, 6.f, 7.f), 1), m);
+ CHK(aosf44_set_row(m, v4f_set(4.f, 5.f, 6.f, 7.f), 1) == m);
AOSF44_EQ(m,
0.f, 4.f, 0.f, 0.f,
1.f, 5.f, 0.f, 0.f,
2.f, 6.f, 0.f, 0.f,
3.f, 7.f, 0.f, 0.f);
- CHECK(aosf44_set_row(m, v4f_set(8.f, 9.f, 10.f, 11.f), 2), m);
+ CHK(aosf44_set_row(m, v4f_set(8.f, 9.f, 10.f, 11.f), 2) == m);
AOSF44_EQ(m,
0.f, 4.f, 8.f, 0.f,
1.f, 5.f, 9.f, 0.f,
2.f, 6.f, 10.f, 0.f,
3.f, 7.f, 11.f, 0.f);
- CHECK(aosf44_set_row(m, v4f_set(12.f, 13.f, 14.f, 15.f), 3), m);
+ CHK(aosf44_set_row(m, v4f_set(12.f, 13.f, 14.f, 15.f), 3) == m);
AOSF44_EQ(m,
0.f, 4.f, 8.f, 12.f,
1.f, 5.f, 9.f, 13.f,
2.f, 6.f, 10.f, 14.f,
3.f, 7.f, 11.f, 15.f);
- CHECK(aosf44_zero(m), m);
- CHECK(aosf44_set_col(m, v4f_set(0.f, 1.f, 2.f, 3.f), 0), m);
+ CHK(aosf44_zero(m) == m);
+ CHK(aosf44_set_col(m, v4f_set(0.f, 1.f, 2.f, 3.f), 0) == m);
AOSF44_EQ(m,
0.f, 1.f, 2.f, 3.f,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f);
- CHECK(aosf44_set_col(m, v4f_set(4.f, 5.f, 6.f, 7.f), 1), m);
+ CHK(aosf44_set_col(m, v4f_set(4.f, 5.f, 6.f, 7.f), 1) == m);
AOSF44_EQ(m,
0.f, 1.f, 2.f, 3.f,
4.f, 5.f, 6.f, 7.f,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f);
- CHECK(aosf44_set_col(m, v4f_set(8.f, 9.f, 10.f, 11.f), 2), m);
+ CHK(aosf44_set_col(m, v4f_set(8.f, 9.f, 10.f, 11.f), 2) == m);
AOSF44_EQ(m,
0.f, 1.f, 2.f, 3.f,
4.f, 5.f, 6.f, 7.f,
8.f, 9.f, 10.f, 11.f,
0.f, 0.f, 0.f, 0.f);
- CHECK(aosf44_set_col(m, v4f_set(12.f, 13.f, 14.f, 15.f), 3), m);
+ CHK(aosf44_set_col(m, v4f_set(12.f, 13.f, 14.f, 15.f), 3) == m);
AOSF44_EQ(m,
0.f, 1.f, 2.f, 3.f,
4.f, 5.f, 6.f, 7.f,
@@ -167,116 +167,116 @@ main(int argc, char** argv)
12.f, 13.f, 14.f, 15.f);
v = aosf44_row0(m);
- CHECK(v4f_x(v), 0.f);
- CHECK(v4f_y(v), 4.f);
- CHECK(v4f_z(v), 8.f);
- CHECK(v4f_w(v), 12.f);
+ CHK(v4f_x(v) == 0.f);
+ CHK(v4f_y(v) == 4.f);
+ CHK(v4f_z(v) == 8.f);
+ CHK(v4f_w(v) == 12.f);
v = aosf44_row1(m);
- CHECK(v4f_x(v), 1.f);
- CHECK(v4f_y(v), 5.f);
- CHECK(v4f_z(v), 9.f);
- CHECK(v4f_w(v), 13.f);
+ CHK(v4f_x(v) == 1.f);
+ CHK(v4f_y(v) == 5.f);
+ CHK(v4f_z(v) == 9.f);
+ CHK(v4f_w(v) == 13.f);
v = aosf44_row2(m);
- CHECK(v4f_x(v), 2.f);
- CHECK(v4f_y(v), 6.f);
- CHECK(v4f_z(v), 10.f);
- CHECK(v4f_w(v), 14.f);
+ CHK(v4f_x(v) == 2.f);
+ CHK(v4f_y(v) == 6.f);
+ CHK(v4f_z(v) == 10.f);
+ CHK(v4f_w(v) == 14.f);
v = aosf44_row3(m);
- CHECK(v4f_x(v), 3.f);
- CHECK(v4f_y(v), 7.f);
- CHECK(v4f_z(v), 11.f);
- CHECK(v4f_w(v), 15.f);
+ CHK(v4f_x(v) == 3.f);
+ CHK(v4f_y(v) == 7.f);
+ CHK(v4f_z(v) == 11.f);
+ CHK(v4f_w(v) == 15.f);
v = aosf44_row(m, 0);
- CHECK(v4f_x(v), 0.f);
- CHECK(v4f_y(v), 4.f);
- CHECK(v4f_z(v), 8.f);
- CHECK(v4f_w(v), 12.f);
+ CHK(v4f_x(v) == 0.f);
+ CHK(v4f_y(v) == 4.f);
+ CHK(v4f_z(v) == 8.f);
+ CHK(v4f_w(v) == 12.f);
v = aosf44_row(m, 1);
- CHECK(v4f_x(v), 1.f);
- CHECK(v4f_y(v), 5.f);
- CHECK(v4f_z(v), 9.f);
- CHECK(v4f_w(v), 13.f);
+ CHK(v4f_x(v) == 1.f);
+ CHK(v4f_y(v) == 5.f);
+ CHK(v4f_z(v) == 9.f);
+ CHK(v4f_w(v) == 13.f);
v = aosf44_row(m, 2);
- CHECK(v4f_x(v), 2.f);
- CHECK(v4f_y(v), 6.f);
- CHECK(v4f_z(v), 10.f);
- CHECK(v4f_w(v), 14.f);
+ CHK(v4f_x(v) == 2.f);
+ CHK(v4f_y(v) == 6.f);
+ CHK(v4f_z(v) == 10.f);
+ CHK(v4f_w(v) == 14.f);
v = aosf44_row(m, 3);
- CHECK(v4f_x(v), 3.f);
- CHECK(v4f_y(v), 7.f);
- CHECK(v4f_z(v), 11.f);
- CHECK(v4f_w(v), 15.f);
+ CHK(v4f_x(v) == 3.f);
+ CHK(v4f_y(v) == 7.f);
+ CHK(v4f_z(v) == 11.f);
+ CHK(v4f_w(v) == 15.f);
v = aosf44_col(m, 0);
- CHECK(v4f_x(v), 0.f);
- CHECK(v4f_y(v), 1.f);
- CHECK(v4f_z(v), 2.f);
- CHECK(v4f_w(v), 3.f);
+ CHK(v4f_x(v) == 0.f);
+ CHK(v4f_y(v) == 1.f);
+ CHK(v4f_z(v) == 2.f);
+ CHK(v4f_w(v) == 3.f);
v = aosf44_col(m, 1);
- CHECK(v4f_x(v), 4.f);
- CHECK(v4f_y(v), 5.f);
- CHECK(v4f_z(v), 6.f);
- CHECK(v4f_w(v), 7.f);
+ CHK(v4f_x(v) == 4.f);
+ CHK(v4f_y(v) == 5.f);
+ CHK(v4f_z(v) == 6.f);
+ CHK(v4f_w(v) == 7.f);
v = aosf44_col(m, 2);
- CHECK(v4f_x(v), 8.f);
- CHECK(v4f_y(v), 9.f);
- CHECK(v4f_z(v), 10.f);
- CHECK(v4f_w(v), 11.f);
+ CHK(v4f_x(v) == 8.f);
+ CHK(v4f_y(v) == 9.f);
+ CHK(v4f_z(v) == 10.f);
+ CHK(v4f_w(v) == 11.f);
v = aosf44_col(m, 3);
- CHECK(v4f_x(v), 12.f);
- CHECK(v4f_y(v), 13.f);
- CHECK(v4f_z(v), 14.f);
- CHECK(v4f_w(v), 15.f);
+ CHK(v4f_x(v) == 12.f);
+ CHK(v4f_y(v) == 13.f);
+ CHK(v4f_z(v) == 14.f);
+ CHK(v4f_w(v) == 15.f);
- CHECK(aosf44_set(m,
+ CHK(aosf44_set(m,
v4f_set(0.f, 1.f, 2.f, 3.f),
v4f_set(4.f, 5.f, 6.f, 7.f),
v4f_set(8.f, 9.f, 10.f, 11.f),
- v4f_set(12.f, 13.f, 14.f, 15.f)), m);
- CHECK(aosf44_set(n,
+ v4f_set(12.f, 13.f, 14.f, 15.f)) == m);
+ CHK(aosf44_set(n,
v4f_set(0.f, 2.f, 1.f, 3.f),
v4f_set(1.f, -2.f, -1.f, -3.f),
v4f_set(1.f, 0.f, 0.f, 2.f),
- v4f_set(3.f, 2.f, 1.f, 0.f)), n);
- CHECK(aosf44_add(o, m, n), o);
+ v4f_set(3.f, 2.f, 1.f, 0.f)) == n);
+ CHK(aosf44_add(o, m, n) == o);
AOSF44_EQ(o,
0.f, 3.f, 3.f, 6.f,
5.f, 3.f, 5.f, 4.f,
9.f, 9.f, 10.f, 13.f,
15.f, 15.f, 15.f, 15.f);
- CHECK(aosf44_sub(o, m, n), o);
+ CHK(aosf44_sub(o, m, n) == o);
AOSF44_EQ(o,
0.f, -1.f, 1.f, 0.f,
3.f, 7.f, 7.f, 10.f,
7.f, 9.f, 10.f, 9.f,
9.f, 11.f, 13.f, 15.f);
- CHECK(aosf44_minus(o, n), o);
+ CHK(aosf44_minus(o, n) == o);
AOSF44_EQ(o,
0.f, -2.f, -1.f, -3.f,
-1.f, 2.f, 1.f, 3.f,
-1.f, 0.f, 0.f, -2.f,
-3.f, -2.f, -1.f, 0.f);
- CHECK(aosf44_abs(o, o), o);
+ CHK(aosf44_abs(o, o) == o);
AOSF44_EQ(o,
0.f, 2.f, 1.f, 3.f,
1.f, 2.f, 1.f, 3.f,
1.f, 0.f, 0.f, 2.f,
3.f, 2.f, 1.f, 0.f);
- CHECK(aosf44_mul(o, n, v4f_set(1.f, 2.f, 3.f, 2.f)), o);
+ CHK(aosf44_mul(o, n, v4f_set(1.f, 2.f, 3.f, 2.f)) == o);
AOSF44_EQ(o,
0.f, 4.f, 3.f, 6.f,
1.f, -4.f, -3.f, -6.f,
@@ -289,16 +289,16 @@ main(int argc, char** argv)
v4f_set(8.f, 9.f, 10.f, 11.f),
v4f_set(12.f, 13.f, 14.f, 15.f));
v = aosf44_mulf4(m, v4f_set(1.f, 2.f, 3.f, 1.f));
- CHECK(v4f_x(v), 44.f);
- CHECK(v4f_y(v), 51.f);
- CHECK(v4f_z(v), 58.f);
- CHECK(v4f_w(v), 65.f);
+ CHK(v4f_x(v) == 44.f);
+ CHK(v4f_y(v) == 51.f);
+ CHK(v4f_z(v) == 58.f);
+ CHK(v4f_w(v) == 65.f);
v = aosf4_mulf44(v4f_set(1.f, 2.f, 3.f, 1.f), m);
- CHECK(v4f_x(v), 11.f);
- CHECK(v4f_y(v), 39.f);
- CHECK(v4f_z(v), 67.f);
- CHECK(v4f_w(v), 95.f);
+ CHK(v4f_x(v) == 11.f);
+ CHK(v4f_y(v) == 39.f);
+ CHK(v4f_z(v) == 67.f);
+ CHK(v4f_w(v) == 95.f);
aosf44_set(m,
v4f_set(1.f, 2.f, 3.f, 4.f),
@@ -310,14 +310,14 @@ main(int argc, char** argv)
v4f_set(1.f, -2.f, 2.f, 1.f),
v4f_set(1.f, -8.f, -4.f, 2.f),
v4f_set(1.f, 3.f, 4.f, 2.f));
- CHECK(aosf44_mulf44(o, m, n), o);
+ CHK(aosf44_mulf44(o, m, n) == o);
AOSF44_EQ(o,
104.f, 124.f, 144.f, 164.f,
17.f, 19.f, 21.f, 23.f,
-39.f, -48.f, -57.f, -66.f,
61.f, 71.f, 81.f, 91.f);
- CHECK(aosf44_transpose(o, n), o);
+ CHK(aosf44_transpose(o, n) == o);
AOSF44_EQ(o,
2.f, 1.f, 1.f, 1.f,
9.f, -2.f, -8.f, 3.f,
@@ -325,17 +325,17 @@ main(int argc, char** argv)
1.f, 1.f, 2.f, 2.f);
v = aosf44_det(n);
- CHECK(v4f_x(v), 78.f);
- CHECK(v4f_y(v), 78.f);
- CHECK(v4f_z(v), 78.f);
- CHECK(v4f_w(v), 78.f);
+ CHK(v4f_x(v) == 78.f);
+ CHK(v4f_y(v) == 78.f);
+ CHK(v4f_z(v) == 78.f);
+ CHK(v4f_w(v) == 78.f);
v = aosf44_inverse(m, n);
- CHECK(v4f_x(v), 78.f);
- CHECK(v4f_y(v), 78.f);
- CHECK(v4f_z(v), 78.f);
- CHECK(v4f_w(v), 78.f);
- CHECK(aosf44_mulf44(o, m, n), o);
+ CHK(v4f_x(v) == 78.f);
+ CHK(v4f_y(v) == 78.f);
+ CHK(v4f_z(v) == 78.f);
+ CHK(v4f_w(v) == 78.f);
+ CHK(aosf44_mulf44(o, m, n) == o);
AOSF44_EQ_EPS(o,
1.f, 0.f, 0.f, 0.f,
0.f, 1.f, 0.f, 0.f,
@@ -344,10 +344,10 @@ main(int argc, char** argv)
1.e-6f);
v = aosf44_invtrans(o, n);
- CHECK(v4f_x(v), 78.f);
- CHECK(v4f_y(v), 78.f);
- CHECK(v4f_z(v), 78.f);
- CHECK(v4f_w(v), 78.f);
+ CHK(v4f_x(v) == 78.f);
+ CHK(v4f_y(v) == 78.f);
+ CHK(v4f_z(v) == 78.f);
+ CHK(v4f_w(v) == 78.f);
AOSF44_EQ(o,
v4f_x(m[0]), v4f_x(m[1]), v4f_x(m[2]), v4f_x(m[3]),
v4f_y(m[0]), v4f_y(m[1]), v4f_y(m[2]), v4f_y(m[3]),
@@ -366,54 +366,54 @@ main(int argc, char** argv)
v4f_set(12.f, 13.f, 14.f, 15.f));
v = aosf44_eq(m, n);
- CHECK(v4f_mask_x(v), ~0);
- CHECK(v4f_mask_y(v), ~0);
- CHECK(v4f_mask_z(v), ~0);
- CHECK(v4f_mask_w(v), ~0);
+ CHK(v4f_mask_x(v) == ~0);
+ CHK(v4f_mask_y(v) == ~0);
+ CHK(v4f_mask_z(v) == ~0);
+ CHK(v4f_mask_w(v) == ~0);
n[0] = v4f_set(0.f, 1.0f, 2.f, 4.f);
v = aosf44_eq(m, n);
- CHECK(v4f_mask_x(v), 0);
- CHECK(v4f_mask_y(v), 0);
- CHECK(v4f_mask_z(v), 0);
- CHECK(v4f_mask_w(v), 0);
+ CHK(v4f_mask_x(v) == 0);
+ CHK(v4f_mask_y(v) == 0);
+ CHK(v4f_mask_z(v) == 0);
+ CHK(v4f_mask_w(v) == 0);
n[0] = v4f_set(0.f, 1.0f, 2.f, 3.f);
n[1] = v4f_set(4.f, 5.0f, 6.f, 7.f);
v = aosf44_eq(m, n);
- CHECK(v4f_mask_x(v), 0);
- CHECK(v4f_mask_y(v), 0);
- CHECK(v4f_mask_z(v), 0);
- CHECK(v4f_mask_w(v), 0);
+ CHK(v4f_mask_x(v) == 0);
+ CHK(v4f_mask_y(v) == 0);
+ CHK(v4f_mask_z(v) == 0);
+ CHK(v4f_mask_w(v) == 0);
n[1] = v4f_set(5.f, 5.0f, 6.f, 7.f);
m[2] = v4f_set(8.f, -9.0f, 10.f, 11.f);
v = aosf44_eq(m, n);
- CHECK(v4f_mask_x(v), 0);
- CHECK(v4f_mask_y(v), 0);
- CHECK(v4f_mask_z(v), 0);
- CHECK(v4f_mask_w(v), 0);
+ CHK(v4f_mask_x(v) == 0);
+ CHK(v4f_mask_y(v) == 0);
+ CHK(v4f_mask_z(v) == 0);
+ CHK(v4f_mask_w(v) == 0);
m[2] = v4f_set(8.f, 9.0f, 10.f, 11.f);
n[3] = v4f_set(12.f, 13.1f, 14.f, 15.f);
v = aosf44_eq(m, n);
- CHECK(v4f_mask_x(v), 0);
- CHECK(v4f_mask_y(v), 0);
- CHECK(v4f_mask_z(v), 0);
- CHECK(v4f_mask_w(v), 0);
+ CHK(v4f_mask_x(v) == 0);
+ CHK(v4f_mask_y(v) == 0);
+ CHK(v4f_mask_z(v) == 0);
+ CHK(v4f_mask_w(v) == 0);
v = aosf44_eq(m, m);
- CHECK(v4f_mask_x(v), ~0);
- CHECK(v4f_mask_y(v), ~0);
- CHECK(v4f_mask_z(v), ~0);
- CHECK(v4f_mask_w(v), ~0);
+ CHK(v4f_mask_x(v) == ~0);
+ CHK(v4f_mask_y(v) == ~0);
+ CHK(v4f_mask_z(v) == ~0);
+ CHK(v4f_mask_w(v) == ~0);
n[3] = v4f_set(12.f, 13.0f, 14.f, 15.f);
v = aosf44_eq(m, n);
- CHECK(v4f_mask_x(v), ~0);
- CHECK(v4f_mask_y(v), ~0);
- CHECK(v4f_mask_z(v), ~0);
- CHECK(v4f_mask_w(v), ~0);
+ CHK(v4f_mask_x(v) == ~0);
+ CHK(v4f_mask_y(v) == ~0);
+ CHK(v4f_mask_z(v) == ~0);
+ CHK(v4f_mask_w(v) == ~0);
return 0;
}
diff --git a/src/test_aosq.c b/src/test_aosq.c
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "aosq.h"
@@ -23,7 +23,7 @@
b[0] = (A); b[1] = (B); b[2] = (C); \
b[3] = (D); b[4] = (E); b[5] = (F); \
b[6] = (G); b[7] = (H); b[8] = (I); \
- CHECK(f33_eq_eps(aosf33_store(a, (M)), b, Eps), 1); \
+ CHK(f33_eq_eps(aosf33_store(a, (M)), b, Eps) == 1); \
} (void)0
int
@@ -35,94 +35,94 @@ main(int argc, char** argv)
(void)argc, (void)argv;
q0 = aosq_identity();
- CHECK(v4f_x(q0), 0.f);
- CHECK(v4f_y(q0), 0.f);
- CHECK(v4f_z(q0), 0.f);
- CHECK(v4f_w(q0), 1.f);
+ CHK(v4f_x(q0) == 0.f);
+ CHK(v4f_y(q0) == 0.f);
+ CHK(v4f_z(q0) == 0.f);
+ CHK(v4f_w(q0) == 1.f);
q0 = aosq_set_axis_angle(v4f_set(2.f, 5.f, 1.f, 0.f), v4f_set1((float)PI*0.3f));
- CHECK(eq_eps(v4f_x(q0), 0.907981f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(q0), 2.269953f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(q0), 0.453991f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(q0), 0.891007f, 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(q0), 0.907981f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(q0), 2.269953f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(q0), 0.453991f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_w(q0), 0.891007f, 1.e-6f) == 1);
q0 = v4f_set(1.f, 2.f, 3.f, -3.f);
q1 = v4f_set(1.f, 2.f, 3.f, -3.f);
t = aosq_eq(q0, q1);
- cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
- cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
- cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
- cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+ cast.f = v4f_x(t); CHK(cast.i == (int32_t)0xFFFFFFFF);
+ cast.f = v4f_y(t); CHK(cast.i == (int32_t)0xFFFFFFFF);
+ cast.f = v4f_z(t); CHK(cast.i == (int32_t)0xFFFFFFFF);
+ cast.f = v4f_w(t); CHK(cast.i == (int32_t)0xFFFFFFFF);
q1 = v4f_set(0.f, 2.f, 3.f, -3.f);
t = aosq_eq(q0, q1);
- cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_x(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHK(cast.i == (int32_t)0x00000000);
q1 = v4f_set(1.f, 0.f, 3.f, -3.f);
t = aosq_eq(q0, q1);
- cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_x(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHK(cast.i == (int32_t)0x00000000);
q1 = v4f_set(1.f, 2.f, 0.f, -3.f);
t = aosq_eq(q0, q1);
- cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_x(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHK(cast.i == (int32_t)0x00000000);
q1 = v4f_set(1.f, 2.f, 3.f, 0.f);
t = aosq_eq(q0, q1);
- cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_x(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHK(cast.i == (int32_t)0x00000000);
q1 = v4f_set(1.01f, 2.f, 3.02f, -3.f);
t = aosq_eq_eps(q0, q1, v4f_set1(0.01f));
- cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0x00000000);
- cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0x00000000);
+ cast.f = v4f_x(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_y(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_z(t); CHK(cast.i == (int32_t)0x00000000);
+ cast.f = v4f_w(t); CHK(cast.i == (int32_t)0x00000000);
t = aosq_eq_eps(q0, q1, v4f_set1(0.02f));
- cast.f = v4f_x(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
- cast.f = v4f_y(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
- cast.f = v4f_z(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
- cast.f = v4f_w(t); CHECK(cast.i, (int32_t)0xFFFFFFFF);
+ cast.f = v4f_x(t); CHK(cast.i == (int32_t)0xFFFFFFFF);
+ cast.f = v4f_y(t); CHK(cast.i == (int32_t)0xFFFFFFFF);
+ cast.f = v4f_z(t); CHK(cast.i == (int32_t)0xFFFFFFFF);
+ cast.f = v4f_w(t); CHK(cast.i == (int32_t)0xFFFFFFFF);
q0 = v4f_set(1.f, 2.f, 3.f, 4.f);
q1 = v4f_set(5.f, 6.f, 7.f, 8.f);
q2 = aosq_mul(q0, q1);
- CHECK(v4f_x(q2), 24.f);
- CHECK(v4f_y(q2), 48.f);
- CHECK(v4f_z(q2), 48.f);
- CHECK(v4f_w(q2), -6.f);
+ CHK(v4f_x(q2) == 24.f);
+ CHK(v4f_y(q2) == 48.f);
+ CHK(v4f_z(q2) == 48.f);
+ CHK(v4f_w(q2) == -6.f);
q2 = aosq_conj(q0);
- CHECK(v4f_x(q2), -1.f);
- CHECK(v4f_y(q2), -2.f);
- CHECK(v4f_z(q2), -3.f);
- CHECK(v4f_w(q2), 4.f);
+ CHK(v4f_x(q2) == -1.f);
+ CHK(v4f_y(q2) == -2.f);
+ CHK(v4f_z(q2) == -3.f);
+ CHK(v4f_w(q2) == 4.f);
q0 = v4f_normalize(v4f_set(1.f, 2.f, 5.f, 0.5f));
q1 = v4f_xyzz(q0);
q1 = v4f_xyzd(q1, aosq_calca(q1));
- CHECK(v4f_x(q0), v4f_x(q1));
- CHECK(v4f_y(q0), v4f_y(q1));
- CHECK(v4f_z(q0), v4f_z(q1));
- CHECK(eq_eps(v4f_w(q0), v4f_w(q1), 1.e-6f), 1);
+ CHK(v4f_x(q0) == v4f_x(q1));
+ CHK(v4f_y(q0) == v4f_y(q1));
+ CHK(v4f_z(q0) == v4f_z(q1));
+ CHK(eq_eps(v4f_w(q0), v4f_w(q1), 1.e-6f) == 1);
q0 = v4f_set(1.f, 2.f, 3.f, 5.f);
q1 = v4f_set(2.f, 6.f, 7.f, 6.f);
q2 = aosq_slerp(q0, q1, v4f_set1(0.3f));
- CHECK(eq_eps(v4f_x(q2), 1.3f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(q2), 3.2f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(q2), 4.2f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(q2), 5.3f, 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(q2), 1.3f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(q2), 3.2f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(q2), 4.2f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_w(q2), 5.3f, 1.e-6f) == 1);
q0 = v4f_set(2.f, 5.f, 17.f, 9.f);
aosq_to_aosf33(q0, m);
diff --git a/src/test_math4.c b/src/test_math4.c
@@ -0,0 +1,138 @@
+/* Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "rsimd.h"
+#include "math.h"
+
+#include <math.h>
+
+#define LOG2E 1.4426950408889634074 /* log_2 e */
+#define LN10 2.30258509299404568402 /* log_e 10 */
+
+/* Check the four lanes of the vector `V' against the reference array `Ref'.
+ * The lanes x, y, z and w are compared with eq_eps to Ref[0], Ref[1], Ref[2]
+ * and Ref[3] respectively. The tolerance handed to eq_eps is
+ * fabsf(Ref[n]) * Eps, i.e. `Eps' is scaled by the magnitude of the
+ * reference; a reference equal to 0 thus yields a tolerance of 0.
+ *
+ * `V', `Ref' and `Eps' are expanded several times and are not parenthesized:
+ * pass simple expressions without side effects. The expansion ends with
+ * `(void)0' so that the invocation has to be terminated by a semicolon. */
+#define CHKV4_EPS(V, Ref, Eps) { \
+ CHK(eq_eps(v4f_x(V), Ref[0], fabsf(Ref[0]) * Eps)); \
+ CHK(eq_eps(v4f_y(V), Ref[1], fabsf(Ref[1]) * Eps)); \
+ CHK(eq_eps(v4f_z(V), Ref[2], fabsf(Ref[2]) * Eps)); \
+ CHK(eq_eps(v4f_w(V), Ref[3], fabsf(Ref[3]) * Eps)); \
+} (void)0
+
+/* Check the vector function v4f_<Func> against the scalar function <Func>.
+ * v4f_<Func>(V) is evaluated once into `r__'. The reference `ref__' is built
+ * by calling <Func> on each lane of `V' and casting the result to float. Both
+ * are then compared by CHKV4_EPS with the tolerance factor `Eps'.
+ *
+ * `Func' is pasted after `v4f_', so it must be a bare identifier that names
+ * both the scalar function and the suffix of the vector one. `V' is expanded
+ * five times: when it is an expression (e.g. v4f_cos(i)) it is re-evaluated
+ * at each expansion. */
+#define CHKV4_FUNC_EPS(V, Func, Eps) { \
+ const v4f_T r__ = v4f_##Func(V); \
+ float ref__[4]; \
+ ref__[0] = (float)Func(v4f_x(V)); \
+ ref__[1] = (float)Func(v4f_y(V)); \
+ ref__[2] = (float)Func(v4f_z(V)); \
+ ref__[3] = (float)Func(v4f_w(V)); \
+ CHKV4_EPS(r__, ref__, Eps); \
+} (void)0
+
+/* Compare the v4f trigonometric functions (cos, sin, sincos, tan, acos, asin
+ * and atan) to the scalar functions of the same name, lane by lane, with a
+ * tolerance of 1.e-6 times the magnitude of the reference value. */
+static void
+test_trigo(void)
+{
+ v4f_T i, j, k;
+ float ref[4];
+
+ i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+
+ CHKV4_FUNC_EPS(i, cos, 1.e-6f);
+ CHKV4_FUNC_EPS(i, sin, 1.e-6f);
+
+ /* The checks below expect v4f_sincos to store the sines in its first output
+ * argument (k) and the cosines in its second one (j) */
+ v4f_sincos(i, &k, &j);
+ ref[0] = (float)sin(v4f_x(i));
+ ref[1] = (float)sin(v4f_y(i));
+ ref[2] = (float)sin(v4f_z(i));
+ ref[3] = (float)sin(v4f_w(i));
+ CHKV4_EPS(k, ref, 1.e-6f);
+ ref[0] = (float)cos(v4f_x(i));
+ ref[1] = (float)cos(v4f_y(i));
+ ref[2] = (float)cos(v4f_z(i));
+ ref[3] = (float)cos(v4f_w(i));
+ CHKV4_EPS(j, ref, 1.e-6f);
+
+ /* New set of angles whose first lane is PI/8 rather than PI/2.
+ * NOTE(review): presumably to keep tan away from its pole at PI/2; confirm */
+ i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
+ CHKV4_FUNC_EPS(i, tan, 1.e-6f);
+ /* The inverse functions are fed with the results of the direct ones */
+ CHKV4_FUNC_EPS(v4f_cos(i), acos, 1.e-6f);
+ CHKV4_FUNC_EPS(v4f_sin(i), asin, 1.e-6f);
+ CHKV4_FUNC_EPS(v4f_tan(i), atan, 1.e-6f);
+}
+
+/* Compare v4f_exp, v4f_exp2 and v4f_exp10 to scalar references on a vector
+ * that mixes positive, negative and null exponents. */
+static void
+test_exp(void)
+{
+ const v4f_T i = v4f_set(1.f, -1.234f, 0.f, 3.14156f);
+ v4f_T j;
+ float ref[4];
+
+ CHKV4_FUNC_EPS(i, exp, 1.e-6f);
+ CHKV4_FUNC_EPS(i, exp2, 1.e-6f);
+
+ /* LOG2E * LN10 is log_2(e) * log_e(10), i.e. log_2(10): the reference
+ * exp2(LOG2E * LN10 * x) is therefore 10^x.
+ * NOTE(review): presumably written this way because exp10 is not a
+ * standard C function; confirm */
+ j = v4f_exp10(i);
+ ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(i));
+ ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(i));
+ ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(i));
+ ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(i));
+ CHKV4_EPS(j, ref, 1.e-6f);
+}
+
+/* Compare v4f_log, v4f_log2 and v4f_log10 to log, log2 and log10. All the
+ * inputs are strictly positive; they range from 1.234e-13 to 9.99999. */
+static void
+test_log(void)
+{
+ const v4f_T i = v4f_set(4.675f, 3.14f, 9.99999f, 1.234e-13f);
+
+ CHKV4_FUNC_EPS(i, log, 1.e-6f);
+ CHKV4_FUNC_EPS(i, log2, 1.e-6f);
+ CHKV4_FUNC_EPS(i, log10, 1.e-6f);
+}
+
+/* Compare v4f_copysign, v4f_floor and v4f_pow to copysign, floor and pow on
+ * inputs of mixed signs and of very different magnitudes. */
+static void
+test_misc(void)
+{
+ v4f_T i, j, k;
+ float ref[4];
+
+ i = v4f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f);
+ j = v4f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f);
+ /* Magnitude comes from i, sign from j */
+ k = v4f_copysign(i, j);
+ ref[0] = (float)copysign(v4f_x(i), v4f_x(j));
+ ref[1] = (float)copysign(v4f_y(i), v4f_y(j));
+ ref[2] = (float)copysign(v4f_z(i), v4f_z(j));
+ ref[3] = (float)copysign(v4f_w(i), v4f_w(j));
+ CHKV4_EPS(k, ref, 1.e-6f);
+
+ CHKV4_FUNC_EPS(i, floor, 1.e-6f);
+
+ /* The absolute value of i is used as base on both sides (v4f_abs for the
+ * vector, fabsf for the reference) so that no base is negative */
+ k = v4f_pow(v4f_abs(i), j);
+ ref[0] = (float)pow(fabsf(v4f_x(i)), v4f_x(j));
+ ref[1] = (float)pow(fabsf(v4f_y(i)), v4f_y(j));
+ ref[2] = (float)pow(fabsf(v4f_z(i)), v4f_z(j));
+ ref[3] = (float)pow(fabsf(v4f_w(i)), v4f_w(j));
+ CHKV4_EPS(k, ref, 1.e-6f);
+}
+
+/* Run the four groups of tests of the v4f math functions, one after the
+ * other, then return 0. The command line arguments are ignored. */
+int
+main(int argc, char** argv)
+{
+ (void)argc, (void)argv;
+
+ test_trigo();
+ test_exp();
+ test_log();
+ test_misc();
+
+ return 0;
+}
+
diff --git a/src/test_math8.c b/src/test_math8.c
@@ -0,0 +1,172 @@
+/* Copyright (C) 2013-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "rsimd.h"
+#include "math.h"
+
+#include <math.h>
+
+#define LOG2E 1.4426950408889634074 /* log_2 e */
+#define LN10 2.30258509299404568402 /* log_e 10 */
+
+/* Check the eight lanes of the vector `V' against the reference array `Ref'.
+ * The x, y, z and w lanes of v8f_abcd(V) are compared with eq_eps to
+ * Ref[0..3] and the x, y, z and w lanes of v8f_efgh(V) to Ref[4..7]. The
+ * tolerance handed to eq_eps is fabsf(Ref[n]) * Eps, i.e. `Eps' is scaled by
+ * the magnitude of the reference; a reference equal to 0 thus yields a
+ * tolerance of 0.
+ *
+ * `V', `Ref' and `Eps' are expanded several times and are not parenthesized:
+ * pass simple expressions without side effects. The expansion ends with
+ * `(void)0' so that the invocation has to be terminated by a semicolon. */
+#define CHKV8_EPS(V, Ref, Eps) { \
+ CHK(eq_eps(v4f_x(v8f_abcd(V)), Ref[0], fabsf(Ref[0]) * Eps)); \
+ CHK(eq_eps(v4f_y(v8f_abcd(V)), Ref[1], fabsf(Ref[1]) * Eps)); \
+ CHK(eq_eps(v4f_z(v8f_abcd(V)), Ref[2], fabsf(Ref[2]) * Eps)); \
+ CHK(eq_eps(v4f_w(v8f_abcd(V)), Ref[3], fabsf(Ref[3]) * Eps)); \
+ CHK(eq_eps(v4f_x(v8f_efgh(V)), Ref[4], fabsf(Ref[4]) * Eps)); \
+ CHK(eq_eps(v4f_y(v8f_efgh(V)), Ref[5], fabsf(Ref[5]) * Eps)); \
+ CHK(eq_eps(v4f_z(v8f_efgh(V)), Ref[6], fabsf(Ref[6]) * Eps)); \
+ CHK(eq_eps(v4f_w(v8f_efgh(V)), Ref[7], fabsf(Ref[7]) * Eps)); \
+} (void)0
+
+/* Check the vector function v8f_<Func> against the scalar function <Func>.
+ * v8f_<Func>(V) is evaluated once into `r__'. The reference `ref__' is built
+ * by calling <Func> on each of the eight lanes of `V' (read through v8f_abcd
+ * and v8f_efgh) and casting the result to float. Both are then compared by
+ * CHKV8_EPS with the tolerance factor `Eps'.
+ *
+ * `Func' is pasted after `v8f_', so it must be a bare identifier that names
+ * both the scalar function and the suffix of the vector one. `V' is expanded
+ * nine times: when it is an expression (e.g. v8f_cos(i)) it is re-evaluated
+ * at each expansion. */
+#define CHKV8_FUNC_EPS(V, Func, Eps) { \
+ const v8f_T r__ = v8f_##Func(V); \
+ float ref__[8]; \
+ ref__[0] = (float)Func(v4f_x(v8f_abcd(V))); \
+ ref__[1] = (float)Func(v4f_y(v8f_abcd(V))); \
+ ref__[2] = (float)Func(v4f_z(v8f_abcd(V))); \
+ ref__[3] = (float)Func(v4f_w(v8f_abcd(V))); \
+ ref__[4] = (float)Func(v4f_x(v8f_efgh(V))); \
+ ref__[5] = (float)Func(v4f_y(v8f_efgh(V))); \
+ ref__[6] = (float)Func(v4f_z(v8f_efgh(V))); \
+ ref__[7] = (float)Func(v4f_w(v8f_efgh(V))); \
+ CHKV8_EPS(r__, ref__, Eps); \
+} (void)0
+
+/* Compare the v8f trigonometric functions (cos, sin, sincos, tan, acos, asin
+ * and atan) to the scalar functions of the same name, lane by lane, with a
+ * tolerance of 1.e-6 times the magnitude of the reference value. */
+static void
+test_trigo(void)
+{
+ v8f_T i, j, k;
+ float ref[8];
+
+ i = v8f_set
+ ((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f,
+ (float)PI/8.f, (float)PI/7.f, (float)PI/16.f, (float)PI/9.f);
+
+ CHKV8_FUNC_EPS(i, cos, 1.e-6f);
+ CHKV8_FUNC_EPS(i, sin, 1.e-6f);
+
+ /* The checks below expect v8f_sincos to store the sines in its first output
+ * argument (k) and the cosines in its second one (j) */
+ v8f_sincos(i, &k, &j);
+ ref[0] = (float)sin(v4f_x(v8f_abcd(i)));
+ ref[1] = (float)sin(v4f_y(v8f_abcd(i)));
+ ref[2] = (float)sin(v4f_z(v8f_abcd(i)));
+ ref[3] = (float)sin(v4f_w(v8f_abcd(i)));
+ ref[4] = (float)sin(v4f_x(v8f_efgh(i)));
+ ref[5] = (float)sin(v4f_y(v8f_efgh(i)));
+ ref[6] = (float)sin(v4f_z(v8f_efgh(i)));
+ ref[7] = (float)sin(v4f_w(v8f_efgh(i)));
+ CHKV8_EPS(k, ref, 1.e-6f);
+ ref[0] = (float)cos(v4f_x(v8f_abcd(i)));
+ ref[1] = (float)cos(v4f_y(v8f_abcd(i)));
+ ref[2] = (float)cos(v4f_z(v8f_abcd(i)));
+ ref[3] = (float)cos(v4f_w(v8f_abcd(i)));
+ ref[4] = (float)cos(v4f_x(v8f_efgh(i)));
+ ref[5] = (float)cos(v4f_y(v8f_efgh(i)));
+ ref[6] = (float)cos(v4f_z(v8f_efgh(i)));
+ ref[7] = (float)cos(v4f_w(v8f_efgh(i)));
+ CHKV8_EPS(j, ref, 1.e-6f);
+
+ /* New set of angles whose first lane is PI/2.2 rather than PI/2.
+ * NOTE(review): presumably to keep tan away from its pole at PI/2; confirm */
+ i = v8f_set
+ ((float)PI/2.2f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f,
+ (float)PI/8.f, (float)PI/7.f, (float)PI/16.f, (float)PI/9.f);
+
+ /* The tolerance factor is a float constant, as in every other check: a
+ * double one would make fabsf(Ref[n]) * Eps be computed in double */
+ CHKV8_FUNC_EPS(i, tan, 1.e-6f);
+ /* The inverse functions are fed with the results of the direct ones */
+ CHKV8_FUNC_EPS(v8f_cos(i), acos, 1.e-6f);
+ CHKV8_FUNC_EPS(v8f_sin(i), asin, 1.e-6f);
+ CHKV8_FUNC_EPS(v8f_tan(i), atan, 1.e-6f);
+}
+
+/* Compare v8f_exp, v8f_exp2 and v8f_exp10 to scalar references on a vector
+ * that mixes positive, negative and null exponents. */
+static void
+test_exp(void)
+{
+ const v8f_T i = v8f_set
+ (1.f, -1.234f, 0.f, 3.14156f, 0.9187f, 7.9f, 3.333f, 2.387e-7f);
+ v8f_T j;
+ float ref[8];
+
+ CHKV8_FUNC_EPS(i, exp, 1.e-6f);
+ CHKV8_FUNC_EPS(i, exp2, 1.e-6f);
+
+ /* LOG2E * LN10 is log_2(e) * log_e(10), i.e. log_2(10): the reference
+ * exp2(LOG2E * LN10 * x) is therefore 10^x.
+ * NOTE(review): presumably written this way because exp10 is not a
+ * standard C function; confirm */
+ j = v8f_exp10(i);
+ ref[0] = (float)exp2(LOG2E * LN10 * v4f_x(v8f_abcd(i)));
+ ref[1] = (float)exp2(LOG2E * LN10 * v4f_y(v8f_abcd(i)));
+ ref[2] = (float)exp2(LOG2E * LN10 * v4f_z(v8f_abcd(i)));
+ ref[3] = (float)exp2(LOG2E * LN10 * v4f_w(v8f_abcd(i)));
+ ref[4] = (float)exp2(LOG2E * LN10 * v4f_x(v8f_efgh(i)));
+ ref[5] = (float)exp2(LOG2E * LN10 * v4f_y(v8f_efgh(i)));
+ ref[6] = (float)exp2(LOG2E * LN10 * v4f_z(v8f_efgh(i)));
+ ref[7] = (float)exp2(LOG2E * LN10 * v4f_w(v8f_efgh(i)));
+ CHKV8_EPS(j, ref, 1.e-6f);
+}
+
+/* Compare v8f_log, v8f_log2 and v8f_log10 to log, log2 and log10. All the
+ * inputs are strictly positive; they range from 1.234e-13 to 9.99999. */
+static void
+test_log(void)
+{
+ const v8f_T i = v8f_set
+ (4.675f, 3.14f, 9.99999f, 1.234e-13f, 3.33e-3f, 0.98f, 8.f, 9.87654f);
+ CHKV8_FUNC_EPS(i, log, 1.e-6f);
+ CHKV8_FUNC_EPS(i, log2, 1.e-6f);
+ CHKV8_FUNC_EPS(i, log10, 1.e-6f);
+}
+
+/* Compare v8f_copysign, v8f_floor and v8f_pow to copysign, floor and pow on
+ * inputs of mixed signs and of very different magnitudes. */
+static void
+test_misc(void)
+{
+ v8f_T i, j, k;
+ float ref[8];
+
+ i = v8f_set(-1.2345f, 9.3e-7f, 3.879e9f, -10.56f, 9.9f, -3.1f, 0.33e-6f, 1.f);
+ j = v8f_set(7.89e-9f, 0.12f, -4.9e10f, 3.14f, 5.f, 0.1e-19f, 1.234f, -0.45f);
+ /* Magnitude comes from i, sign from j */
+ k = v8f_copysign(i, j);
+ ref[0] = (float)copysign(v4f_x(v8f_abcd(i)), v4f_x(v8f_abcd(j)));
+ ref[1] = (float)copysign(v4f_y(v8f_abcd(i)), v4f_y(v8f_abcd(j)));
+ ref[2] = (float)copysign(v4f_z(v8f_abcd(i)), v4f_z(v8f_abcd(j)));
+ ref[3] = (float)copysign(v4f_w(v8f_abcd(i)), v4f_w(v8f_abcd(j)));
+ ref[4] = (float)copysign(v4f_x(v8f_efgh(i)), v4f_x(v8f_efgh(j)));
+ ref[5] = (float)copysign(v4f_y(v8f_efgh(i)), v4f_y(v8f_efgh(j)));
+ ref[6] = (float)copysign(v4f_z(v8f_efgh(i)), v4f_z(v8f_efgh(j)));
+ ref[7] = (float)copysign(v4f_w(v8f_efgh(i)), v4f_w(v8f_efgh(j)));
+ CHKV8_EPS(k, ref, 1.e-6f);
+
+ CHKV8_FUNC_EPS(i, floor, 1.e-6f);
+
+ /* The absolute value of i is used as base on both sides (v8f_abs for the
+ * vector, fabsf for the reference) so that no base is negative */
+ k = v8f_pow(v8f_abs(i), j);
+ ref[0] = (float)pow(fabsf(v4f_x(v8f_abcd(i))), v4f_x(v8f_abcd(j)));
+ ref[1] = (float)pow(fabsf(v4f_y(v8f_abcd(i))), v4f_y(v8f_abcd(j)));
+ ref[2] = (float)pow(fabsf(v4f_z(v8f_abcd(i))), v4f_z(v8f_abcd(j)));
+ ref[3] = (float)pow(fabsf(v4f_w(v8f_abcd(i))), v4f_w(v8f_abcd(j)));
+ ref[4] = (float)pow(fabsf(v4f_x(v8f_efgh(i))), v4f_x(v8f_efgh(j)));
+ ref[5] = (float)pow(fabsf(v4f_y(v8f_efgh(i))), v4f_y(v8f_efgh(j)));
+ ref[6] = (float)pow(fabsf(v4f_z(v8f_efgh(i))), v4f_z(v8f_efgh(j)));
+ ref[7] = (float)pow(fabsf(v4f_w(v8f_efgh(i))), v4f_w(v8f_efgh(j)));
+ CHKV8_EPS(k, ref, 1.e-6f);
+}
+
+/* Run the four groups of tests of the v8f math functions, one after the
+ * other, then return 0. The command line arguments are ignored. */
+int
+main(int argc, char** argv)
+{
+ (void)argc, (void)argv;
+
+ test_trigo();
+ test_exp();
+ test_log();
+ test_misc();
+
+ return 0;
+}
+
diff --git a/src/test_soa4f2.c b/src/test_soa4f2.c
@@ -1,118 +1,28 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-#include "soa4f2.h"
-#include "test_soa4f_utils.h"
-
-#define CHECK_F2(V, A, B, C, D, E, F, G, H) \
- { \
- const v4f_T* v__ = (V); \
- CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \
- } (void)0
+/* Generate the test_soa4f2 function */
+#define SOA_SIMD_WIDTH 4
+#define SOA_DIMENSION 2
+#include "test_soaXfY.h"
int
main(int argc, char** argv)
{
- v4f_T a[2], b[2], c[2], dst[2], f;
(void)argc, (void)argv;
-
- CHECK(soa4f2_set(a, soa4f2_splat(c, v4f_set1(-1.f))), a);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE);
-
- CHECK(soa4f2(c, v4f_set(0.f, 1.f, 2.f, 3.f), v4f_set(5.f, 6.f, 7.f, 8.f)), c);
- CHECK(soa4f2_set(a, c), a);
- CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
-
- CHECK(soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f),v4f_set(5.f,-6.f,-7.f, 8.f)), a);
- CHECK(soa4f2_minus(b, a), b);
- CHECK_F2(b, 1.f,-2.f,-3.f, 4.f, -5.f, 6.f, 7.f,-8.f);
-
- CHECK(soa4f2_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
- CHECK_F2(dst, 0.f, 4.f, 3.f, -1.f, 6.f, -4.f, -7.f, 11.f);
- CHECK(soa4f2_add(dst, a, b), dst);
- CHECK_F2(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
- CHECK(soa4f2_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
- CHECK_F2(dst, -2.f, 0.f, 3.f, -7.f, 4.f, -8.f, -7.f, 5.f);
- CHECK(soa4f2_sub(dst, a, b), dst);
- CHECK_F2(dst, -2.f, 4.f, 6.f, -8.f, 10.f, -12.f, -14.f, 16.f);
- CHECK(soa4f2_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)), dst);
- CHECK_F2(dst, -2.f, 6.f, 0.f, 4.f, 10.f, -18.f, 0.f, -8.f);
- CHECK(soa4f2_mul(dst, a, b), dst);
- CHECK_F2(dst, -1.f, -4.f, -9.f, -16.f, -25.f, -36.f, -49.f, -64.f);
- CHECK(soa4f2_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)), dst);
- CHECK_F2(dst, -0.5f, 4.f, 3.f, -1.f, 2.5f, -12.f, -7.f, 2.f);
- CHECK(soa4f2_div(dst, a, b), dst);
- CHECK_F2(dst, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f);
-
- soa4f2(a, v4f_set1(0.f), v4f_set1(1.f));
- soa4f2(b, v4f_set1(1.f), v4f_set1(2.f));
- CHECK(soa4f2_lerp(dst, a, b, v4f_set1(0.5f)), dst);
- CHECK_F2(dst, 0.5f, 0.5f, 0.5f, 0.5f, 1.5f, 1.5f, 1.5f, 1.5f);
- soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f), v4f_set(5.f,-6.f,-7.f, 8.f));
- soa4f2_minus(b, a);
- CHECK(soa4f2_lerp(dst, a, b, v4f_set(-0.5f, 1.f, 0.5f, 4.f)), dst);
- CHECK_F2(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f);
-
- f = soa4f2_sum(b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-4.f, 4.f, 4.f, -4.f)), V4TRUE);
- f = soa4f2_dot(a, b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-26.f, -40.f, -58.f, -80.f)), V4TRUE);
- f = soa4f2_len(a);
- CHECK_V4MASK
- (v4f_eq_eps(f, v4f_sqrt(soa4f2_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f2_is_normalized(b), V4FALSE);
- f = soa4f2_normalize(dst, b);
- CHECK_V4MASK(v4f_eq_eps(f, soa4f2_len(b), v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(soa4f2_is_normalized(b), V4FALSE);
- CHECK_V4MASK(soa4f2_is_normalized(dst), V4TRUE);
- soa4f2_divf(b, b, f);
- CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f2_eq(a, a), V4TRUE);
- CHECK_V4MASK(soa4f2_eq(a, b), V4FALSE);
- soa4f2(a, v4f_set(-1.f, 2.f, 3.f,-4.f), v4f_set(5.f,-6.f,-7.f, 8.f));
- soa4f2(b, v4f_set(-1.f,-2.f, 5.f,-4.001f), v4f_set(5.f,-6.f, 7.f, 8.001f));
- CHECK_V4MASK__(soa4f2_eq(a, b), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-6f)),~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f2_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-2f)),~0, 0, 0,~0);
-
- soa4f2(a, v4f_set(1.f, 2.f, 3.f,-1.f), v4f_set(-2.f, 0.f,-7.f, 0.f));
- soa4f2(b, v4f_set(3.f, 2.f, 1.f,-2.f), v4f_set(1.f,-6.f, 0.5f, 2.f));
- f = soa4f2_cross(a, b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(7.f, -12.f, 8.5f, -2.f)), V4TRUE);
-
- CHECK(soa4f2_min(dst, a, b), dst);
- CHECK_F2(dst, 1.f, 2.f, 1.f, -2.f, -2.f, -6.f, -7.f, 0.f);
- CHECK(soa4f2_max(dst, a, b), dst);
- CHECK_F2(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f);
-
- soa4f2_sel(dst, b, a, v4f_mask(~0, ~0, 0, ~0));
- CHECK_F2(dst, 1.f, 2.f, 1.f, -1.f, -2.f, 0.f, 0.5f, 0.f);
-
- soa4f2(c, v4f_mask(~0, ~0, 1, ~0), v4f_mask(~0, 0, 0, 0));
- soa4f2_selv(dst, b, a, c);
- CHECK_F2(dst, 1.f, 2.f, 1.f, -1.f, -2.f, -6.f, 0.5f, 2.f);
-
+ test_soa4f2();
return 0;
}
diff --git a/src/test_soa4f3.c b/src/test_soa4f3.c
@@ -1,148 +1,27 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-#include "soa4f3.h"
-#include "test_soa4f_utils.h"
-
-#define CHECK_F3(V, A, B, C, D, E, F, G, H, I, J, K, L) \
- { \
- const v4f_T* v__ = (V); \
- CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[2], v4f_set((I), (J), (K), (L))), V4TRUE); \
- } (void)0
+/* Generate the test_soa4f3 function */
+#define SOA_SIMD_WIDTH 4
+#define SOA_DIMENSION 3
+#include "test_soaXfY.h"
int
main(int argc, char** argv)
{
- v4f_T a[3], b[3], c[3], dst[3], f;
(void)argc, (void)argv;
-
- CHECK(soa4f3_set(a, soa4f3_splat(c, v4f_set1(-1.f))), a);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[2], v4f_set1(-1.f)), V4TRUE);
- CHECK(soa4f3(c,
- v4f_set(0.f, 1.f, 2.f, 3.f),
- v4f_set(5.f, 6.f, 7.f, 8.f),
- v4f_set(9.f, 10.f, 11.f, 12.f)), c);
- CHECK(soa4f3_set(a, c), a);
- CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
-
- CHECK(soa4f3(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f)), a);
- CHECK(soa4f3_minus(b, a), b);
- CHECK_F3(b, 1.f,-2.f,-3.f, 4.f,-5.f, 6.f, 7.f,-8.f,-9.f, 10.f,-1.f, 2.f);
-
- CHECK(soa4f3_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
- CHECK_F3(dst, 0.f, 4.f, 3.f,-1.f, 6.f,-4.f,-7.f, 11.f, 10.f,-8.f, 1.f, 1.f);
- CHECK(soa4f3_add(dst, a, b), dst);
- CHECK_F3(dst, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
- CHECK(soa4f3_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
- CHECK_F3(dst,-2.f, 0.f, 3.f,-7.f, 4.f,-8.f,-7.f, 5.f, 8.f,-12.f, 1.f,-5.f);
- CHECK(soa4f3_sub(dst, a, b), dst);
- CHECK_F3(dst,-2.f, 4.f, 6.f,-8.f, 10.f,-12.f,-14.f, 16.f, 18.f,-20.f, 2.f,-4.f);
- CHECK(soa4f3_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)), dst);
- CHECK_F3(dst,-2.f, 6.f, 0.f, 4.f, 10.f,-18.f, 0.f,-8.f, 18.f,-30.f, 0.f, 2.f);
- CHECK(soa4f3_mul(dst, a, b), dst);
- CHECK_F3(dst,-1.f,-4.f,-9.f,-16.f,-25.f,-36.f,-49.f,-64.f,-81.f,-100.f,-1.f,-4.f);
- CHECK(soa4f3_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)), dst);
- CHECK_F3(dst,-0.5f, 4.f, 3.f,-1.f, 2.5f,-12.f,-7.f, 2.f, 4.5f,-20.f, 1.f,-0.5f);
- CHECK(soa4f3_div(dst, a, b), dst);
- CHECK_F3(dst,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f,-1.f);
-
- soa4f3(a, v4f_set1(0.f), v4f_set1(1.f), v4f_set1(2.f));
- soa4f3(b, v4f_set1(1.f), v4f_set1(2.f), v4f_set1(-1.f));
- CHECK(soa4f3_lerp(dst, a, b, v4f_set1(0.5f)), dst);
- CHECK_F3(dst,
- 0.5f, 0.5f, 0.5f, 0.5f,
- 1.5f, 1.5f, 1.5f, 1.5f,
- 0.5f, 0.5f, 0.5f, 0.5f);
- CHECK(soa4f3(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f)), a);
- CHECK(soa4f3_minus(b, a), b);
- CHECK(soa4f3_lerp(dst, a, b, v4f_set(-0.5f, 1.f, 0.5f, 4.f)), dst);
- CHECK_F3(dst, -1.f, -2.f, 0.f, 4.f, 5.f, 6.f, 0.f, -8.f, 9.f, 10.f, 0.f, 2.f);
-
- f = soa4f3_sum(b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-13.f, 14.f, 3.f, -2.f)), V4TRUE);
- f = soa4f3_dot(a, b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-107.f, -140.f, -59.f, -84.f)), V4TRUE);
- f = soa4f3_len(a);
- CHECK_V4MASK
- (v4f_eq_eps(f, v4f_sqrt(soa4f3_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f3_is_normalized(b), V4FALSE);
- f = soa4f3_normalize(dst, b);
- CHECK_V4MASK(v4f_eq_eps(f, soa4f3_len(b), v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(soa4f3_is_normalized(b), V4FALSE);
- CHECK_V4MASK(soa4f3_is_normalized(dst), V4TRUE);
- soa4f3_divf(b, b, f);
- CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[2], b[2], v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f3_eq(a, a), V4TRUE);
- CHECK_V4MASK(soa4f3_eq(a, b), V4FALSE);
- soa4f3(a,
- v4f_set(-1.f, 2.f, 3.f,-4.f),
- v4f_set(5.f,-6.f,-7.f, 8.f),
- v4f_set(9.f,-10.f,1.f, -2.f));
- soa4f3(b,
- v4f_set(-1.f, 2.f, 5.f,-4.001f),
- v4f_set(5.f,-6.03f,7.f, 8.0),
- v4f_set(9.f,-10.f,0.f, -2.001f));
- CHECK_V4MASK__(soa4f3_eq(a, b), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-6f)),~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,0.f,0.f,1.e-2f)),~0, 0, 0,~0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,1.e-2f,0.f,1.e-2f)),~0, 0, 0,~0);
- CHECK_V4MASK__(soa4f3_eq_eps(a, b, v4f_set(0.f,1.e-1f,0.f,1.e-2f)),~0,~0, 0,~0);
-
- soa4f3(a,
- v4f_set(1.f, 2.f, 3.f,-1.f),
- v4f_set(-2.f, 0.f,-7.f, 0.f),
- v4f_set(-1.f, 4.f, 3.f, 2.f));
- soa4f3(b,
- v4f_set(3.f, 2.f, 1.f,-2.f),
- v4f_set(1.f,-6.f, 0.5f, 2.f),
- v4f_set(0.f, 1.f, 0.f, 3.f));
- CHECK(soa4f3_cross(dst, a, b), dst);
- CHECK_F3(dst, 1.f, 24.f,-1.5f,-4.f,-3.f, 6.f, 3.f,-1.f, 7.f,-12.f, 8.5f,-2.f);
-
- CHECK(soa4f3_min(dst, a, b), dst);
- CHECK_F3(dst, 1.f, 2.f, 1.f, -2.f,-2.f,-6.f,-7.f, 0.f,-1.f, 1.f, 0.f, 2.f);
- CHECK(soa4f3_max(dst, a, b), dst);
- CHECK_F3(dst, 3.f, 2.f, 3.f, -1.f, 1.f, 0.f, 0.5f, 2.f, 0.f, 4.f, 3.f, 3.f);
-
- soa4f3_sel(dst, b, a, v4f_mask(~0, ~0, 1, ~0));
- CHECK_F3(dst, 1.f, 2.f, 1.f, -1.f, -2.f, 0.f, 0.5f, 0.f, -1.f, 4.f, 0.f, 2.f);
-
- soa4f3(c, v4f_mask(~0,~0, 0,~0), v4f_mask(~0, 0, 0, 0), v4f_mask(0,~0,~0, 0));
- soa4f3_selv(dst, b, a, c);
- CHECK_F3(dst, 1.f, 2.f, 1.f,-1.f,-2.f,-6.f, 0.5f, 2.f, 0.f, 4.f, 3.f, 3.f);
-
+ test_soa4f3();
return 0;
}
-
diff --git a/src/test_soa4f4.c b/src/test_soa4f4.c
@@ -1,218 +1,27 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-#include "soa4f4.h"
-#include "test_soa4f_utils.h"
-
-#define CHECK_F4(V, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
- { \
- const v4f_T* v__ = (V); \
- CHECK_V4MASK(v4f_eq(v__[0], v4f_set((A), (B), (C), (D))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[1], v4f_set((E), (F), (G), (H))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[2], v4f_set((I), (J), (K), (L))), V4TRUE); \
- CHECK_V4MASK(v4f_eq(v__[3], v4f_set((M), (N), (O), (P))), V4TRUE); \
- } (void)0
+/* Generate the test_soa4f4 function */
+#define SOA_SIMD_WIDTH 4
+#define SOA_DIMENSION 4
+#include "test_soaXfY.h"
int
main(int argc, char** argv)
{
- v4f_T a[4], b[4], c[4], dst[4], f;
(void)argc, (void)argv;
-
- CHECK(soa4f4_set(a, soa4f4_splat(c, v4f_set1(-1.f))), a);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[2], v4f_set1(-1.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[3], v4f_set1(-1.f)), V4TRUE);
- CHECK(soa4f4(c,
- v4f_set(0.f, 1.f, 2.f, 3.f),
- v4f_set(5.f, 6.f, 7.f, 8.f),
- v4f_set(9.f, 10.f, 11.f, 12.f),
- v4f_set(13.f, 14.f, 15.f, 16.f)), c);
- CHECK(soa4f4_set(a, c), a);
- CHECK_V4MASK(v4f_eq(c[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(c[3], v4f_set(13.f, 14.f, 15.f, 16.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[0], v4f_set(0.f, 1.f, 2.f, 3.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[1], v4f_set(5.f, 6.f, 7.f, 8.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[2], v4f_set(9.f, 10.f, 11.f, 12.f)), V4TRUE);
- CHECK_V4MASK(v4f_eq(a[3], v4f_set(13.f, 14.f, 15.f, 16.f)), V4TRUE);
-
- CHECK(soa4f4(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f),
- v4f_set(5.f, -3.f, -7.f, 1.f)), a);
- CHECK(soa4f4_minus(b, a), b);
- CHECK_F4(b,
- 1.f, -2.f, -3.f, 4.f,
- -5.f, 6.f, 7.f, -8.f,
- -9.f, 10.f, -1.f, 2.f,
- -5.f, 3.f, 7.f, -1.f);
-
- CHECK(soa4f4_addf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
- CHECK_F4(dst,
- 0.f, 4.f, 3.f, -1.f,
- 6.f, -4.f, -7.f, 11.f,
- 10.f, -8.f, 1.f, 1.f,
- 6.f, -1.f, -7.f, 4.f);
- CHECK(soa4f4_add(dst, a, b), dst);
- CHECK_F4(dst,
- 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f);
-
- CHECK(soa4f4_subf(dst, a, v4f_set(1.f, 2.f, 0.f, 3.f)), dst);
- CHECK_F4(dst,
- -2.f, 0.f, 3.f, -7.f,
- 4.f, -8.f, -7.f, 5.f,
- 8.f,-12.f, 1.f,-5.f,
- 4.f, -5.f, -7.f, -2.f);
- CHECK(soa4f4_sub(dst, a, b), dst);
- CHECK_F4(dst,
- -2.f, 4.f, 6.f, -8.f,
- 10.f, -12.f, -14.f, 16.f,
- 18.f, -20.f, 2.f, -4.f,
- 10.f, -6.f, -14.f, 2.f);
-
- CHECK(soa4f4_mulf(dst, a, v4f_set(2.f, 3.f, 0.f, -1.f)), dst);
- CHECK_F4(dst,
- -2.f, 6.f, 0.f, 4.f,
- 10.f, -18.f, 0.f, -8.f,
- 18.f, -30.f, 0.f, 2.f,
- 10.f, -9.f, 0.f, -1.f);
- CHECK(soa4f4_mul(dst, a, b), dst);
- CHECK_F4(dst,
- -1.f, -4.f, -9.f, -16.f,
- -25.f, -36.f, -49.f, -64.f,
- -81.f, -100.f, -1.f, -4.f,
- -25.f, -9.f, -49.f, -1.f);
-
- CHECK(soa4f4_divf(dst, a, v4f_set(2.f, 0.5f, 1.f, 4.f)), dst);
- CHECK_F4(dst,
- -0.5f, 4.f, 3.f, -1.f,
- 2.5f, -12.f, -7.f, 2.f,
- 4.5f, -20.f, 1.f, -0.5f,
- 2.5f, -6.f, -7.f, 0.25f);
- CHECK(soa4f4_div(dst, a, b), dst);
- CHECK_F4(dst,
- -1.f, -1.f, -1.f, -1.f,
- -1.f, -1.f, -1.f, -1.f,
- -1.f, -1.f, -1.f, -1.f,
- -1.f, -1.f, -1.f, -1.f);
-
- CHECK(soa4f4(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f),
- v4f_set(5.f, -3.f, -7.f, 1.f)), a);
- CHECK(soa4f4_minus(b, a), b);
- CHECK(soa4f4_lerp(dst, a, b, v4f_set(-0.5f, 1.f, 0.5f, 4.f)), dst);
- CHECK_F4(dst,
- -1.f, -2.f, 0.f, 4.f,
- 5.f, 6.f, 0.f, -8.f,
- 9.f, 10.f, 0.f, 2.f,
- 5.f, 3.f, 0.f, -1.f);
-
- f = soa4f4_sum(b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-18.f, 17.f, 10.f, -3.f)), V4TRUE);
- f = soa4f4_dot(a, b);
- CHECK_V4MASK(v4f_eq(f, v4f_set(-132.f, -149.f, -108.f, -85.f)), V4TRUE);
- f = soa4f4_len(a);
- CHECK_V4MASK
- (v4f_eq_eps(f, v4f_sqrt(soa4f4_dot(a, a)), v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f4_is_normalized(b), V4FALSE);
- f = soa4f4_normalize(dst, b);
- CHECK_V4MASK(v4f_eq_eps(f, soa4f4_len(b), v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(soa4f4_is_normalized(b), V4FALSE);
- CHECK_V4MASK(soa4f4_is_normalized(dst), V4TRUE);
- soa4f4_divf(b, b, f);
- CHECK_V4MASK(v4f_eq_eps(dst[0], b[0], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[1], b[1], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[2], b[2], v4f_set1(1.e-6f)), V4TRUE);
- CHECK_V4MASK(v4f_eq_eps(dst[3], b[3], v4f_set1(1.e-6f)), V4TRUE);
-
- CHECK_V4MASK(soa4f4_eq(a, a), V4TRUE);
- CHECK_V4MASK(soa4f4_eq(a, b), V4FALSE);
- soa4f4(a,
- v4f_set(-1.f, 2.f, 3.f, -4.f),
- v4f_set(5.f, -6.f, -7.f, 8.f),
- v4f_set(9.f, -10.f, 1.f, -2.f),
- v4f_set(1.f, -1.f, 1.f, -2.f));
- soa4f4(b,
- v4f_set(-1.f, 2.f, 3.f,-4.001f),
- v4f_set(5.f,-6.03f,-7.f, 8.0),
- v4f_set(9.f,-10.f,1.f, -2.001f),
- v4f_set(1.f, -1.f, 1.0005f, -2.f));
- CHECK_V4MASK__(soa4f4_eq(a, b), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set1(1.e-6f)), ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 0.f, 0.f, 1.e-6f)),
- ~0, 0, 0, 0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 0.f, 0.f, 1.e-2f)),
- ~0, 0, 0,~0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-2f, 0.f, 1.e-2f)),
- ~0, 0, 0,~0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-1f, 0.f, 1.e-2f)),
- ~0,~0, 0,~0);
- CHECK_V4MASK__(soa4f4_eq_eps(a, b, v4f_set(0.f, 1.e-1f, 1.e-3f, 1.e-2f)),
- ~0,~0,~0,~0);
-
- soa4f4(a,
- v4f_set(1.f, 2.f, 3.f, -1.f),
- v4f_set(-2.f, 0.f, -7.f, 0.f),
- v4f_set(-1.f, 4.f, 3.f, 2.f),
- v4f_set(-5.f, 7.f, 0.5f, -1.f));
- soa4f4(b,
- v4f_set(3.f, 2.f, 1.f,-2.f),
- v4f_set(1.f,-6.f, 0.5f, 2.f),
- v4f_set(0.f, 1.f, 0.f, 3.f),
- v4f_set(1.f,-1.f, 0.f, 0.f));
- CHECK(soa4f4_min(dst, a, b), dst);
- CHECK_F4(dst,
- 1.f, 2.f, 1.f, -2.f,
- -2.f, -6.f, -7.f, 0.f,
- -1.f, 1.f, 0.f, 2.f,
- -5.f, -1.f, 0.f, -1.f);
- CHECK(soa4f4_max(dst, a, b), dst);
- CHECK_F4(dst,
- 3.f, 2.f, 3.f, -1.f,
- 1.f, 0.f, 0.5f, 2.f,
- 0.f, 4.f, 3.f, 3.f,
- 1.f, 7.f, 0.5f, 0.f);
-
- soa4f4_sel(dst, b, a, v4f_mask(~0, ~0, 1, ~0));
- CHECK_F4(dst,
- 1.f, 2.f, 1.f, -1.f,
- -2.f, 0.f, 0.5f, 0.f,
- -1.f, 4.f, 0.f, 2.f,
- -5.f, 7.f, 0.f, -1.f);
-
- soa4f4(c,
- v4f_mask(~0,~0, 0,~0),
- v4f_mask(~0, 0, 0, 0),
- v4f_mask( 0,~0,~0, 0),
- v4f_mask(~0,~0, 0, 0));
- soa4f4_selv(dst, b, a, c);
- CHECK_F4(dst,
- 1.f, 2.f, 1.f, -1.f,
- -2.f, -6.f, 0.5f, 2.f,
- 0.f, 4.f, 3.f, 3.f,
- -5.f, 7.f, 0.f, 0.f);
-
+ test_soa4f4();
return 0;
}
diff --git a/src/test_soa4f_utils.h b/src/test_soa4f_utils.h
@@ -1,32 +0,0 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
- *
- * The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * The RSIMD library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-
-#ifndef TEST_SOA4F_UTILS_H
-#define TEST_SOA4F_UTILS_H
-
-#define V4TRUE ~0, ~0, ~0, ~0
-#define V4FALSE 0, 0, 0, 0
-#define CHECK_V4MASK__(Mask, A, B, C, D) \
- { \
- const v4f_T mask__ = (Mask); \
- CHECK(v4f_mask_x(mask__), (A)); \
- CHECK(v4f_mask_y(mask__), (B)); \
- CHECK(v4f_mask_z(mask__), (C)); \
- CHECK(v4f_mask_w(mask__), (D)); \
- } (void)0
-#define CHECK_V4MASK(Mask, Vec) CHECK_V4MASK__(Mask, Vec)
-
-#endif /* TEST_SOA4F_UTILS_H */
-
diff --git a/src/test_soa8f2.c b/src/test_soa8f2.c
@@ -0,0 +1,28 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Generate the test_soa8f2 function */
+#define SOA_SIMD_WIDTH 8
+#define SOA_DIMENSION 2
+#include "test_soaXfY.h"
+
+int main(int argc, char** argv)
+{
+  (void)argc; /* Command-line arguments are not used by this test */
+  (void)argv;
+  test_soa8f2(); /* Function generated by the test_soaXfY.h inclusion */
+  return 0;
+}
+
diff --git a/src/test_soa8f3.c b/src/test_soa8f3.c
@@ -0,0 +1,28 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Generate the test_soa8f3 function */
+#define SOA_SIMD_WIDTH 8
+#define SOA_DIMENSION 3
+#include "test_soaXfY.h"
+
+int main(int argc, char** argv)
+{
+  (void)argc; /* Command-line arguments are not used by this test */
+  (void)argv;
+  test_soa8f3(); /* Function generated by the test_soaXfY.h inclusion */
+  return 0;
+}
+
diff --git a/src/test_soa8f4.c b/src/test_soa8f4.c
@@ -0,0 +1,28 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Generate the test_soa8f4 function */
+#define SOA_SIMD_WIDTH 8
+#define SOA_DIMENSION 4
+#include "test_soaXfY.h"
+
+int main(int argc, char** argv)
+{
+  (void)argc; /* Command-line arguments are not used by this test */
+  (void)argv;
+  test_soa8f4(); /* Function generated by the test_soaXfY.h inclusion */
+  return 0;
+}
+
diff --git a/src/test_soaXfY.h b/src/test_soaXfY.h
@@ -0,0 +1,262 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+#include <rsys/rsys.h>
+
+/* Check macros */
+#ifndef SOA_SIMD_WIDTH
+ #error "Missing the SOA_SIMD_WIDTH macro"
+#endif
+#if SOA_SIMD_WIDTH != 4 && SOA_SIMD_WIDTH != 8
+ #error "Invalid value for the SOA_SIMD_WIDTH macro"
+#endif
+#ifndef SOA_DIMENSION
+ #error "Missing the SOA_DIMENSION macro"
+#endif
+#if SOA_DIMENSION < 2 || SOA_DIMENSION > 4
+ #error "Invalid value for the SOA_DIMENSION macro"
+#endif
+
+/* Define the macros that depend on the SOA_SIMD_WIDTH parameter */
+#if SOA_SIMD_WIDTH == 4
+  #define soaX soa4
+  #define vXf(Func) CONCAT(v4f_, Func)
+  #define vXf_T v4f_T
+  #define VEC(A, B, C, D, E, F, G, H) v4f_set(A, B, C, D) /* E..H unused */
+  #define MASK(A, B, C, D, E, F, G, H) v4f_mask(A, B, C, D) /* E..H unused */
+  #define CHKVX(V0, V1) { /* Compare the masks V0 and V1 lane by lane */ \
+    const v4f_T v0__ = (V0); \
+    const v4f_T v1__ = (V1); \
+    CHK(v4f_mask_x(v0__) == v4f_mask_x(v1__)); \
+    CHK(v4f_mask_y(v0__) == v4f_mask_y(v1__)); \
+    CHK(v4f_mask_z(v0__) == v4f_mask_z(v1__)); \
+    CHK(v4f_mask_w(v0__) == v4f_mask_w(v1__)); \
+  } (void)0
+#elif SOA_SIMD_WIDTH == 8
+  #define soaX soa8
+  #define vXf(Func) CONCAT(v8f_, Func)
+  #define vXf_T v8f_T
+  #define VEC(A, B, C, D, E, F, G, H) v8f_set(A, B, C, D, E, F, G, H)
+  #define MASK(A, B, C, D, E, F, G, H) v8f_mask(A, B, C, D, E, F, G, H)
+  #define CHKVX(V0, V1) { /* Compare the masks V0 and V1 lane by lane */ \
+    const v8f_T v0__ = (V0); \
+    const v8f_T v1__ = (V1); \
+    CHK(v4f_mask_x(v8f_abcd(v0__)) == v4f_mask_x(v8f_abcd(v1__))); \
+    CHK(v4f_mask_y(v8f_abcd(v0__)) == v4f_mask_y(v8f_abcd(v1__))); \
+    CHK(v4f_mask_z(v8f_abcd(v0__)) == v4f_mask_z(v8f_abcd(v1__))); \
+    CHK(v4f_mask_w(v8f_abcd(v0__)) == v4f_mask_w(v8f_abcd(v1__))); \
+    CHK(v4f_mask_x(v8f_efgh(v0__)) == v4f_mask_x(v8f_efgh(v1__))); \
+    CHK(v4f_mask_y(v8f_efgh(v0__)) == v4f_mask_y(v8f_efgh(v1__))); \
+    CHK(v4f_mask_z(v8f_efgh(v0__)) == v4f_mask_z(v8f_efgh(v1__))); \
+    CHK(v4f_mask_w(v8f_efgh(v0__)) == v4f_mask_w(v8f_efgh(v1__))); \
+  } (void)0
+#endif
+
+/* Define the macros that depend on the SOA_DIMENSION parameter */
+#if SOA_DIMENSION == 2
+ #define soaXfY(Func) CONCAT(CONCAT(soaX, f2_), Func)
+ #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f2)(Dst, X, Y) /* Z, W unused */
+#elif SOA_DIMENSION == 3
+ #define soaXfY(Func) CONCAT(CONCAT(soaX, f3_), Func)
+ #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f3)(Dst, X, Y, Z) /* W unused */
+#elif SOA_DIMENSION == 4
+ #define soaXfY(Func) CONCAT(CONCAT(soaX, f4_), Func)
+ #define SOA_VEC(Dst, X, Y, Z, W) CONCAT(soaX, f4)(Dst, X, Y, Z, W)
+#endif
+
+/* Include the corresponding header */
+#if SOA_SIMD_WIDTH == 4
+ #if SOA_DIMENSION == 2
+ #include "soa4f2.h"
+ #elif SOA_DIMENSION == 3
+ #include "soa4f3.h"
+ #elif SOA_DIMENSION == 4
+ #include "soa4f4.h"
+ #endif
+#else
+ #if SOA_DIMENSION == 2
+ #include "soa8f2.h"
+ #elif SOA_DIMENSION == 3
+ #include "soa8f3.h"
+ #elif SOA_DIMENSION == 4
+ #include "soa8f4.h"
+ #endif
+#endif
+
+/* Define constants */
+#define VXTRUE MASK(~0,~0,~0,~0,~0,~0,~0,~0)
+#define VXFALSE MASK(0,0,0,0,0,0,0,0)
+
+/* Check every soaXfY function against its per-component vXf counterpart */
+static void
+CONCAT(CONCAT(CONCAT(test_, soaX), f), SOA_DIMENSION)(void)
+{
+  vXf_T a[SOA_DIMENSION], b[SOA_DIMENSION], c[SOA_DIMENSION];
+  vXf_T v[4], f, tmp, mask;
+  int i;
+
+  v[0] = VEC(.5f, -1.f, -2.f, 3.f, -4.f, 5.f , 6.f , -7.f);
+  v[1] = VEC(-8.f, 9.f, -10.f, 11.f, 12.f, -13.f, -14.f, -15.f);
+  v[2] = VEC(16.f, -17.f, 18.f, -19.f, 20.f, 21.f, 22.f, -23.f);
+  v[3] = VEC(0.6f, -0.1f, 0.8f, -0.9f, 0.02f, 0.1f,-0.22f, -0.3f);
+  f = VEC(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
+
+  /* Setters: splat, copy and per-component construction */
+  soaXfY(splat)(a, vXf(set1)(-1.f));
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], vXf(set1)(-1.f)), VXTRUE);
+  }
+  CHK(soaXfY(set)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(b[i], a[i]), VXTRUE);
+  }
+  CHK(SOA_VEC(a, v[0], v[1], v[2], v[3]) == a);
+  CHK(soaXfY(set)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], v[i]), VXTRUE);
+    CHKVX(vXf(eq)(b[i], v[i]), VXTRUE);
+  }
+
+  /* Unary operator: the source operand must be left untouched */
+  CHK(soaXfY(minus)(b, a) == b);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(a[i], v[i]), VXTRUE);
+    CHKVX(vXf(eq)(b[i], vXf(minus)(v[i])), VXTRUE);
+  }
+
+  /* Regular binary operators; at this point b == -a, hence a + b == 0 */
+  CHK(soaXfY(addf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) CHKVX(vXf(eq)(c[i], vXf(add)(a[i], f)), VXTRUE);
+  CHK(soaXfY(subf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) CHKVX(vXf(eq)(c[i], vXf(sub)(a[i], f)), VXTRUE);
+  CHK(soaXfY(mulf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) CHKVX(vXf(eq)(c[i], vXf(mul)(a[i], f)), VXTRUE);
+  CHK(soaXfY(divf)(c, a, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) CHKVX(vXf(eq)(c[i], vXf(div)(a[i], f)), VXTRUE);
+  CHK(soaXfY(add)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) CHKVX(vXf(eq)(c[i], vXf(zero)()), VXTRUE);
+  CHK(soaXfY(sub)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) CHKVX(vXf(eq)(c[i], vXf(sub)(a[i], b[i])), VXTRUE);
+  CHK(soaXfY(mul)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) CHKVX(vXf(eq)(c[i], vXf(mul)(a[i], b[i])), VXTRUE);
+  CHK(soaXfY(div)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) CHKVX(vXf(eq)(c[i], vXf(div)(a[i], b[i])), VXTRUE);
+
+  /* Linear interpolation; as the other operators it must return its dst */
+  CHK(soaXfY(lerp)(c, a, b, f) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(lerp)(a[i], b[i], f)), VXTRUE);
+  }
+
+  /* Sum operator: per-lane sum of the components */
+  f = soaXfY(sum)(a);
+  tmp = vXf(zero)();
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    tmp = vXf(add)(tmp, a[i]);
+  }
+  CHKVX(vXf(eq)(f, tmp), VXTRUE);
+
+  /* Dot operator: per-lane sum of the component products */
+  f = soaXfY(dot)(a, b);
+  tmp = vXf(zero)();
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    tmp = vXf(add)(tmp, vXf(mul)(a[i], b[i]));
+  }
+  CHKVX(vXf(eq)(f, tmp), VXTRUE);
+
+  /* Vector normalization functions; normalize returns the length of a */
+  CHKVX(soaXfY(is_normalized)(a), VXFALSE);
+  f = soaXfY(normalize)(c, a);
+  CHKVX(vXf(eq)(soaXfY(len)(a), vXf(sqrt)(soaXfY(dot)(a, a))), VXTRUE);
+  CHKVX(vXf(eq_eps)(f, vXf(sqrt)(soaXfY(dot)(a, a)), vXf(set1)(1.e-4f)), VXTRUE);
+  CHKVX(soaXfY(is_normalized)(c), VXTRUE);
+  CHKVX(vXf(eq_eps)(soaXfY(len)(c), vXf(set1)(1.f), vXf(set1)(1.e-4f)), VXTRUE);
+  soaXfY(divf)(b, a, f);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq_eps)(b[i], c[i], vXf(set1)(1.e-4f)), VXTRUE);
+  }
+
+  /* Comparators; b is a shifted by 1.e-4 so only a large enough eps matches */
+  CHKVX(soaXfY(eq)(a, a), VXTRUE);
+  CHKVX(soaXfY(eq)(a, b), VXFALSE);
+  soaXfY(addf)(b, a, vXf(set1)(1.e-4f));
+  CHKVX(soaXfY(eq)(a, b), VXFALSE);
+  CHKVX(soaXfY(eq_eps)(a, b, vXf(set1)(1.e-3f)), VXTRUE);
+  tmp = VEC(0, 0, 1.e-3f, 0, 0, 0, 1.e-3f, 1.e-3f);
+  mask = MASK(0, 0, ~0, 0, 0, 0, ~0, ~0);
+  CHKVX(soaXfY(eq_eps)(a, b, tmp), mask);
+
+  /* Min/Max */
+  CHK(soaXfY(min)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(min)(a[i], b[i])), VXTRUE);
+  }
+  CHK(soaXfY(max)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(max)(a[i], b[i])), VXTRUE);
+  }
+
+  /* Select: sel uses one mask for all components, selv one mask per component */
+  v[0] = MASK(0,0,~0,~0,0,~0,~0,0);
+  v[1] = MASK(0,~0,~0,0,0,0,0,~0);
+  v[2] = MASK(0, 0, 0,0,~0,~0,0, 0);
+  v[3] = MASK(~0,~0,~0,0,~0,0,0,~0);
+  CHK(soaXfY(sel)(c, b, a, v[0]) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sel)(b[i], a[i], v[0])), VXTRUE);
+  }
+  CHK(soaXfY(selv)(c, b, a, v) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq)(c[i], vXf(sel)(b[i], a[i], v[i])), VXTRUE);
+  }
+
+  /* Cross product: a scalar per lane in 2D, a vector in 3D, untested in 4D */
+#if SOA_DIMENSION == 2
+  v[0] = vXf(mul)(a[0], b[1]);
+  v[1] = vXf(mul)(a[1], b[0]);
+  tmp = vXf(sub)(v[0], v[1]);
+  f = soaXfY(cross)(a, b);
+  CHKVX(vXf(eq_eps)(f, tmp, vXf(set1)(1.e-6f)), VXTRUE);
+#elif SOA_DIMENSION == 3
+  v[0] = vXf(sub)(vXf(mul)(a[1], b[2]), vXf(mul)(a[2], b[1]));
+  v[1] = vXf(sub)(vXf(mul)(a[2], b[0]), vXf(mul)(a[0], b[2]));
+  v[2] = vXf(sub)(vXf(mul)(a[0], b[1]), vXf(mul)(a[1], b[0]));
+  CHK(soaXfY(cross)(c, a, b) == c);
+  FOR_EACH(i, 0, SOA_DIMENSION) {
+    CHKVX(vXf(eq_eps)(c[i], v[i], vXf(set1)(1.e-6f)), VXTRUE);
+  }
+#endif
+}
+
+/* Generic parameters */
+#undef SOA_SIMD_WIDTH
+#undef SOA_DIMENSION
+
+/* Macros generic to the SOA_SIMD_WIDTH parameter */
+#undef soaX
+#undef vXf
+#undef vXf_T
+#undef VEC
+#undef MASK
+#undef CHKVX
+
+/* Macros generic to the SOA_DIMENSION parameter */
+#undef soaXfY
+#undef SOA_VEC
+
+/* Constants */
+#undef VXTRUE
+#undef VXFALSE
diff --git a/src/test_v4f.c b/src/test_v4f.c
@@ -1,19 +1,20 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "rsimd.h"
+#include "math.h"
int
main(int argc, char** argv)
@@ -25,522 +26,474 @@ main(int argc, char** argv)
(void)argc, (void)argv;
i = v4f_loadu(tmp+1);
- CHECK(v4f_x(i), 1.f);
- CHECK(v4f_y(i), 2.f);
- CHECK(v4f_z(i), 3.f);
- CHECK(v4f_w(i), 4.f);
+ CHK(v4f_x(i) == 1.f);
+ CHK(v4f_y(i) == 2.f);
+ CHK(v4f_z(i) == 3.f);
+ CHK(v4f_w(i) == 4.f);
i = v4f_loadu3(tmp);
- CHECK(v4f_x(i), 0.f);
- CHECK(v4f_y(i), 1.f);
- CHECK(v4f_z(i), 2.f);
+ CHK(v4f_x(i) == 0.f);
+ CHK(v4f_y(i) == 1.f);
+ CHK(v4f_z(i) == 2.f);
i = v4f_load(tmp);
- CHECK(v4f_x(i), 0.f);
- CHECK(v4f_y(i), 1.f);
- CHECK(v4f_z(i), 2.f);
- CHECK(v4f_w(i), 3.f);
+ CHK(v4f_x(i) == 0.f);
+ CHK(v4f_y(i) == 1.f);
+ CHK(v4f_z(i) == 2.f);
+ CHK(v4f_w(i) == 3.f);
tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0.f;
- CHECK(v4f_store(tmp, i), tmp);
- CHECK(tmp[0], 0.f);
- CHECK(tmp[1], 1.f);
- CHECK(tmp[2], 2.f);
- CHECK(tmp[3], 3.f);
+ CHK(v4f_store(tmp, i) == tmp);
+ CHK(tmp[0] == 0.f);
+ CHK(tmp[1] == 1.f);
+ CHK(tmp[2] == 2.f);
+ CHK(tmp[3] == 3.f);
i = v4f_set(1.f, 2.f, 3.f, 4.f);
- CHECK(v4f_x(i), 1.f);
- CHECK(v4f_y(i), 2.f);
- CHECK(v4f_z(i), 3.f);
- CHECK(v4f_w(i), 4.f);
+ CHK(v4f_x(i) == 1.f);
+ CHK(v4f_y(i) == 2.f);
+ CHK(v4f_z(i) == 3.f);
+ CHK(v4f_w(i) == 4.f);
i = v4f_set1(-2.f);
- CHECK(v4f_x(i), -2.f);
- CHECK(v4f_y(i), -2.f);
- CHECK(v4f_z(i), -2.f);
- CHECK(v4f_w(i), -2.f);
+ CHK(v4f_x(i) == -2.f);
+ CHK(v4f_y(i) == -2.f);
+ CHK(v4f_z(i) == -2.f);
+ CHK(v4f_w(i) == -2.f);
i = v4f_zero();
- CHECK(v4f_x(i), 0.f);
- CHECK(v4f_y(i), 0.f);
- CHECK(v4f_z(i), 0.f);
- CHECK(v4f_w(i), 0.f);
+ CHK(v4f_x(i) == 0.f);
+ CHK(v4f_y(i) == 0.f);
+ CHK(v4f_z(i) == 0.f);
+ CHK(v4f_w(i) == 0.f);
i = v4f_mask(~0, 0, ~0, ~0);
- cast.f[0] = v4f_x(i); CHECK(cast.i[0], (int32_t)0xFFFFFFFF);
- cast.f[1] = v4f_y(i); CHECK(cast.i[1], (int32_t)0x00000000);
- cast.f[2] = v4f_z(i); CHECK(cast.i[2], (int32_t)0xFFFFFFFF);
- cast.f[3] = v4f_w(i); CHECK(cast.i[3], (int32_t)0xFFFFFFFF);
+ cast.f[0] = v4f_x(i); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(i); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(i); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(i); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
i = v4f_mask1(8);
- cast.f[0] = v4f_x(i); CHECK(cast.i[0], 8);
- cast.f[1] = v4f_y(i); CHECK(cast.i[1], 8);
- cast.f[2] = v4f_z(i); CHECK(cast.i[2], 8);
- cast.f[3] = v4f_w(i); CHECK(cast.i[3], 8);
+ cast.f[0] = v4f_x(i); CHK(cast.i[0] == 8);
+ cast.f[1] = v4f_y(i); CHK(cast.i[1] == 8);
+ cast.f[2] = v4f_z(i); CHK(cast.i[2] == 8);
+ cast.f[3] = v4f_w(i); CHK(cast.i[3] == 8);
i = v4f_true();
- cast.f[0] = v4f_x(i); CHECK(cast.i[0], (int32_t)0xFFFFFFFF);
- cast.f[1] = v4f_y(i); CHECK(cast.i[1], (int32_t)0xFFFFFFFF);
- cast.f[2] = v4f_z(i); CHECK(cast.i[2], (int32_t)0xFFFFFFFF);
- cast.f[3] = v4f_w(i); CHECK(cast.i[3], (int32_t)0xFFFFFFFF);
+ cast.f[0] = v4f_x(i); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(i); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(i); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(i); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
i = v4f_false();
- cast.f[0] = v4f_x(i); CHECK(cast.i[0], (int32_t)0x00000000);
- cast.f[1] = v4f_y(i); CHECK(cast.i[1], (int32_t)0x00000000);
- cast.f[2] = v4f_z(i); CHECK(cast.i[2], (int32_t)0x00000000);
- cast.f[3] = v4f_w(i); CHECK(cast.i[3], (int32_t)0x00000000);
+ cast.f[0] = v4f_x(i); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(i); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(i); CHK(cast.i[2] == (int32_t)0x00000000);
+ cast.f[3] = v4f_w(i); CHK(cast.i[3] == (int32_t)0x00000000);
i = v4f_mask(~0, 0, ~0, ~0);
j = v4f_mask(0, 0, 0, ~0);
k = v4f_or(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF);
- CHECK(v4f_mask_x(i), ~0);
- CHECK(v4f_mask_y(i), 0);
- CHECK(v4f_mask_z(i), ~0);
- CHECK(v4f_mask_w(i), ~0);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
+ CHK(v4f_mask_x(i) == ~0);
+ CHK(v4f_mask_y(i) == 0);
+ CHK(v4f_mask_z(i) == ~0);
+ CHK(v4f_mask_w(i) == ~0);
k = v4f_and(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0x00000000);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0x00000000);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
k = v4f_xor(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0x00000000);
i = v4f_mask(0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F);
j = v4f_mask(0x01020401, 0x70605040, 0x0F1F2F3F, 0x00000000);
k = v4f_andnot(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], 0x01020400);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], 0x70605040);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], 0x07162534);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], 0x00000000);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == 0x01020400);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == 0x70605040);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == 0x07162534);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == 0x00000000);
- CHECK(v4f_movemask(i), 0);
+ CHK(v4f_movemask(i) == 0);
i = v4f_mask(0x01020401, (int32_t)0x80605040, 0x7F1F2F3F, 0x00000000);
- CHECK(v4f_movemask(i), 2);
+ CHK(v4f_movemask(i) == 2);
i = v4f_set(1.f, 2.f, 3.f, 4.f);
j = v4f_set(5.f, 6.f, 7.f, 8.f);
k = v4f_sel(i, j, v4f_mask(~0, 0, 0, ~0));
- CHECK(v4f_x(k), 5.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 8.f);
+ CHK(v4f_x(k) == 5.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 8.f);
k = v4f_xayb(i, j);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 5.f);
- CHECK(v4f_z(k), 2.f);
- CHECK(v4f_w(k), 6.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 5.f);
+ CHK(v4f_z(k) == 2.f);
+ CHK(v4f_w(k) == 6.f);
k = v4f_xyab(i, j);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 5.f);
- CHECK(v4f_w(k), 6.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 5.f);
+ CHK(v4f_w(k) == 6.f);
k = v4f_zcwd(i, j);
- CHECK(v4f_x(k), 3.f);
- CHECK(v4f_y(k), 7.f);
- CHECK(v4f_z(k), 4.f);
- CHECK(v4f_w(k), 8.f);
+ CHK(v4f_x(k) == 3.f);
+ CHK(v4f_y(k) == 7.f);
+ CHK(v4f_z(k) == 4.f);
+ CHK(v4f_w(k) == 8.f);
k = v4f_zwcd(i, j);
- CHECK(v4f_x(k), 3.f);
- CHECK(v4f_y(k), 4.f);
- CHECK(v4f_z(k), 7.f);
- CHECK(v4f_w(k), 8.f);
+ CHK(v4f_x(k) == 3.f);
+ CHK(v4f_y(k) == 4.f);
+ CHK(v4f_z(k) == 7.f);
+ CHK(v4f_w(k) == 8.f);
k = v4f_ayzw(i, j);
- CHECK(v4f_x(k), 5.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 4.f);
+ CHK(v4f_x(k) == 5.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 4.f);
k = v4f_xycd(i, j);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 7.f);
- CHECK(v4f_w(k), 8.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 7.f);
+ CHK(v4f_w(k) == 8.f);
k = v4f_ywbd(i, j);
- CHECK(v4f_x(k), 2.f);
- CHECK(v4f_y(k), 4.f);
- CHECK(v4f_z(k), 6.f);
- CHECK(v4f_w(k), 8.f);
+ CHK(v4f_x(k) == 2.f);
+ CHK(v4f_y(k) == 4.f);
+ CHK(v4f_z(k) == 6.f);
+ CHK(v4f_w(k) == 8.f);
k = v4f_xbzw(i, j);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 6.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 4.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 6.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 4.f);
k = v4f_xycw(i, j);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 7.f);
- CHECK(v4f_w(k), 4.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 7.f);
+ CHK(v4f_w(k) == 4.f);
k = v4f_xyzd(i, j);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 8.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 8.f);
k = v4f_048C(v4f_set1(1.f), v4f_set1(2.f), v4f_set1(3.f), v4f_set1(4.f));
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 4.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 4.f);
i = v4f_set(-1.f, 2.f, -3.f, 4.f);
j = v4f_minus(i);
- CHECK(v4f_x(j), 1.f);
- CHECK(v4f_y(j), -2.f);
- CHECK(v4f_z(j), 3.f);
- CHECK(v4f_w(j), -4.f);
+ CHK(v4f_x(j) == 1.f);
+ CHK(v4f_y(j) == -2.f);
+ CHK(v4f_z(j) == 3.f);
+ CHK(v4f_w(j) == -4.f);
k = v4f_add(i, j);
- CHECK(v4f_x(k), 0.f);
- CHECK(v4f_y(k), 0.f);
- CHECK(v4f_z(k), 0.f);
- CHECK(v4f_w(k), 0.f);
+ CHK(v4f_x(k) == 0.f);
+ CHK(v4f_y(k) == 0.f);
+ CHK(v4f_z(k) == 0.f);
+ CHK(v4f_w(k) == 0.f);
k = v4f_sub(i, j);
- CHECK(v4f_x(k), -2.f);
- CHECK(v4f_y(k), 4.f);
- CHECK(v4f_z(k), -6.f);
- CHECK(v4f_w(k), 8.f);
+ CHK(v4f_x(k) == -2.f);
+ CHK(v4f_y(k) == 4.f);
+ CHK(v4f_z(k) == -6.f);
+ CHK(v4f_w(k) == 8.f);
k = v4f_mul(i, j);
- CHECK(v4f_x(k), -1.f);
- CHECK(v4f_y(k), -4.f);
- CHECK(v4f_z(k), -9.f);
- CHECK(v4f_w(k), -16.f);
+ CHK(v4f_x(k) == -1.f);
+ CHK(v4f_y(k) == -4.f);
+ CHK(v4f_z(k) == -9.f);
+ CHK(v4f_w(k) == -16.f);
k = v4f_div(k, i);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), -2.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), -4.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == -2.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == -4.f);
k = v4f_madd(i, j, k);
- CHECK(v4f_x(k), 0.f);
- CHECK(v4f_y(k), -6.f);
- CHECK(v4f_z(k), -6.f);
- CHECK(v4f_w(k), -20.f);
+ CHK(v4f_x(k) == 0.f);
+ CHK(v4f_y(k) == -6.f);
+ CHK(v4f_z(k) == -6.f);
+ CHK(v4f_w(k) == -20.f);
k = v4f_abs(i);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 4.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 4.f);
i = v4f_set(4.f, 9.f, 16.f, 25.f);
k = v4f_sqrt(i);
- CHECK(v4f_x(k), 2.f);
- CHECK(v4f_y(k), 3.f);
- CHECK(v4f_z(k), 4.f);
- CHECK(v4f_w(k), 5.f);
+ CHK(v4f_x(k) == 2.f);
+ CHK(v4f_y(k) == 3.f);
+ CHK(v4f_z(k) == 4.f);
+ CHK(v4f_w(k) == 5.f);
k = v4f_rsqrte(i);
- CHECK(eq_eps(v4f_x(k), 1.f/2.f, 1.e-3f), 1);
- CHECK(eq_eps(v4f_y(k), 1.f/3.f, 1.e-3f), 1);
- CHECK(eq_eps(v4f_z(k), 1.f/4.f, 1.e-3f), 1);
- CHECK(eq_eps(v4f_w(k), 1.f/5.f, 1.e-3f), 1);
+ CHK(eq_eps(v4f_x(k), 1.f/2.f, 1.e-3f) == 1);
+ CHK(eq_eps(v4f_y(k), 1.f/3.f, 1.e-3f) == 1);
+ CHK(eq_eps(v4f_z(k), 1.f/4.f, 1.e-3f) == 1);
+ CHK(eq_eps(v4f_w(k), 1.f/5.f, 1.e-3f) == 1);
k = v4f_rsqrt(i);
- CHECK(eq_eps(v4f_x(k), 1.f/2.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), 1.f/3.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), 1.f/4.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), 1.f/5.f, 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(k), 1.f/2.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(k), 1.f/3.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(k), 1.f/4.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_w(k), 1.f/5.f, 1.e-6f) == 1);
k = v4f_rcpe(i);
- CHECK(eq_eps(v4f_x(k), 1.f/4.f, 1.e-3f), 1);
- CHECK(eq_eps(v4f_y(k), 1.f/9.f, 1.e-3f), 1);
- CHECK(eq_eps(v4f_z(k), 1.f/16.f, 1.e-3f), 1);
- CHECK(eq_eps(v4f_w(k), 1.f/25.f, 1.e-3f), 1);
+ CHK(eq_eps(v4f_x(k), 1.f/4.f, 1.e-3f) == 1);
+ CHK(eq_eps(v4f_y(k), 1.f/9.f, 1.e-3f) == 1);
+ CHK(eq_eps(v4f_z(k), 1.f/16.f, 1.e-3f) == 1);
+ CHK(eq_eps(v4f_w(k), 1.f/25.f, 1.e-3f) == 1);
k = v4f_rcp(i);
- CHECK(eq_eps(v4f_x(k), 1.f/4.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), 1.f/9.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), 1.f/16.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), 1.f/25.f, 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(k), 1.f/4.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(k), 1.f/9.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(k), 1.f/16.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_w(k), 1.f/25.f, 1.e-6f) == 1);
i = v4f_set(0.f, 1.f, 2.f, 4.f);
j = v4f_set(1.f, 2.f, -1.f, 1.f);
k = v4f_lerp(i, j, v4f_set1(0.5f));
- CHECK(v4f_x(k), 0.5f);
- CHECK(v4f_y(k), 1.5f);
- CHECK(v4f_z(k), 0.5f);
- CHECK(v4f_w(k), 2.5f);
+ CHK(v4f_x(k) == 0.5f);
+ CHK(v4f_y(k) == 1.5f);
+ CHK(v4f_z(k) == 0.5f);
+ CHK(v4f_w(k) == 2.5f);
k = v4f_sum(j);
- CHECK(v4f_x(k), 3.f);
- CHECK(v4f_y(k), 3.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 3.f);
+ CHK(v4f_x(k) == 3.f);
+ CHK(v4f_y(k) == 3.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 3.f);
k = v4f_dot(i, j);
- CHECK(v4f_x(k), 4.f);
- CHECK(v4f_y(k), 4.f);
- CHECK(v4f_z(k), 4.f);
- CHECK(v4f_w(k), 4.f);
+ CHK(v4f_x(k) == 4.f);
+ CHK(v4f_y(k) == 4.f);
+ CHK(v4f_z(k) == 4.f);
+ CHK(v4f_w(k) == 4.f);
k = v4f_len(i);
- CHECK(eq_eps(v4f_x(k), (float)sqrt(21.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), (float)sqrt(21.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), (float)sqrt(21.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), (float)sqrt(21.0), 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(k), (float)sqrt(21.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(k), (float)sqrt(21.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(k), (float)sqrt(21.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_w(k), (float)sqrt(21.0), 1.e-6f) == 1);
i = v4f_set(0.f, 4.f, 2.f, 3.f);
k = v4f_normalize(i);
- CHECK(eq_eps(v4f_x(k), 0.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), 0.742781353f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), 0.371390676f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), 0.557086014f, 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(k), 0.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(k), 0.742781353f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(k), 0.371390676f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_w(k), 0.557086014f, 1.e-6f) == 1);
i = v4f_set(1.f, 4.f, 2.f, 3.f);
k = v4f_sum2(i);
- CHECK(v4f_x(k), 5.f);
- CHECK(v4f_y(k), 5.f);
- CHECK(v4f_z(k), 5.f);
- CHECK(v4f_w(k), 5.f);
+ CHK(v4f_x(k) == 5.f);
+ CHK(v4f_y(k) == 5.f);
+ CHK(v4f_z(k) == 5.f);
+ CHK(v4f_w(k) == 5.f);
j = v4f_set(2.f, 3.f, 5.f, 1.f);
k = v4f_dot2(i, j);
- CHECK(v4f_x(k), 14.f);
- CHECK(v4f_y(k), 14.f);
- CHECK(v4f_z(k), 14.f);
- CHECK(v4f_w(k), 14.f);
+ CHK(v4f_x(k) == 14.f);
+ CHK(v4f_y(k) == 14.f);
+ CHK(v4f_z(k) == 14.f);
+ CHK(v4f_w(k) == 14.f);
k = v4f_len2(i);
- CHECK(eq_eps(v4f_x(k), (float)sqrt(17.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), (float)sqrt(17.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), (float)sqrt(17.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), (float)sqrt(17.0), 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(k), (float)sqrt(17.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(k), (float)sqrt(17.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(k), (float)sqrt(17.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_w(k), (float)sqrt(17.0), 1.e-6f) == 1);
i = v4f_set(1.f, -2.f, 2.f, 5.f);
j = v4f_set(3.f, 1.f, 1.f, 5.f);
k = v4f_cross2(i, j);
- CHECK(v4f_x(k), 7.f);
- CHECK(v4f_y(k), 7.f);
- CHECK(v4f_z(k), 7.f);
- CHECK(v4f_w(k), 7.f);
+ CHK(v4f_x(k) == 7.f);
+ CHK(v4f_y(k) == 7.f);
+ CHK(v4f_z(k) == 7.f);
+ CHK(v4f_w(k) == 7.f);
k = v4f_cross2(j, i);
- CHECK(v4f_x(k), -7.f);
- CHECK(v4f_y(k), -7.f);
- CHECK(v4f_z(k), -7.f);
- CHECK(v4f_w(k), -7.f);
+ CHK(v4f_x(k) == -7.f);
+ CHK(v4f_y(k) == -7.f);
+ CHK(v4f_z(k) == -7.f);
+ CHK(v4f_w(k) == -7.f);
i = v4f_set(0.f, 4.f, 5.f, 7.f);
k = v4f_normalize2(i);
- CHECK(eq_eps(v4f_x(k), 0.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), 1.f, 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(k), 0.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(k), 1.f, 1.e-6f) == 1);
k = v4f_sum3(i);
- CHECK(v4f_x(k), 9.f);
- CHECK(v4f_y(k), 9.f);
- CHECK(v4f_z(k), 9.f);
- CHECK(v4f_w(k), 9.f);
+ CHK(v4f_x(k) == 9.f);
+ CHK(v4f_y(k) == 9.f);
+ CHK(v4f_z(k) == 9.f);
+ CHK(v4f_w(k) == 9.f);
i = v4f_set(2.f, 3.f, 2.f, 4.f);
j = v4f_set(0.f, 4.f, 2.f, 19.f);
k = v4f_dot3(i, j);
- CHECK(v4f_x(k), 16.f);
- CHECK(v4f_y(k), 16.f);
- CHECK(v4f_z(k), 16.f);
- CHECK(v4f_w(k), 16.f);
+ CHK(v4f_x(k) == 16.f);
+ CHK(v4f_y(k) == 16.f);
+ CHK(v4f_z(k) == 16.f);
+ CHK(v4f_w(k) == 16.f);
k = v4f_len3(j);
- CHECK(eq_eps(v4f_x(k), (float)sqrt(20.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), (float)sqrt(20.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), (float)sqrt(20.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), (float)sqrt(20.0), 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(k), (float)sqrt(20.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(k), (float)sqrt(20.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(k), (float)sqrt(20.0), 1.e-6f) == 1);
+ CHK(eq_eps(v4f_w(k), (float)sqrt(20.0), 1.e-6f) == 1);
k = v4f_normalize3(j);
- CHECK(eq_eps(v4f_x(k), 0.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), 0.8944271910f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), 0.4472135995f, 1.e-6f), 1);
+ CHK(eq_eps(v4f_x(k), 0.f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_y(k), 0.8944271910f, 1.e-6f) == 1);
+ CHK(eq_eps(v4f_z(k), 0.4472135995f, 1.e-6f) == 1);
i = v4f_set(1.f, -2.f, 2.f, 4.f);
j = v4f_set(3.f, 1.f, -1.5f, 2.f);
k = v4f_cross3(i, j);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 7.5f);
- CHECK(v4f_z(k), 7.f);
-
- i = v4f_set((float)PI/2.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
- k = v4f_cos(i);
- CHECK(eq_eps(v4f_x(k), (float)cos(PI/2.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), (float)cos(PI/3.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), (float)cos(PI/4.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), (float)cos(PI/6.0), 1.e-6f), 1);
-
- k = v4f_sin(i);
- CHECK(eq_eps(v4f_x(k), (float)sin(PI/2.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), (float)sin(PI/3.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), (float)sin(PI/4.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), (float)sin(PI/6.0), 1.e-6f), 1);
-
- v4f_sincos(i, &k, &j);
- CHECK(eq_eps(v4f_x(k), (float)sin(PI/2.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), (float)sin(PI/3.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), (float)sin(PI/4.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), (float)sin(PI/6.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_x(j), (float)cos(PI/2.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(j), (float)cos(PI/3.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(j), (float)cos(PI/4.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(j), (float)cos(PI/6.0), 1.e-6f), 1);
-
- i = v4f_set((float)PI/8.f, (float)PI/3.f, (float)PI/4.f, (float)PI/6.f);
- k = v4f_tan(i);
- CHECK(eq_eps(v4f_x(k), (float)tan(PI/8.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), (float)tan(PI/3.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), (float)tan(PI/4.0), 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), (float)tan(PI/6.0), 1.e-6f), 1);
-
- k = v4f_acos(v4f_cos(i));
- CHECK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f), 1);
-
- k = v4f_asin(v4f_sin(i));
- CHECK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f), 1);
-
- k = v4f_atan(v4f_tan(i));
- CHECK(eq_eps(v4f_x(k), PI/8.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_y(k), PI/3.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_z(k), PI/4.f, 1.e-6f), 1);
- CHECK(eq_eps(v4f_w(k), PI/6.f, 1.e-6f), 1);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 7.5f);
+ CHK(v4f_z(k) == 7.f);
i = v4f_set(1.f, 2.f, 3.f, 4.f);
j = v4f_set(-2.f, -4.f, 3.f, 6.f);
k = v4f_eq(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0x00000000);
k = v4f_neq(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0xFFFFFFFF);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0x00000000);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0x00000000);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
k = v4f_gt(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0xFFFFFFFF);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0x00000000);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0x00000000);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0x00000000);
k = v4f_lt(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0x00000000);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0x00000000);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
k = v4f_ge(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0xFFFFFFFF);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0x00000000);
k = v4f_le(i, j);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
i = v4f_set(1.01f, 2.01f, 3.02f, 0.02f);
j = v4f_set(1.f, 2.f, 3.f, 0.f);
k = v4f_set(0.f, 0.01f, 0.02f, 0.f);
k = v4f_eq_eps(i, j, k);
- cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000);
- cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0xFFFFFFFF);
- cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF);
- cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000);
+ cast.f[0] = v4f_x(k); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(k); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(k); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(k); CHK(cast.i[3] == (int32_t)0x00000000);
i = v4f_set(1.f, 2.f, 3.f, 4.f);
j = v4f_set(-2.f, -4.f, 3.f, 6.f);
k = v4f_min(i, j);
- CHECK(v4f_x(k), -2.f);
- CHECK(v4f_y(k), -4.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 4.f);
+ CHK(v4f_x(k) == -2.f);
+ CHK(v4f_y(k) == -4.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 4.f);
k = v4f_max(i, j);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 6.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 6.f);
k = v4f_reduce_min(i);
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 1.f);
- CHECK(v4f_z(k), 1.f);
- CHECK(v4f_w(k), 1.f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 1.f);
+ CHK(v4f_z(k) == 1.f);
+ CHK(v4f_w(k) == 1.f);
k = v4f_reduce_min(j);
- CHECK(v4f_x(k), -4.f);
- CHECK(v4f_y(k), -4.f);
- CHECK(v4f_z(k), -4.f);
- CHECK(v4f_w(k), -4.f);
+ CHK(v4f_x(k) == -4.f);
+ CHK(v4f_y(k) == -4.f);
+ CHK(v4f_z(k) == -4.f);
+ CHK(v4f_w(k) == -4.f);
k = v4f_reduce_max(i);
- CHECK(v4f_x(k), 4.f);
- CHECK(v4f_y(k), 4.f);
- CHECK(v4f_z(k), 4.f);
- CHECK(v4f_w(k), 4.f);
+ CHK(v4f_x(k) == 4.f);
+ CHK(v4f_y(k) == 4.f);
+ CHK(v4f_z(k) == 4.f);
+ CHK(v4f_w(k) == 4.f);
k = v4f_reduce_max(j);
- CHECK(v4f_x(k), 6.f);
- CHECK(v4f_y(k), 6.f);
- CHECK(v4f_z(k), 6.f);
- CHECK(v4f_w(k), 6.f);
+ CHK(v4f_x(k) == 6.f);
+ CHK(v4f_y(k) == 6.f);
+ CHK(v4f_z(k) == 6.f);
+ CHK(v4f_w(k) == 6.f);
k = v4f_clamp(i, v4f_set(0.f, 0.f, -1.f, 3.f), v4f_set(1.f, 3.f, 2.f, 3.1f));
- CHECK(v4f_x(k), 1.f);
- CHECK(v4f_y(k), 2.f);
- CHECK(v4f_z(k), 2.f);
- CHECK(v4f_w(k), 3.1f);
+ CHK(v4f_x(k) == 1.f);
+ CHK(v4f_y(k) == 2.f);
+ CHK(v4f_z(k) == 2.f);
+ CHK(v4f_w(k) == 3.1f);
l = v4f_to_v4i(j);
- CHECK(v4i_x(l), -2);
- CHECK(v4i_y(l), -4);
- CHECK(v4i_z(l), 3);
- CHECK(v4i_w(l), 6);
+ CHK(v4i_x(l) == -2);
+ CHK(v4i_y(l) == -4);
+ CHK(v4i_z(l) == 3);
+ CHK(v4i_w(l) == 6);
k = v4i_to_v4f(l);
- CHECK(v4f_x(k), -2.f);
- CHECK(v4f_y(k), -4.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), 6.f);
+ CHK(v4f_x(k) == -2.f);
+ CHK(v4f_y(k) == -4.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == 6.f);
i = v4f_set(1.5f, 2.51f, 3.2f, 4.35f);
l = v4f_to_v4i(i);
- CHECK(v4i_x(l), 2);
- CHECK(v4i_y(l), 3);
- CHECK(v4i_z(l), 3);
- CHECK(v4i_w(l), 4);
+ CHK(v4i_x(l) == 2);
+ CHK(v4i_y(l) == 3);
+ CHK(v4i_z(l) == 3);
+ CHK(v4i_w(l) == 4);
l = v4f_trunk_v4i(i);
- CHECK(v4i_x(l), 1);
- CHECK(v4i_y(l), 2);
- CHECK(v4i_z(l), 3);
- CHECK(v4i_w(l), 4);
+ CHK(v4i_x(l) == 1);
+ CHK(v4i_y(l) == 2);
+ CHK(v4i_z(l) == 3);
+ CHK(v4i_w(l) == 4);
cast.f[0] = 1.f;
cast.f[1] = 2.f;
@@ -549,121 +502,121 @@ main(int argc, char** argv)
i = v4f_set(cast.f[0], cast.f[1], cast.f[2], cast.f[3]);
l = v4f_rcast_v4i(i);
- CHECK(v4i_x(l), cast.i[0]);
- CHECK(v4i_y(l), cast.i[1]);
- CHECK(v4i_z(l), cast.i[2]);
- CHECK(v4i_w(l), cast.i[3]);
+ CHK(v4i_x(l) == cast.i[0]);
+ CHK(v4i_y(l) == cast.i[1]);
+ CHK(v4i_z(l) == cast.i[2]);
+ CHK(v4i_w(l) == cast.i[3]);
i = v4i_rcast_v4f(l);
- CHECK(v4f_x(i), cast.f[0]);
- CHECK(v4f_y(i), cast.f[1]);
- CHECK(v4f_z(i), cast.f[2]);
- CHECK(v4f_w(i), cast.f[3]);
+ CHK(v4f_x(i) == cast.f[0]);
+ CHK(v4f_y(i) == cast.f[1]);
+ CHK(v4f_z(i) == cast.f[2]);
+ CHK(v4f_w(i) == cast.f[3]);
k = v4f_xxxx(j);
- CHECK(v4f_x(k), -2.f);
- CHECK(v4f_y(k), -2.f);
- CHECK(v4f_z(k), -2.f);
- CHECK(v4f_w(k), -2.f);
+ CHK(v4f_x(k) == -2.f);
+ CHK(v4f_y(k) == -2.f);
+ CHK(v4f_z(k) == -2.f);
+ CHK(v4f_w(k) == -2.f);
k = v4f_yyxx(j);
- CHECK(v4f_x(k), -4.f);
- CHECK(v4f_y(k), -4.f);
- CHECK(v4f_z(k), -2.f);
- CHECK(v4f_w(k), -2.f);
+ CHK(v4f_x(k) == -4.f);
+ CHK(v4f_y(k) == -4.f);
+ CHK(v4f_z(k) == -2.f);
+ CHK(v4f_w(k) == -2.f);
k = v4f_wwxy(j);
- CHECK(v4f_x(k), 6.f);
- CHECK(v4f_y(k), 6.f);
- CHECK(v4f_z(k), -2.f);
- CHECK(v4f_w(k), -4.f);
+ CHK(v4f_x(k) == 6.f);
+ CHK(v4f_y(k) == 6.f);
+ CHK(v4f_z(k) == -2.f);
+ CHK(v4f_w(k) == -4.f);
k = v4f_zyzy(j);
- CHECK(v4f_x(k), 3.f);
- CHECK(v4f_y(k), -4.f);
- CHECK(v4f_z(k), 3.f);
- CHECK(v4f_w(k), -4.f);
+ CHK(v4f_x(k) == 3.f);
+ CHK(v4f_y(k) == -4.f);
+ CHK(v4f_z(k) == 3.f);
+ CHK(v4f_w(k) == -4.f);
k = v4f_wyyz(j);
- CHECK(v4f_x(k), 6.f);
- CHECK(v4f_y(k), -4.f);
- CHECK(v4f_z(k), -4.f);
- CHECK(v4f_w(k), 3.f);
+ CHK(v4f_x(k) == 6.f);
+ CHK(v4f_y(k) == -4.f);
+ CHK(v4f_z(k) == -4.f);
+ CHK(v4f_w(k) == 3.f);
i = v4f_xyz_to_rthetaphi(v4f_set(10.f, 5.f, 3.f, 0.f));
- CHECK(eq_eps(v4f_x(i), 11.575836f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.308643f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 0.463647f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 11.575836f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.308643f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 0.463647f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(8.56f, 7.234f, 33.587f, 0.f));
- CHECK(eq_eps(v4f_x(i), 35.407498f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 0.322063f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 0.701638f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 35.407498f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 0.322063f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 0.701638f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(0.f, 0.f, 0.f, 0.f));
- CHECK(eq_eps(v4f_x(i), 0.f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 0.f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 0.f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 0.f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 0.f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 0.f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(4.53f, 0.f, 0.f, 0.f));
- CHECK(eq_eps(v4f_x(i), 4.53f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.570796f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 0.f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 4.53f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.570796f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 0.f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(0.f, 7.2f, 0.f, 0.f));
- CHECK(eq_eps(v4f_x(i), 7.2f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.570796f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 1.570796f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 7.2f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.570796f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 1.570796f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(4.53f, 7.2f, 0.f, 0.f));
- CHECK(eq_eps(v4f_x(i), 8.506521f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.570796f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 1.009206f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 8.506521f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.570796f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 1.009206f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(0.f, 0.f, 3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 3.1f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 0.f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 0.f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 3.1f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 0.f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 0.f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(4.53f, 0.f, 3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 5.489162f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 0.970666f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 0.f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 5.489162f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 0.970666f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 0.f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(0.f, 7.2f, 3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 7.839005f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.164229f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 1.570796f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 7.839005f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.164229f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 1.570796f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(4.53f, 7.2f, 3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.221327f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 1.009206f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.221327f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 1.009206f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(-4.53f, 7.2f, 3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.221327f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 2.132386f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.221327f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 2.132386f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(-4.53f, -7.2f, 3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.221327f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), -2.132386f, 1.e-5f) ||
- eq_eps(v4f_z(i), 2*PI - 2.132386f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.221327f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), -2.132386f, 1.e-5f)
+ || eq_eps(v4f_z(i), 2*PI - 2.132386f, 1.e-5f));
i = v4f_xyz_to_rthetaphi(v4f_set(4.53f, -7.2f, 3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.221327f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), -1.009206f, 1.e-5f) ||
- eq_eps(v4f_z(i), 2*PI - 1.009206f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.221327f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), -1.009206f, 1.e-5f)
+ || eq_eps(v4f_z(i), 2*PI - 1.009206f, 1.e-5f));
i = v4f_xyz_to_rthetaphi(v4f_set(4.53f, 7.2f, -3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.920264f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 1.009206f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.920264f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 1.009206f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(-4.53f, 7.2f, -3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.920264f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), 2.132386f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.920264f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), 2.132386f, 1.e-5f) == 1);
i = v4f_xyz_to_rthetaphi(v4f_set(4.53f, -7.2f, -3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.920264f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), -1.009206f, 1.e-5f) ||
- eq_eps(v4f_z(i), 2*PI - 1.009206f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.920264f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), -1.009206f, 1.e-5f)
+ || eq_eps(v4f_z(i), 2*PI - 1.009206f, 1.e-5f));
i = v4f_xyz_to_rthetaphi(v4f_set(-4.53f, -7.2f, -3.1f, 0.f));
- CHECK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_y(i), 1.920264f, 1.e-5f), 1);
- CHECK(eq_eps(v4f_z(i), -2.132386f, 1.e-5f) ||
- eq_eps(v4f_z(i), 2*PI - 2.132386f, 1.e-5f), 1);
+ CHK(eq_eps(v4f_x(i), 9.053778f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_y(i), 1.920264f, 1.e-5f) == 1);
+ CHK(eq_eps(v4f_z(i), -2.132386f, 1.e-5f)
+ || eq_eps(v4f_z(i), 2*PI - 2.132386f, 1.e-5f));
return 0;
}
diff --git a/src/test_v4i.c b/src/test_v4i.c
@@ -1,16 +1,16 @@
-/* Copyright (C) 2014-2016 Vincent Forest (vaplv@free.fr)
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
*
* The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
+ * it under the terms of the GNU General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The RSIMD library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public License
+ * You should have received a copy of the GNU General Public License
* along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
#include "rsimd.h"
@@ -23,180 +23,228 @@ main(int argc, char** argv)
(void)argc, (void)argv;
i = v4i_load(tmp);
- CHECK(v4i_x(i), 0);
- CHECK(v4i_y(i), 1);
- CHECK(v4i_z(i), 2);
- CHECK(v4i_w(i), 3);
+ CHK(v4i_x(i) == 0);
+ CHK(v4i_y(i) == 1);
+ CHK(v4i_z(i) == 2);
+ CHK(v4i_w(i) == 3);
tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
- CHECK(v4i_store(tmp, i), tmp);
- CHECK(tmp[0], 0);
- CHECK(tmp[1], 1);
- CHECK(tmp[2], 2);
- CHECK(tmp[3], 3);
+ CHK(v4i_store(tmp, i) == tmp);
+ CHK(tmp[0] == 0);
+ CHK(tmp[1] == 1);
+ CHK(tmp[2] == 2);
+ CHK(tmp[3] == 3);
i = v4i_set(1, 2, 3, 4);
- CHECK(v4i_x(i), 1);
- CHECK(v4i_y(i), 2);
- CHECK(v4i_z(i), 3);
- CHECK(v4i_w(i), 4);
+ CHK(v4i_x(i) == 1);
+ CHK(v4i_y(i) == 2);
+ CHK(v4i_z(i) == 3);
+ CHK(v4i_w(i) == 4);
i = v4i_set1(-1);
- CHECK(v4i_x(i), -1);
- CHECK(v4i_y(i), -1);
- CHECK(v4i_z(i), -1);
- CHECK(v4i_w(i), -1);
+ CHK(v4i_x(i) == -1);
+ CHK(v4i_y(i) == -1);
+ CHK(v4i_z(i) == -1);
+ CHK(v4i_w(i) == -1);
i = v4i_zero();
- CHECK(v4i_x(i), 0);
- CHECK(v4i_y(i), 0);
- CHECK(v4i_z(i), 0);
- CHECK(v4i_w(i), 0);
+ CHK(v4i_x(i) == 0);
+ CHK(v4i_y(i) == 0);
+ CHK(v4i_z(i) == 0);
+ CHK(v4i_w(i) == 0);
i = v4i_set(1, 2, 3, 4);
j = v4i_set(5, 6, 7, 8);
k = v4i_xayb(i, j);
- CHECK(v4i_x(k), 1);
- CHECK(v4i_y(k), 5);
- CHECK(v4i_z(k), 2);
- CHECK(v4i_w(k), 6);
+ CHK(v4i_x(k) == 1);
+ CHK(v4i_y(k) == 5);
+ CHK(v4i_z(k) == 2);
+ CHK(v4i_w(k) == 6);
k = v4i_zcwd(i, j);
- CHECK(v4i_x(k), 3);
- CHECK(v4i_y(k), 7);
- CHECK(v4i_z(k), 4);
- CHECK(v4i_w(k), 8);
+ CHK(v4i_x(k) == 3);
+ CHK(v4i_y(k) == 7);
+ CHK(v4i_z(k) == 4);
+ CHK(v4i_w(k) == 8);
i = v4i_set(0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F);
j = v4i_set(0x01020401, 0x70605040, 0x0F1F2F3F, 0x00000000);
k = v4i_or(i, j);
- CHECK(v4i_x(k), (int32_t)0x01030603);
- CHECK(v4i_y(k), (int32_t)0x74655647);
- CHECK(v4i_z(k), (int32_t)0x0F1F2F3F);
- CHECK(v4i_w(k), (int32_t)0x0C0D0E0F);
+ CHK(v4i_x(k) == (int32_t)0x01030603);
+ CHK(v4i_y(k) == (int32_t)0x74655647);
+ CHK(v4i_z(k) == (int32_t)0x0F1F2F3F);
+ CHK(v4i_w(k) == (int32_t)0x0C0D0E0F);
k = v4i_and(i, j);
- CHECK(v4i_x(k), (int32_t)0x00000001);
- CHECK(v4i_y(k), (int32_t)0x00000000);
- CHECK(v4i_z(k), (int32_t)0x08090A0B);
- CHECK(v4i_w(k), (int32_t)0x00000000);
+ CHK(v4i_x(k) == (int32_t)0x00000001);
+ CHK(v4i_y(k) == (int32_t)0x00000000);
+ CHK(v4i_z(k) == (int32_t)0x08090A0B);
+ CHK(v4i_w(k) == (int32_t)0x00000000);
k = v4i_andnot(i, j);
- CHECK(v4i_x(k), (int32_t)0x01020400);
- CHECK(v4i_y(k), (int32_t)0x70605040);
- CHECK(v4i_z(k), (int32_t)0x07162534);
- CHECK(v4i_w(k), (int32_t)0x00000000);
+ CHK(v4i_x(k) == (int32_t)0x01020400);
+ CHK(v4i_y(k) == (int32_t)0x70605040);
+ CHK(v4i_z(k) == (int32_t)0x07162534);
+ CHK(v4i_w(k) == (int32_t)0x00000000);
k = v4i_xor(i, j);
- CHECK(v4i_x(k), (int32_t)0x01030602);
- CHECK(v4i_y(k), (int32_t)0x74655647);
- CHECK(v4i_z(k), (int32_t)0x07162534);
- CHECK(v4i_w(k), (int32_t)0x0C0D0E0F);
+ CHK(v4i_x(k) == (int32_t)0x01030602);
+ CHK(v4i_y(k) == (int32_t)0x74655647);
+ CHK(v4i_z(k) == (int32_t)0x07162534);
+ CHK(v4i_w(k) == (int32_t)0x0C0D0E0F);
k = v4i_not(i);
- CHECK(v4i_x(k), (int32_t)0xFFFEFDFC);
- CHECK(v4i_y(k), (int32_t)0xFBFAF9F8);
- CHECK(v4i_z(k), (int32_t)0xF7F6F5F4);
- CHECK(v4i_w(k), (int32_t)0xF3F2F1F0);
+ CHK(v4i_x(k) == (int32_t)0xFFFEFDFC);
+ CHK(v4i_y(k) == (int32_t)0xFBFAF9F8);
+ CHK(v4i_z(k) == (int32_t)0xF7F6F5F4);
+ CHK(v4i_w(k) == (int32_t)0xF3F2F1F0);
i = v4i_set(32, 16, 8, 4);
k = v4i_rshift(i, 4);
- CHECK(v4i_x(k), 2);
- CHECK(v4i_y(k), 1);
- CHECK(v4i_z(k), 0);
- CHECK(v4i_w(k), 0);
-
+ CHK(v4i_x(k) == 2);
+ CHK(v4i_y(k) == 1);
+ CHK(v4i_z(k) == 0);
+ CHK(v4i_w(k) == 0);
+
k = v4i_rshift(i, 1);
- CHECK(v4i_x(k), 16);
- CHECK(v4i_y(k), 8);
- CHECK(v4i_z(k), 4);
- CHECK(v4i_w(k), 2);
+ CHK(v4i_x(k) == 16);
+ CHK(v4i_y(k) == 8);
+ CHK(v4i_z(k) == 4);
+ CHK(v4i_w(k) == 2);
k = v4i_lshift(i, 4);
- CHECK(v4i_x(k), 512);
- CHECK(v4i_y(k), 256);
- CHECK(v4i_z(k), 128);
- CHECK(v4i_w(k), 64);
+ CHK(v4i_x(k) == 512);
+ CHK(v4i_y(k) == 256);
+ CHK(v4i_z(k) == 128);
+ CHK(v4i_w(k) == 64);
i = v4i_set(1, 2, 3, 4);
j = v4i_set(-2, -4, 3, 6);
k = v4i_add(i, j);
- CHECK(v4i_x(k), -1);
- CHECK(v4i_y(k), -2);
- CHECK(v4i_z(k), 6);
- CHECK(v4i_w(k), 10);
+ CHK(v4i_x(k) == -1);
+ CHK(v4i_y(k) == -2);
+ CHK(v4i_z(k) == 6);
+ CHK(v4i_w(k) == 10);
k = v4i_sub(i, j);
- CHECK(v4i_x(k), 3);
- CHECK(v4i_y(k), 6);
- CHECK(v4i_z(k), 0);
- CHECK(v4i_w(k), -2);
+ CHK(v4i_x(k) == 3);
+ CHK(v4i_y(k) == 6);
+ CHK(v4i_z(k) == 0);
+ CHK(v4i_w(k) == -2);
+
+ k = v4i_minus(j);
+ CHK(v4i_x(k) == -v4i_x(j));
+ CHK(v4i_y(k) == -v4i_y(j));
+ CHK(v4i_z(k) == -v4i_z(j));
+ CHK(v4i_w(k) == -v4i_w(j));
k = v4i_eq(i, j);
- CHECK(v4i_x(k), (int32_t)0x00000000);
- CHECK(v4i_y(k), (int32_t)0x00000000);
- CHECK(v4i_z(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_w(k), (int32_t)0x00000000);
+ CHK(v4i_x(k) == (int32_t)0x00000000);
+ CHK(v4i_y(k) == (int32_t)0x00000000);
+ CHK(v4i_z(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_w(k) == (int32_t)0x00000000);
k = v4i_neq(i, j);
- CHECK(v4i_x(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_y(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_z(k), (int32_t)0x00000000);
- CHECK(v4i_w(k), (int32_t)0xFFFFFFFF);
+ CHK(v4i_x(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_y(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_z(k) == (int32_t)0x00000000);
+ CHK(v4i_w(k) == (int32_t)0xFFFFFFFF);
k = v4i_gt(i, j);
- CHECK(v4i_x(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_y(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_z(k), (int32_t)0x00000000);
- CHECK(v4i_w(k), (int32_t)0x00000000);
+ CHK(v4i_x(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_y(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_z(k) == (int32_t)0x00000000);
+ CHK(v4i_w(k) == (int32_t)0x00000000);
k = v4i_lt(i, j);
- CHECK(v4i_x(k), (int32_t)0x00000000);
- CHECK(v4i_y(k), (int32_t)0x00000000);
- CHECK(v4i_z(k), (int32_t)0x00000000);
- CHECK(v4i_w(k), (int32_t)0xFFFFFFFF);
+ CHK(v4i_x(k) == (int32_t)0x00000000);
+ CHK(v4i_y(k) == (int32_t)0x00000000);
+ CHK(v4i_z(k) == (int32_t)0x00000000);
+ CHK(v4i_w(k) == (int32_t)0xFFFFFFFF);
k = v4i_ge(i, j);
- CHECK(v4i_x(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_y(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_z(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_w(k), (int32_t)0x00000000);
+ CHK(v4i_x(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_y(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_z(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_w(k) == (int32_t)0x00000000);
k = v4i_le(i, j);
- CHECK(v4i_x(k), (int32_t)0x00000000);
- CHECK(v4i_y(k), (int32_t)0x00000000);
- CHECK(v4i_z(k), (int32_t)0xFFFFFFFF);
- CHECK(v4i_w(k), (int32_t)0xFFFFFFFF);
+ CHK(v4i_x(k) == (int32_t)0x00000000);
+ CHK(v4i_y(k) == (int32_t)0x00000000);
+ CHK(v4i_z(k) == (int32_t)0xFFFFFFFF);
+ CHK(v4i_w(k) == (int32_t)0xFFFFFFFF);
k = v4i_sel(i, j, v4i_set(~0, 0, ~0, 0));
- CHECK(v4i_x(k), -2);
- CHECK(v4i_y(k), 2);
- CHECK(v4i_z(k), 3);
- CHECK(v4i_w(k), 4);
+ CHK(v4i_x(k) == -2);
+ CHK(v4i_y(k) == 2);
+ CHK(v4i_z(k) == 3);
+ CHK(v4i_w(k) == 4);
k = v4i_xxxx(i);
- CHECK(v4i_x(k), 1);
- CHECK(v4i_y(k), 1);
- CHECK(v4i_z(k), 1);
- CHECK(v4i_w(k), 1);
+ CHK(v4i_x(k) == 1);
+ CHK(v4i_y(k) == 1);
+ CHK(v4i_z(k) == 1);
+ CHK(v4i_w(k) == 1);
k = v4i_wwxy(i);
- CHECK(v4i_x(k), 4);
- CHECK(v4i_y(k), 4);
- CHECK(v4i_z(k), 1);
- CHECK(v4i_w(k), 2);
+ CHK(v4i_x(k) == 4);
+ CHK(v4i_y(k) == 4);
+ CHK(v4i_z(k) == 1);
+ CHK(v4i_w(k) == 2);
k = v4i_xyxy(i);
- CHECK(v4i_x(k), 1);
- CHECK(v4i_y(k), 2);
- CHECK(v4i_z(k), 1);
- CHECK(v4i_w(k), 2);
+ CHK(v4i_x(k) == 1);
+ CHK(v4i_y(k) == 2);
+ CHK(v4i_z(k) == 1);
+ CHK(v4i_w(k) == 2);
k = v4i_wyyz(i);
- CHECK(v4i_x(k), 4);
- CHECK(v4i_y(k), 2);
- CHECK(v4i_z(k), 2);
- CHECK(v4i_w(k), 3);
+ CHK(v4i_x(k) == 4);
+ CHK(v4i_y(k) == 2);
+ CHK(v4i_z(k) == 2);
+ CHK(v4i_w(k) == 3);
+
+ i = v4i_set(1, 2, 3, 4);
+ j = v4i_set(-2, -4, 3, 6);
+ k = v4i_min(i, j);
+ CHK(v4i_x(k) == -2);
+ CHK(v4i_y(k) == -4);
+ CHK(v4i_z(k) == 3);
+ CHK(v4i_w(k) == 4);
+
+ k = v4i_max(i, j);
+ CHK(v4i_x(k) == 1);
+ CHK(v4i_y(k) == 2);
+ CHK(v4i_z(k) == 3);
+ CHK(v4i_w(k) == 6);
+
+ k = v4i_reduce_min(i);
+ CHK(v4i_x(k) == 1);
+ CHK(v4i_y(k) == 1);
+ CHK(v4i_z(k) == 1);
+ CHK(v4i_w(k) == 1);
+ CHK(v4i_reduce_min_i32(i) == 1);
+
+ k = v4i_reduce_min(j);
+ CHK(v4i_x(k) == -4);
+ CHK(v4i_y(k) == -4);
+ CHK(v4i_z(k) == -4);
+ CHK(v4i_w(k) == -4);
+ CHK(v4i_reduce_min_i32(j) == -4);
+
+ k = v4i_reduce_max(i);
+ CHK(v4i_x(k) == 4);
+ CHK(v4i_y(k) == 4);
+ CHK(v4i_z(k) == 4);
+ CHK(v4i_w(k) == 4);
+ CHK(v4i_reduce_max_i32(i) == 4);
+
+ k = v4i_reduce_max(j);
+ CHK(v4i_x(k) == 6);
+ CHK(v4i_y(k) == 6);
+ CHK(v4i_z(k) == 6);
+ CHK(v4i_w(k) == 6);
+ CHK(v4i_reduce_max_i32(j) == 6);
return 0;
}
diff --git a/src/test_v8f.c b/src/test_v8f.c
@@ -0,0 +1,450 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+
+int
+main(int argc, char** argv)
+{
+ v8f_T i, j, k; /* scratch vectors: inputs and results of the tested calls */
+ ALIGN(32) union { int32_t i[8]; float f[8]; } cast; /* lane bits as int32_t */
+ ALIGN(32) float tmp[9] = {0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f};
+ (void)argc, (void)argv;
+ /* Exercise the v8f API: each CHK states the expected value of one lane */
+ i = v8f_loadu(tmp+1); /* tmp+1: one float past the ALIGN(32) base */
+ CHK(v4f_x(v8f_abcd(i)) == 1.f);
+ CHK(v4f_y(v8f_abcd(i)) == 2.f);
+ CHK(v4f_z(v8f_abcd(i)) == 3.f);
+ CHK(v4f_w(v8f_abcd(i)) == 4.f);
+ CHK(v4f_x(v8f_efgh(i)) == 5.f);
+ CHK(v4f_y(v8f_efgh(i)) == 6.f);
+ CHK(v4f_z(v8f_efgh(i)) == 7.f);
+ CHK(v4f_w(v8f_efgh(i)) == 8.f);
+ /* load from tmp itself: the lanes are tmp[0..7] */
+ i = v8f_load(tmp);
+ CHK(v4f_x(v8f_abcd(i)) == 0.f);
+ CHK(v4f_y(v8f_abcd(i)) == 1.f);
+ CHK(v4f_z(v8f_abcd(i)) == 2.f);
+ CHK(v4f_w(v8f_abcd(i)) == 3.f);
+ CHK(v4f_x(v8f_efgh(i)) == 4.f);
+ CHK(v4f_y(v8f_efgh(i)) == 5.f);
+ CHK(v4f_z(v8f_efgh(i)) == 6.f);
+ CHK(v4f_w(v8f_efgh(i)) == 7.f);
+ /* Zero tmp[0..7] then store i: tmp[0..7] are restored, tmp[8] is untouched */
+ tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0.f;
+ tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0.f;
+ CHK(v8f_store(tmp, i) == tmp);
+ CHK(tmp[0] == 0.f);
+ CHK(tmp[1] == 1.f);
+ CHK(tmp[2] == 2.f);
+ CHK(tmp[3] == 3.f);
+ CHK(tmp[4] == 4.f);
+ CHK(tmp[5] == 5.f);
+ CHK(tmp[6] == 6.f);
+ CHK(tmp[7] == 7.f);
+ CHK(tmp[8] == 8.f);
+ /* set1: the same scalar is expected in the 8 lanes */
+ i = v8f_set1(-2.f);
+ CHK(v4f_x(v8f_abcd(i)) == -2.f);
+ CHK(v4f_y(v8f_abcd(i)) == -2.f);
+ CHK(v4f_z(v8f_abcd(i)) == -2.f);
+ CHK(v4f_w(v8f_abcd(i)) == -2.f);
+ CHK(v4f_x(v8f_efgh(i)) == -2.f);
+ CHK(v4f_y(v8f_efgh(i)) == -2.f);
+ CHK(v4f_z(v8f_efgh(i)) == -2.f);
+ CHK(v4f_w(v8f_efgh(i)) == -2.f);
+ /* set: arguments 1-4 are read back through abcd, arguments 5-8 through efgh */
+ i = v8f_set(0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f);
+ CHK(v4f_x(v8f_abcd(i)) == 0.f);
+ CHK(v4f_y(v8f_abcd(i)) == 1.f);
+ CHK(v4f_z(v8f_abcd(i)) == 2.f);
+ CHK(v4f_w(v8f_abcd(i)) == 3.f);
+ CHK(v4f_x(v8f_efgh(i)) == 4.f);
+ CHK(v4f_y(v8f_efgh(i)) == 5.f);
+ CHK(v4f_z(v8f_efgh(i)) == 6.f);
+ CHK(v4f_w(v8f_efgh(i)) == 7.f);
+ /* zero: the 8 lanes are expected to be 0.f */
+ i = v8f_zero();
+ CHK(v4f_x(v8f_abcd(i)) == 0.f);
+ CHK(v4f_y(v8f_abcd(i)) == 0.f);
+ CHK(v4f_z(v8f_abcd(i)) == 0.f);
+ CHK(v4f_w(v8f_abcd(i)) == 0.f);
+ CHK(v4f_x(v8f_efgh(i)) == 0.f);
+ CHK(v4f_y(v8f_efgh(i)) == 0.f);
+ CHK(v4f_z(v8f_efgh(i)) == 0.f);
+ CHK(v4f_w(v8f_efgh(i)) == 0.f);
+ /* mask: each lane is written to cast.f and its bits are checked via cast.i */
+ i = v8f_mask(~0,~0,0,0,0,~0,~0,0);
+ cast.f[0] = v4f_x(v8f_abcd(i)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(v8f_abcd(i)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(v8f_abcd(i)); CHK(cast.i[2] == (int32_t)0x00000000);
+ cast.f[3] = v4f_w(v8f_abcd(i)); CHK(cast.i[3] == (int32_t)0x00000000);
+ cast.f[4] = v4f_x(v8f_efgh(i)); CHK(cast.i[4] == (int32_t)0x00000000);
+ cast.f[5] = v4f_y(v8f_efgh(i)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
+ cast.f[6] = v4f_z(v8f_efgh(i)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
+ cast.f[7] = v4f_w(v8f_efgh(i)); CHK(cast.i[7] == (int32_t)0x00000000);
+ /* mask1: the same bit pattern is expected in the 8 lanes */
+ i = v8f_mask1(~0);
+ cast.f[0] = v4f_x(v8f_abcd(i)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(v8f_abcd(i)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(v8f_abcd(i)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(v8f_abcd(i)); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
+ cast.f[4] = v4f_x(v8f_efgh(i)); CHK(cast.i[4] == (int32_t)0xFFFFFFFF);
+ cast.f[5] = v4f_y(v8f_efgh(i)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
+ cast.f[6] = v4f_z(v8f_efgh(i)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
+ cast.f[7] = v4f_w(v8f_efgh(i)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
+ /* true: all the bits of the 8 lanes are expected to be set */
+ i = v8f_true();
+ cast.f[0] = v4f_x(v8f_abcd(i)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(v8f_abcd(i)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(v8f_abcd(i)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(v8f_abcd(i)); CHK(cast.i[3] == (int32_t)0xFFFFFFFF);
+ cast.f[4] = v4f_x(v8f_efgh(i)); CHK(cast.i[4] == (int32_t)0xFFFFFFFF);
+ cast.f[5] = v4f_y(v8f_efgh(i)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
+ cast.f[6] = v4f_z(v8f_efgh(i)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
+ cast.f[7] = v4f_w(v8f_efgh(i)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
+ /* false: all the bits of the 8 lanes are expected to be cleared */
+ i = v8f_false();
+ cast.f[0] = v4f_x(v8f_abcd(i)); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(v8f_abcd(i)); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(v8f_abcd(i)); CHK(cast.i[2] == (int32_t)0x00000000);
+ cast.f[3] = v4f_w(v8f_abcd(i)); CHK(cast.i[3] == (int32_t)0x00000000);
+ cast.f[4] = v4f_x(v8f_efgh(i)); CHK(cast.i[4] == (int32_t)0x00000000);
+ cast.f[5] = v4f_y(v8f_efgh(i)); CHK(cast.i[5] == (int32_t)0x00000000);
+ cast.f[6] = v4f_z(v8f_efgh(i)); CHK(cast.i[6] == (int32_t)0x00000000);
+ cast.f[7] = v4f_w(v8f_efgh(i)); CHK(cast.i[7] == (int32_t)0x00000000);
+ /* Bitwise operators on two masks; or: lanes set in i or in j */
+ i = v8f_mask(~0,~0,0,0,0,~0,~0,0);
+ j = v8f_mask(~0,0,~0,0,0,~0,0,~0);
+ k = v8f_or(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == (int32_t)0x00000000);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == (int32_t)0x00000000);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
+ /* and: lanes set in both i and j */
+ k = v8f_and(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == (int32_t)0xFFFFFFFF);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == (int32_t)0x00000000);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == (int32_t)0x00000000);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == (int32_t)0x00000000);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == (int32_t)0xFFFFFFFF);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == (int32_t)0x00000000);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == (int32_t)0x00000000);
+ /* andnot(i, j): lanes set in j but not in i */
+ k = v8f_andnot(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == (int32_t)0x00000000);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == (int32_t)0x00000000);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == (int32_t)0x00000000);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == (int32_t)0x00000000);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == (int32_t)0x00000000);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
+ /* xor: lanes set in exactly one of i and j */
+ k = v8f_xor(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == (int32_t)0x00000000);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == (int32_t)0xFFFFFFFF);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == (int32_t)0xFFFFFFFF);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == (int32_t)0x00000000);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == (int32_t)0x00000000);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == (int32_t)0x00000000);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == (int32_t)0xFFFFFFFF);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == (int32_t)0xFFFFFFFF);
+ /* movemask: bit n of the expected value is the most significant bit of lane n */
+ CHK(v8f_movemask(k) == 0xC6);
+ i = v8f_mask
+ ((int32_t)0x01020401, (int32_t)0x80605040, (int32_t)0x7F1F2F3F, (int32_t)0,
+ (int32_t)0xF0000000, (int32_t)0xFFFFFFFF, (int32_t)0x7FFFFFFF, (int32_t)~0);
+ CHK(v8f_movemask(i) == 0xB2);
+ /* sel(i, j, mask): lane of j where the mask is set, lane of i otherwise */
+ i = v8f_set(0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f);
+ j = v8f_set(8.f,9.f,10.f,11.f,12.f,13.f,14.f,15.f);
+ k = v8f_sel(i, j, v8f_mask(~0,~0,0,0,0,~0,~0,0));
+ CHK(v4f_x(v8f_abcd(k)) == 8.f);
+ CHK(v4f_y(v8f_abcd(k)) == 9.f);
+ CHK(v4f_z(v8f_abcd(k)) == 2.f);
+ CHK(v4f_w(v8f_abcd(k)) == 3.f);
+ CHK(v4f_x(v8f_efgh(k)) == 4.f);
+ CHK(v4f_y(v8f_efgh(k)) == 13.f);
+ CHK(v4f_z(v8f_efgh(k)) == 14.f);
+ CHK(v4f_w(v8f_efgh(k)) == 7.f);
+ /* minus: lane-wise negation of i */
+ k = v8f_minus(i);
+ CHK(v4f_x(v8f_abcd(k)) == -0.f);
+ CHK(v4f_y(v8f_abcd(k)) == -1.f);
+ CHK(v4f_z(v8f_abcd(k)) == -2.f);
+ CHK(v4f_w(v8f_abcd(k)) == -3.f);
+ CHK(v4f_x(v8f_efgh(k)) == -4.f);
+ CHK(v4f_y(v8f_efgh(k)) == -5.f);
+ CHK(v4f_z(v8f_efgh(k)) == -6.f);
+ CHK(v4f_w(v8f_efgh(k)) == -7.f);
+ /* add: lane-wise i + j */
+ k = v8f_add(i, j);
+ CHK(v4f_x(v8f_abcd(k)) == 8.f);
+ CHK(v4f_y(v8f_abcd(k)) == 10.f);
+ CHK(v4f_z(v8f_abcd(k)) == 12.f);
+ CHK(v4f_w(v8f_abcd(k)) == 14.f);
+ CHK(v4f_x(v8f_efgh(k)) == 16.f);
+ CHK(v4f_y(v8f_efgh(k)) == 18.f);
+ CHK(v4f_z(v8f_efgh(k)) == 20.f);
+ CHK(v4f_w(v8f_efgh(k)) == 22.f);
+ /* sub: lane-wise i - j */
+ k = v8f_sub(i, j);
+ CHK(v4f_x(v8f_abcd(k)) == -8.f);
+ CHK(v4f_y(v8f_abcd(k)) == -8.f);
+ CHK(v4f_z(v8f_abcd(k)) == -8.f);
+ CHK(v4f_w(v8f_abcd(k)) == -8.f);
+ CHK(v4f_x(v8f_efgh(k)) == -8.f);
+ CHK(v4f_y(v8f_efgh(k)) == -8.f);
+ CHK(v4f_z(v8f_efgh(k)) == -8.f);
+ CHK(v4f_w(v8f_efgh(k)) == -8.f);
+ /* mul: lane-wise i * j */
+ k = v8f_mul(i, j);
+ CHK(v4f_x(v8f_abcd(k)) == 0.f);
+ CHK(v4f_y(v8f_abcd(k)) == 9.f);
+ CHK(v4f_z(v8f_abcd(k)) == 20.f);
+ CHK(v4f_w(v8f_abcd(k)) == 33.f);
+ CHK(v4f_x(v8f_efgh(k)) == 48.f);
+ CHK(v4f_y(v8f_efgh(k)) == 65.f);
+ CHK(v4f_z(v8f_efgh(k)) == 84.f);
+ CHK(v4f_w(v8f_efgh(k)) == 105.f);
+ /* div: lane-wise i / j, compared exactly with the reduced fractions */
+ k = v8f_div(i, j);
+ CHK(v4f_x(v8f_abcd(k)) == 0.f);
+ CHK(v4f_y(v8f_abcd(k)) == 1.f/9.f);
+ CHK(v4f_z(v8f_abcd(k)) == 0.2f);
+ CHK(v4f_w(v8f_abcd(k)) == 3.f/11.f);
+ CHK(v4f_x(v8f_efgh(k)) == 1.f/3.f);
+ CHK(v4f_y(v8f_efgh(k)) == 5.f/13.f);
+ CHK(v4f_z(v8f_efgh(k)) == 3.f/7.f);
+ CHK(v4f_w(v8f_efgh(k)) == 7.f/15.f);
+ /* madd(i, j, k): lane-wise i*j + k, compared exactly */
+ k = v8f_set(0.1f,0.2f,0.3f,0.4f,0.5f,0.6f,0.7f,0.8f);
+ k = v8f_madd(i, j, k);
+ CHK(v4f_x(v8f_abcd(k)) == 0.1f);
+ CHK(v4f_y(v8f_abcd(k)) == 9.2f);
+ CHK(v4f_z(v8f_abcd(k)) == 20.3f);
+ CHK(v4f_w(v8f_abcd(k)) == 33.4f);
+ CHK(v4f_x(v8f_efgh(k)) == 48.5f);
+ CHK(v4f_y(v8f_efgh(k)) == 65.6f);
+ CHK(v4f_z(v8f_efgh(k)) == 84.7f);
+ CHK(v4f_w(v8f_efgh(k)) == 105.8f);
+ /* abs applied to the negated i is expected to give i back */
+ k = v8f_abs(v8f_minus(i));
+ CHK(v4f_x(v8f_abcd(k)) == 0.f);
+ CHK(v4f_y(v8f_abcd(k)) == 1.f);
+ CHK(v4f_z(v8f_abcd(k)) == 2.f);
+ CHK(v4f_w(v8f_abcd(k)) == 3.f);
+ CHK(v4f_x(v8f_efgh(k)) == 4.f);
+ CHK(v4f_y(v8f_efgh(k)) == 5.f);
+ CHK(v4f_z(v8f_efgh(k)) == 6.f);
+ CHK(v4f_w(v8f_efgh(k)) == 7.f);
+ /* sqrt of the squares of 1..8: the roots are compared exactly */
+ i = v8f_set(1.f, 4.f, 9.f, 16.f, 25.f, 36.f, 49.f, 64.f);
+ k = v8f_sqrt(i);
+ CHK(v4f_x(v8f_abcd(k)) == 1.f);
+ CHK(v4f_y(v8f_abcd(k)) == 2.f);
+ CHK(v4f_z(v8f_abcd(k)) == 3.f);
+ CHK(v4f_w(v8f_abcd(k)) == 4.f);
+ CHK(v4f_x(v8f_efgh(k)) == 5.f);
+ CHK(v4f_y(v8f_efgh(k)) == 6.f);
+ CHK(v4f_z(v8f_efgh(k)) == 7.f);
+ CHK(v4f_w(v8f_efgh(k)) == 8.f);
+ /* rsqrte: compared to 1/sqrt(i) through eq_epsf with a 1.e-3f epsilon */
+ k = v8f_rsqrte(i);
+ CHK(eq_epsf(v4f_x(v8f_abcd(k)), 1.f/1.f, 1.e-3f));
+ CHK(eq_epsf(v4f_y(v8f_abcd(k)), 1.f/2.f, 1.e-3f));
+ CHK(eq_epsf(v4f_z(v8f_abcd(k)), 1.f/3.f, 1.e-3f));
+ CHK(eq_epsf(v4f_w(v8f_abcd(k)), 1.f/4.f, 1.e-3f));
+ CHK(eq_epsf(v4f_x(v8f_efgh(k)), 1.f/5.f, 1.e-3f));
+ CHK(eq_epsf(v4f_y(v8f_efgh(k)), 1.f/6.f, 1.e-3f));
+ CHK(eq_epsf(v4f_z(v8f_efgh(k)), 1.f/7.f, 1.e-3f));
+ CHK(eq_epsf(v4f_w(v8f_efgh(k)), 1.f/8.f, 1.e-3f));
+ /* rsqrt: same expected values, with a 1.e-6f epsilon */
+ k = v8f_rsqrt(i);
+ CHK(eq_epsf(v4f_x(v8f_abcd(k)), 1.f/1.f, 1.e-6f));
+ CHK(eq_epsf(v4f_y(v8f_abcd(k)), 1.f/2.f, 1.e-6f));
+ CHK(eq_epsf(v4f_z(v8f_abcd(k)), 1.f/3.f, 1.e-6f));
+ CHK(eq_epsf(v4f_w(v8f_abcd(k)), 1.f/4.f, 1.e-6f));
+ CHK(eq_epsf(v4f_x(v8f_efgh(k)), 1.f/5.f, 1.e-6f));
+ CHK(eq_epsf(v4f_y(v8f_efgh(k)), 1.f/6.f, 1.e-6f));
+ CHK(eq_epsf(v4f_z(v8f_efgh(k)), 1.f/7.f, 1.e-6f));
+ CHK(eq_epsf(v4f_w(v8f_efgh(k)), 1.f/8.f, 1.e-6f));
+ /* rcpe: compared to 1/i through eq_epsf with a 1.e-3f epsilon */
+ i = v8f_set(1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f);
+ k = v8f_rcpe(i);
+ CHK(eq_epsf(v4f_x(v8f_abcd(k)), 1.f/1.f, 1.e-3f));
+ CHK(eq_epsf(v4f_y(v8f_abcd(k)), 1.f/2.f, 1.e-3f));
+ CHK(eq_epsf(v4f_z(v8f_abcd(k)), 1.f/3.f, 1.e-3f));
+ CHK(eq_epsf(v4f_w(v8f_abcd(k)), 1.f/4.f, 1.e-3f));
+ CHK(eq_epsf(v4f_x(v8f_efgh(k)), 1.f/5.f, 1.e-3f));
+ CHK(eq_epsf(v4f_y(v8f_efgh(k)), 1.f/6.f, 1.e-3f));
+ CHK(eq_epsf(v4f_z(v8f_efgh(k)), 1.f/7.f, 1.e-3f));
+ CHK(eq_epsf(v4f_w(v8f_efgh(k)), 1.f/8.f, 1.e-3f));
+ /* rcp: same expected values, with a 1.e-6f epsilon */
+ k = v8f_rcp(i);
+ CHK(eq_epsf(v4f_x(v8f_abcd(k)), 1.f/1.f, 1.e-6f));
+ CHK(eq_epsf(v4f_y(v8f_abcd(k)), 1.f/2.f, 1.e-6f));
+ CHK(eq_epsf(v4f_z(v8f_abcd(k)), 1.f/3.f, 1.e-6f));
+ CHK(eq_epsf(v4f_w(v8f_abcd(k)), 1.f/4.f, 1.e-6f));
+ CHK(eq_epsf(v4f_x(v8f_efgh(k)), 1.f/5.f, 1.e-6f));
+ CHK(eq_epsf(v4f_y(v8f_efgh(k)), 1.f/6.f, 1.e-6f));
+ CHK(eq_epsf(v4f_z(v8f_efgh(k)), 1.f/7.f, 1.e-6f));
+ CHK(eq_epsf(v4f_w(v8f_efgh(k)), 1.f/8.f, 1.e-6f));
+ /* lerp(i, j, 0.5): the midpoint of each pair of lanes is expected */
+ j = v8f_set(2.f,3.f,4.f,5.f,6.f,7.f,8.f,9.f);
+ k = v8f_lerp(i, j, v8f_set1(0.5f));
+ CHK(v4f_x(v8f_abcd(k)) == 1.5f);
+ CHK(v4f_y(v8f_abcd(k)) == 2.5f);
+ CHK(v4f_z(v8f_abcd(k)) == 3.5f);
+ CHK(v4f_w(v8f_abcd(k)) == 4.5f);
+ CHK(v4f_x(v8f_efgh(k)) == 5.5f);
+ CHK(v4f_y(v8f_efgh(k)) == 6.5f);
+ CHK(v4f_z(v8f_efgh(k)) == 7.5f);
+ CHK(v4f_w(v8f_efgh(k)) == 8.5f);
+ /* Operands of the comparisons: lanes of i equal, greater or lesser than j */
+ i = v8f_set(0.f, 1.f,2.f,3.f, 4.f,5.f,6.f,7.f);
+ j = v8f_set(0.f,-1.f,4.f,4.f,-2.f,6.f,6.f,8.f);
+ /* Each comparison yields ~0 in the lanes where it holds, 0 otherwise */
+ k = v8f_eq(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == 0);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == 0);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == 0);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == 0);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == 0);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == 0);
+ /* neq: i != j */
+ k = v8f_neq(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == 0);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] ==~0);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] ==~0);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] ==~0);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == 0);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
+ /* ge: i >= j */
+ k = v8f_ge(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == 0);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == 0);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] ==~0);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == 0);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == 0);
+ /* le: i <= j */
+ k = v8f_le(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == 0);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] ==~0);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == 0);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] ==~0);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
+ /* gt: i > j */
+ k = v8f_gt(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == 0);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] == 0);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == 0);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] ==~0);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == 0);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == 0);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] == 0);
+ /* lt: i < j */
+ k = v8f_lt(i, j);
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] == 0);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] == 0);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] ==~0);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == 0);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] ==~0);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] == 0);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
+ /* eq_eps with the same epsilon in all the lanes: 3 lanes are out of tolerance */
+ j = v8f_set(0.0001f, 0.99999f, 2.f, 3.1f, 4.001f, 5.0002f, 6.f, 6.999999f);
+ k = v8f_eq_eps(i, j, v8f_set1(1.e-4f));
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] == 0);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] == 0);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] == 0);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
+ /* eq_eps with one epsilon per lane: all the lanes are expected to match */
+ k = v8f_eq_eps(i, j, v8f_set(1.e-4f, 1.e-4f, 0.f, 0.1f, 1.e-3f, 2.e-4f, 0.f, 1.e-5f));
+ cast.f[0] = v4f_x(v8f_abcd(k)); CHK(cast.i[0] ==~0);
+ cast.f[1] = v4f_y(v8f_abcd(k)); CHK(cast.i[1] ==~0);
+ cast.f[2] = v4f_z(v8f_abcd(k)); CHK(cast.i[2] ==~0);
+ cast.f[3] = v4f_w(v8f_abcd(k)); CHK(cast.i[3] ==~0);
+ cast.f[4] = v4f_x(v8f_efgh(k)); CHK(cast.i[4] ==~0);
+ cast.f[5] = v4f_y(v8f_efgh(k)); CHK(cast.i[5] ==~0);
+ cast.f[6] = v4f_z(v8f_efgh(k)); CHK(cast.i[6] ==~0);
+ cast.f[7] = v4f_w(v8f_efgh(k)); CHK(cast.i[7] ==~0);
+ /* Operands of the min, max, reduce and clamp tests */
+ i = v8f_set(0.f, 1.f,2.f,3.f, 4.f,5.f,6.f,7.f);
+ j = v8f_set(0.f,-1.f,4.f,4.f,-2.f,6.f,6.f,8.f);
+ /* min: lane-wise minimum of i and j */
+ k = v8f_min(i, j);
+ CHK(v4f_x(v8f_abcd(k)) == 0.f);
+ CHK(v4f_y(v8f_abcd(k)) ==-1.f);
+ CHK(v4f_z(v8f_abcd(k)) == 2.f);
+ CHK(v4f_w(v8f_abcd(k)) == 3.f);
+ CHK(v4f_x(v8f_efgh(k)) ==-2.f);
+ CHK(v4f_y(v8f_efgh(k)) == 5.f);
+ CHK(v4f_z(v8f_efgh(k)) == 6.f);
+ CHK(v4f_w(v8f_efgh(k)) == 7.f);
+ /* max: lane-wise maximum of i and j */
+ k = v8f_max(i, j);
+ CHK(v4f_x(v8f_abcd(k)) == 0.f);
+ CHK(v4f_y(v8f_abcd(k)) == 1.f);
+ CHK(v4f_z(v8f_abcd(k)) == 4.f);
+ CHK(v4f_w(v8f_abcd(k)) == 4.f);
+ CHK(v4f_x(v8f_efgh(k)) == 4.f);
+ CHK(v4f_y(v8f_efgh(k)) == 6.f);
+ CHK(v4f_z(v8f_efgh(k)) == 6.f);
+ CHK(v4f_w(v8f_efgh(k)) == 8.f);
+ /* reduce_min/reduce_max: smallest/largest lane, compared as a scalar */
+ CHK(v8f_reduce_min(i) == 0.f);
+ CHK(v8f_reduce_min(j) ==-2.f);
+ CHK(v8f_reduce_max(i) == 7.f);
+ CHK(v8f_reduce_max(j) == 8.f);
+ /* clamp(i, lo, hi): each lane of i is expected to be limited to [lo, hi] */
+ k = v8f_clamp(i,
+ v8f_set(1.f, 1.f, 3.1f, 5.f, 4.f, 0.f, 0.f, -1.f),
+ v8f_set(1.f, 1.f, 4.f, 6.f, 4.f, 1.f, 6.f, 5.f));
+
+ CHK(v4f_x(v8f_abcd(k)) == 1.f);
+ CHK(v4f_y(v8f_abcd(k)) == 1.f);
+ CHK(v4f_z(v8f_abcd(k)) == 3.1f);
+ CHK(v4f_w(v8f_abcd(k)) == 5.f);
+ CHK(v4f_x(v8f_efgh(k)) == 4.f);
+ CHK(v4f_y(v8f_efgh(k)) == 1.f);
+ CHK(v4f_z(v8f_efgh(k)) == 6.f);
+ CHK(v4f_w(v8f_efgh(k)) == 5.f);
+
+ return 0;
+}
+
diff --git a/src/test_v8i.c b/src/test_v8i.c
@@ -0,0 +1,192 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+
+int
+main(int argc, char** argv)
+{
+ v8i_T i, j, k; /* scratch vectors: inputs and results of the tested calls */
+ ALIGN(32) int32_t tmp[8] = {0,1,2,3,4,5,6,7};
+ (void)argc, (void)argv;
+ /* load: lanes are tmp[0..7], read back through abcd (0-3) and efgh (4-7) */
+ i = v8i_load(tmp);
+ CHK(v4i_x(v8i_abcd(i)) == 0);
+ CHK(v4i_y(v8i_abcd(i)) == 1);
+ CHK(v4i_z(v8i_abcd(i)) == 2);
+ CHK(v4i_w(v8i_abcd(i)) == 3);
+ CHK(v4i_x(v8i_efgh(i)) == 4);
+ CHK(v4i_y(v8i_efgh(i)) == 5);
+ CHK(v4i_z(v8i_efgh(i)) == 6);
+ CHK(v4i_w(v8i_efgh(i)) == 7);
+ /* Zero tmp then store i: store must return tmp and restore its content */
+ tmp[0]= tmp[1] = tmp[2] = tmp[3] = 0;
+ tmp[4]= tmp[5] = tmp[6] = tmp[7] = 0;
+ CHK(v8i_store(tmp, i) == tmp);
+ CHK(tmp[0] == 0);
+ CHK(tmp[1] == 1);
+ CHK(tmp[2] == 2);
+ CHK(tmp[3] == 3);
+ CHK(tmp[4] == 4);
+ CHK(tmp[5] == 5);
+ CHK(tmp[6] == 6);
+ CHK(tmp[7] == 7);
+ /* set: the 8 arguments are expected in the lanes, in order */
+ i = v8i_set(1, 2, 3, 4, 5, 6, 7, 8);
+ CHK(v4i_x(v8i_abcd(i)) == 1);
+ CHK(v4i_y(v8i_abcd(i)) == 2);
+ CHK(v4i_z(v8i_abcd(i)) == 3);
+ CHK(v4i_w(v8i_abcd(i)) == 4);
+ CHK(v4i_x(v8i_efgh(i)) == 5);
+ CHK(v4i_y(v8i_efgh(i)) == 6);
+ CHK(v4i_z(v8i_efgh(i)) == 7);
+ CHK(v4i_w(v8i_efgh(i)) == 8);
+ /* set1: the same scalar is expected in the 8 lanes */
+ i = v8i_set1(-1);
+ CHK(v4i_x(v8i_abcd(i)) == -1);
+ CHK(v4i_y(v8i_abcd(i)) == -1);
+ CHK(v4i_z(v8i_abcd(i)) == -1);
+ CHK(v4i_w(v8i_abcd(i)) == -1);
+ CHK(v4i_x(v8i_efgh(i)) == -1);
+ CHK(v4i_y(v8i_efgh(i)) == -1);
+ CHK(v4i_z(v8i_efgh(i)) == -1);
+ CHK(v4i_w(v8i_efgh(i)) == -1);
+ /* zero: the 8 lanes are expected to be 0 */
+ i = v8i_zero();
+ CHK(v4i_x(v8i_abcd(i)) == 0);
+ CHK(v4i_y(v8i_abcd(i)) == 0);
+ CHK(v4i_z(v8i_abcd(i)) == 0);
+ CHK(v4i_w(v8i_abcd(i)) == 0);
+ CHK(v4i_x(v8i_efgh(i)) == 0);
+ CHK(v4i_y(v8i_efgh(i)) == 0);
+ CHK(v4i_z(v8i_efgh(i)) == 0);
+ CHK(v4i_w(v8i_efgh(i)) == 0);
+ /* set_v4i: the first v4i is read back as abcd, the second as efgh */
+ i = v8i_set_v4i(v4i_set(-1,-2,3,4), v4i_set(5,6,-7,-8));
+ CHK(v4i_x(v8i_abcd(i)) ==-1);
+ CHK(v4i_y(v8i_abcd(i)) ==-2);
+ CHK(v4i_z(v8i_abcd(i)) == 3);
+ CHK(v4i_w(v8i_abcd(i)) == 4);
+ CHK(v4i_x(v8i_efgh(i)) == 5);
+ CHK(v4i_y(v8i_efgh(i)) == 6);
+ CHK(v4i_z(v8i_efgh(i)) ==-7);
+ CHK(v4i_w(v8i_efgh(i)) ==-8);
+ /* Bitwise operators on two arbitrary bit patterns; or: i | j */
+ i = v8i_set
+ (0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F,
+ 0x00102030, 0x40506070, (int32_t)0x8090A0B0, (int32_t)0xC0D0E0F0);
+ j = v8i_set
+ (0x01020401, 0x70605040, 0x0F1F2F3F, 0x00000000,
+ 0x10204010, 0x06050400, (int32_t)0xF1F2F3F0, 0x10000000);
+ k = v8i_or(i, j);
+ CHK(v4i_x(v8i_abcd(k)) == (int32_t)0x01030603);
+ CHK(v4i_y(v8i_abcd(k)) == (int32_t)0x74655647);
+ CHK(v4i_z(v8i_abcd(k)) == (int32_t)0x0F1F2F3F);
+ CHK(v4i_w(v8i_abcd(k)) == (int32_t)0x0C0D0E0F);
+ CHK(v4i_x(v8i_efgh(k)) == (int32_t)0x10306030);
+ CHK(v4i_y(v8i_efgh(k)) == (int32_t)0x46556470);
+ CHK(v4i_z(v8i_efgh(k)) == (int32_t)0xF1F2F3F0);
+ CHK(v4i_w(v8i_efgh(k)) == (int32_t)0xD0D0E0F0);
+ /* and: i & j */
+ k = v8i_and(i, j);
+ CHK(v4i_x(v8i_abcd(k)) == (int32_t)0x00000001);
+ CHK(v4i_y(v8i_abcd(k)) == (int32_t)0x00000000);
+ CHK(v4i_z(v8i_abcd(k)) == (int32_t)0x08090A0B);
+ CHK(v4i_w(v8i_abcd(k)) == (int32_t)0x00000000);
+ CHK(v4i_x(v8i_efgh(k)) == (int32_t)0x00000010);
+ CHK(v4i_y(v8i_efgh(k)) == (int32_t)0x00000000);
+ CHK(v4i_z(v8i_efgh(k)) == (int32_t)0x8090A0B0);
+ CHK(v4i_w(v8i_efgh(k)) == (int32_t)0x00000000);
+ /* andnot(i, j): ~i & j, i.e. the bits set in j but not in i */
+ k = v8i_andnot(i, j);
+ CHK(v4i_x(v8i_abcd(k)) == (int32_t)0x01020400);
+ CHK(v4i_y(v8i_abcd(k)) == (int32_t)0x70605040);
+ CHK(v4i_z(v8i_abcd(k)) == (int32_t)0x07162534);
+ CHK(v4i_w(v8i_abcd(k)) == (int32_t)0x00000000);
+ CHK(v4i_x(v8i_efgh(k)) == (int32_t)0x10204000);
+ CHK(v4i_y(v8i_efgh(k)) == (int32_t)0x06050400);
+ CHK(v4i_z(v8i_efgh(k)) == (int32_t)0x71625340);
+ CHK(v4i_w(v8i_efgh(k)) == (int32_t)0x10000000);
+ /* xor: i ^ j */
+ k = v8i_xor(i, j);
+ CHK(v4i_x(v8i_abcd(k)) == (int32_t)0x01030602);
+ CHK(v4i_y(v8i_abcd(k)) == (int32_t)0x74655647);
+ CHK(v4i_z(v8i_abcd(k)) == (int32_t)0x07162534);
+ CHK(v4i_w(v8i_abcd(k)) == (int32_t)0x0C0D0E0F);
+ CHK(v4i_x(v8i_efgh(k)) == (int32_t)0x10306020);
+ CHK(v4i_y(v8i_efgh(k)) == (int32_t)0x46556470);
+ CHK(v4i_z(v8i_efgh(k)) == (int32_t)0x71625340);
+ CHK(v4i_w(v8i_efgh(k)) == (int32_t)0XD0D0E0F0);
+ /* Operands of the eq, neq, sel, min, max and reduce tests */
+ i = v8i_set( 1, 2,3,4,5, 6,7,8);
+ j = v8i_set(-2,-4,3,6,5,-1,8,8);
+ /* eq: ~0 in the lanes where i == j, 0 otherwise */
+ k = v8i_eq(i, j);
+ CHK(v4i_x(v8i_abcd(k)) == 0);
+ CHK(v4i_y(v8i_abcd(k)) == 0);
+ CHK(v4i_z(v8i_abcd(k)) ==~0);
+ CHK(v4i_w(v8i_abcd(k)) == 0);
+ CHK(v4i_x(v8i_efgh(k)) ==~0);
+ CHK(v4i_y(v8i_efgh(k)) == 0);
+ CHK(v4i_z(v8i_efgh(k)) == 0);
+ CHK(v4i_w(v8i_efgh(k)) ==~0);
+ /* neq: ~0 in the lanes where i != j, 0 otherwise */
+ k = v8i_neq(i, j);
+ CHK(v4i_x(v8i_abcd(k)) ==~0);
+ CHK(v4i_y(v8i_abcd(k)) ==~0);
+ CHK(v4i_z(v8i_abcd(k)) == 0);
+ CHK(v4i_w(v8i_abcd(k)) ==~0);
+ CHK(v4i_x(v8i_efgh(k)) == 0);
+ CHK(v4i_y(v8i_efgh(k)) ==~0);
+ CHK(v4i_z(v8i_efgh(k)) ==~0);
+ CHK(v4i_w(v8i_efgh(k)) == 0);
+ /* sel(i, j, mask): lane of j where the mask is ~0, lane of i where it is 0 */
+ k = v8i_sel(i, j, v8i_set(~0,~0,0,~0,0,0,~0,0));
+ CHK(v4i_x(v8i_abcd(k)) ==-2);
+ CHK(v4i_y(v8i_abcd(k)) ==-4);
+ CHK(v4i_z(v8i_abcd(k)) == 3);
+ CHK(v4i_w(v8i_abcd(k)) == 6);
+ CHK(v4i_x(v8i_efgh(k)) == 5);
+ CHK(v4i_y(v8i_efgh(k)) == 6);
+ CHK(v4i_z(v8i_efgh(k)) == 8);
+ CHK(v4i_w(v8i_efgh(k)) == 8);
+ /* min: lane-wise signed minimum of i and j */
+ k = v8i_min(i, j);
+ CHK(v4i_x(v8i_abcd(k)) ==-2);
+ CHK(v4i_y(v8i_abcd(k)) ==-4);
+ CHK(v4i_z(v8i_abcd(k)) == 3);
+ CHK(v4i_w(v8i_abcd(k)) == 4);
+ CHK(v4i_x(v8i_efgh(k)) == 5);
+ CHK(v4i_y(v8i_efgh(k)) ==-1);
+ CHK(v4i_z(v8i_efgh(k)) == 7);
+ CHK(v4i_w(v8i_efgh(k)) == 8);
+ /* max: lane-wise signed maximum of i and j */
+ k = v8i_max(i, j);
+ CHK(v4i_x(v8i_abcd(k)) == 1);
+ CHK(v4i_y(v8i_abcd(k)) == 2);
+ CHK(v4i_z(v8i_abcd(k)) == 3);
+ CHK(v4i_w(v8i_abcd(k)) == 6);
+ CHK(v4i_x(v8i_efgh(k)) == 5);
+ CHK(v4i_y(v8i_efgh(k)) == 6);
+ CHK(v4i_z(v8i_efgh(k)) == 8);
+ CHK(v4i_w(v8i_efgh(k)) == 8);
+ /* reduce_min_i32/reduce_max_i32: smallest/largest lane, compared as a scalar */
+ CHK(v8i_reduce_min_i32(i) == 1);
+ CHK(v8i_reduce_min_i32(j) ==-4);
+ CHK(v8i_reduce_max_i32(i) == 8);
+ CHK(v8i_reduce_max_i32(j) == 8);
+
+ return 0;
+}
diff --git a/src/vXf_begin.h b/src/vXf_begin.h
@@ -0,0 +1,57 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "rsimd.h"
+
+/* Refuse a second inclusion; vXf_end.h undefines VXF_BEGIN_H to re-enable it */
+#ifdef VXF_BEGIN_H
+ #error "The vXf_begin.h header is already included"
+#endif
+#define VXF_BEGIN_H
+
+/* Check the parameter: the includer must define RSIMD_WIDTH__ as 4 or 8 */
+#if !defined(RSIMD_WIDTH__)
+ #error "Undefined RSIMD_WIDTH__ macro"
+#endif
+#if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8
+ #error "Unexpected RSIMD_WIDTH__ value of "STR(RSIMD_WIDTH__)
+#endif
+
+/* Check that the internal macros defined below are not already defined */
+#if defined(RSIMD_vXf__) \
+ || defined(RSIMD_vXf_T__) \
+ || defined(RSIMD_Sleef__) \
+ || defined(RSIMD_Sleef_ULP__) \
+ || defined(RSIMD_Sleef_vecf__)
+ #error "Unexpected macro definition"
+#endif
+
+/* Macros generic to RSIMD_WIDTH__: e.g. v4f_<Func> if CONCAT pastes its args */
+#define RSIMD_vXf__(Func) \
+ CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func)
+#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T) /* e.g. v4f_T */
+
+/* Sleef macros: Sleef_<Func><width> and Sleef_<Func><width>_<Suffix> */
+#define RSIMD_Sleef__(Func) CONCAT(CONCAT(Sleef_, Func), RSIMD_WIDTH__)
+#define RSIMD_Sleef_ULP__(Func, Suffix) \
+ CONCAT(CONCAT(CONCAT(CONCAT(Sleef_, Func), RSIMD_WIDTH__), _), Suffix)
+
+/* Vector types of the Sleef library: Sleef___m128_<Dim> or Sleef___m256_<Dim> */
+#if RSIMD_WIDTH__ == 4
+ #define RSIMD_Sleef_vecf__(Dim) CONCAT(Sleef___m128_, Dim)
+#elif RSIMD_WIDTH__ == 8
+ #define RSIMD_Sleef_vecf__(Dim) CONCAT(Sleef___m256_, Dim)
+#endif
+
diff --git a/src/vXf_end.h b/src/vXf_end.h
@@ -0,0 +1,31 @@
+/* Copyright (C) 2014-2021 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef VXF_BEGIN_H /* defined by vXf_begin.h and undefined at the end of this file */
+ #error "The vXf_begin.h file must be included"
+#endif
+
+/* Undef the helper macros that vXf_begin.h defines */
+#undef RSIMD_vXf__
+#undef RSIMD_vXf_T__
+#undef RSIMD_Sleef__
+#undef RSIMD_Sleef_ULP__
+#undef RSIMD_Sleef_vecf__
+
+/* Undef the parameter: vXf_begin.h requires it to be defined before inclusion */
+#undef RSIMD_WIDTH__
+/* Let vXf_begin.h be included again */
+#undef VXF_BEGIN_H
+