rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit 2757db8fe466d8de414526d46317b397356f4ba8
parent f816406cd607ff7a3b4e7c84d45009e955eae7c9
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Wed, 18 Oct 2023 15:39:04 +0200

Merge branch 'release_0.5'

Diffstat:
M.gitignore | 13++++++++-----
AMakefile | 231+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
MREADME.md | 78+++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Dcmake/CMakeLists.txt | 239-------------------------------------------------------------------------------
Dcmake/RSIMDConfig.cmake.in | 133-------------------------------------------------------------------------------
Dcmake/RSIMDConfigVersion.cmake.in | 54------------------------------------------------------
Dcmake/SleefConfig.cmake | 35-----------------------------------
Aconfig.mk | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amake.sh | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Arsimd.pc.in | 12++++++++++++
Msrc/rsimd.h | 1-
Msrc/soaXfY_begin.h | 2+-
Msrc/sse/ssei.h | 4++--
13 files changed, 505 insertions(+), 505 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -1,10 +1,13 @@
 .gitignore
-CMakeCache.txt
-CMakeFiles
-Makefile
-tmp
 [Bb]uild*
 *.sw[po]
-*.[ao]
+*.[aod]
+*.so
 *~
 tags
+test*
+!test*.[ch]
+.test
+.simd
+.config
+rsimd*.pc
diff --git a/Makefile b/Makefile
@@ -0,0 +1,243 @@
+# Copyright (C) 2014-2019, 2021, 2023 Vincent Forest (vaplv@free.fr)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+.POSIX:
+.SUFFIXES: # Clean up default inference rules
+
+include config.mk
+
+LIBNAME_STATIC = librsimd.a
+LIBNAME_SHARED = librsimd.so
+LIBNAME = $(LIBNAME_$(LIB_TYPE))
+
+################################################################################
+# Library building
+################################################################################
+SRC_SIMD128 = src/aosf44.c src/aosq.c src/math4.c
+SRC_SIMD256 = src/math8.c $(SRC_SIMD128)
+SRC = $(SRC_SIMD$(SIMD_WIDTH))
+OBJ = $(SRC:.c=.o)
+DEP = $(SRC:.c=.d)
+
+# Default goal. It has no recipe here: the dispatch rule of the
+# "Miscellaneous targets" section attaches one to it.
+build_library:
+
+# Re-invoke make with .simd and the dependency files loaded as makefiles, then
+# build $(LIBNAME), or the shared library when LIBNAME is empty
+build_library__: .config $(DEP)
+	@$(MAKE) -f.simd -fMakefile $$(for i in $(DEP); do echo -f $${i}; done) \
+	$$(if [ -n "$(LIBNAME)" ]; then\
+	     echo "$(LIBNAME)";\
+	   else\
+	     echo "$(LIBNAME_SHARED)";\
+	   fi)
+
+$(DEP) $(OBJ): config.mk
+
+$(LIBNAME_SHARED): $(OBJ)
+	$(CC) -std=c99 $(CFLAGS_SO) $(DPDC_CFLAGS) -o $@ $(OBJ) $(LDFLAGS_SO) $(DPDC_LIBS)
+
+$(LIBNAME_STATIC): librsimd.o
+	$(AR) -rc $@ $?
+	$(RANLIB) $@
+
+# Merge the objects into one relocatable object, then run objcopy on it
+librsimd.o: $(OBJ)
+	$(LD) -r $(OBJ) -o $@
+	$(OBJCOPY) $(OCPFLAGS) $@
+
+# Stamp file written once pkg-config has found the required rsys and sleef
+.config: make.sh config.mk
+	@if ! $(PKG_CONFIG) --atleast-version $(RSYS_VERSION) rsys; then \
+	  echo "rsys $(RSYS_VERSION) not found"; exit 1; fi
+	@if ! $(PKG_CONFIG) --atleast-version $(SLEEF_VERSION) sleef; then \
+	  echo "sleef $(SLEEF_VERSION) not found"; exit 1; fi
+	@echo "config done" > $@
+
+# Makefile fragment that defines SIMD_WIDTH; its content is printed by make.sh
+.simd: make.sh config.mk
+	@$(SHELL) make.sh config_simd $(MAKE) > $@
+
+.SUFFIXES: .c .d .o
+.c.d:
+	@$(CC) -std=c99 $(CFLAGS_SO) $(DPDC_CFLAGS) -MM -MT "$(@:.d=.o) $@" $< -MF $@
+
+.c.o:
+	$(CC) -std=c99 $(CFLAGS_SO) $(DPDC_CFLAGS) -DRSIMD_SHARED_BUILD -c $< -o $@
+
+################################################################################
+# Miscellaneous targets
+################################################################################
+all: build_library build_tests
+
+clean__: clean_test
+	rm -f $(OBJ) $(TEST_OBJ) $(LIBNAME)
+	rm -f .config .simd .test librsimd.o rsimd.pc rsimd-local.pc
+
+distclean__: clean__
+	rm -f $(DEP) $(TEST_DEP) .test
+
+lint:
+	shellcheck -o all make.sh
+
+# Public targets: make sure .simd exists, then run the matching "<target>__"
+# rule in a sub-make that loads .simd before this Makefile
+build_library build_tests clean distclean install .test test uninstall: .simd
+	@$(MAKE) -f.simd -fMakefile $@__
+
+################################################################################
+# Installation
+################################################################################
+API_SIMD128=\
+ src/aosf33.h\
+ src/aosf44.h\
+ src/aosq.h\
+ src/math.h\
+ src/mathX.h\
+ src/math4.h\
+ src/rsimd.h\
+ src/soaXfY.h\
+ src/soaXfY_begin.h\
+ src/soaXfY_end.h\
+ src/soaXf2.h\
+ src/soaXf3.h\
+ src/soa4f2.h\
+ src/soa4f3.h\
+ src/soa4f4.h\
+ src/vXf_begin.h\
+ src/vXf_end.h\
+ src/sse/sse.h\
+ src/sse/ssef.h\
+ src/sse/ssei.h\
+ src/sse/sse_swz.h
+API_SIMD256=\
+ src/math8.h\
+ src/soa8f2.h\
+ src/soa8f3.h\
+ src/soa8f4.h\
+ src/avx/avx.h\
+ src/avx/avxf.h\
+ src/avx/avxi.h\
+ $(API_SIMD128)
+API = $(API_SIMD$(SIMD_WIDTH))
+
+pkg:
+	sed -e 's#@PREFIX@#$(PREFIX)#g' \
+	    -e 's#@VERSION@#$(VERSION)#g' \
+	    -e 's#@SLEEF_VERSION@#$(SLEEF_VERSION)#g' \
+	    -e 's#@RSYS_VERSION@#$(RSYS_VERSION)#g' \
+	    -e 's#@CFLAGS_SIMD@#$(CFLAGS_SIMD)#g' \
+	    rsimd.pc.in > rsimd.pc
+
+# Remove the include directive rather than setting it to "./src", to prevent
+# the source directory from having a higher priority than the system include
+# directories. In such a situation, the local "math.h" file could be included
+# instead of the "math.h" header provided by the C standard library. Note that
+# this is no longer a problem with the common pc file: the "math.h" file is
+# installed in the "rsimd" subdirectory, which is therefore a prefix of the
+# header file allowing it to be distinguished from the header of the standard
+# library.
+#
+# config.mk is a prerequisite because it defines the values substituted below.
+rsimd-local.pc: rsimd.pc.in config.mk
+	sed -e '1,2d' \
+	    -e 's#^libdir=.*#libdir=./#' \
+	    -e 's#@VERSION@#$(VERSION)#g' \
+	    -e 's#@SLEEF_VERSION@#$(SLEEF_VERSION)#g' \
+	    -e 's#@RSYS_VERSION@#$(RSYS_VERSION)#g' \
+	    -e 's#@CFLAGS_SIMD@#$(CFLAGS_SIMD)#g' \
+	    -e 's#-I$${includedir}##g' \
+	    rsimd.pc.in > $@
+
+install__: build_library pkg
+	@$(SHELL) make.sh install "$(DESTDIR)$(PREFIX)/lib" $(LIBNAME)
+	@$(SHELL) make.sh install "$(DESTDIR)$(PREFIX)/lib/pkgconfig" rsimd.pc
+	@$(SHELL) make.sh install "$(DESTDIR)$(PREFIX)/include/rsimd" $(API)
+	@$(SHELL) make.sh install "$(DESTDIR)$(PREFIX)/share/doc/rsimd" COPYING README.md
+
+uninstall__:
+	rm -f $(DESTDIR)$(PREFIX)/lib/$(LIBNAME)
+	rm -f $(DESTDIR)$(PREFIX)/lib/pkgconfig/rsimd.pc
+	rm -f $(DESTDIR)$(PREFIX)/share/doc/rsimd/COPYING
+	rm -f $(DESTDIR)$(PREFIX)/share/doc/rsimd/README.md
+	rm -f $$(echo $(API) | sed 's,src\/,$(DESTDIR)$(PREFIX)\/include\/rsimd\/,g')
+
+################################################################################
+# Tests
+################################################################################
+TEST_SIMD128=\
+ src/test_aosf33.c\
+ src/test_aosf44.c\
+ src/test_aosq.c\
+ src/test_math4.c\
+ src/test_soa4f2.c\
+ src/test_soa4f3.c\
+ src/test_soa4f4.c\
+ src/test_v4f.c\
+ src/test_v4i.c
+TEST_SIMD256=\
+ src/test_math8.c\
+ src/test_soa8f2.c\
+ src/test_soa8f3.c\
+ src/test_soa8f4.c\
+ $(TEST_SIMD128)
+TEST_SRC = $(TEST_SIMD$(SIMD_WIDTH))
+TEST_OBJ = $(TEST_SRC:.c=.o)
+TEST_DEP = $(TEST_SRC:.c=.d)
+
+PKG_CONFIG_LOCAL = PKG_CONFIG_PATH="./:$${PKG_CONFIG_PATH}" $(PKG_CONFIG)
+RSIMD_CFLAGS = $$($(PKG_CONFIG_LOCAL) $(PCFLAGS) --cflags rsimd-local.pc)
+RSIMD_LIBS = $$($(PKG_CONFIG_LOCAL) $(PCFLAGS) --libs rsimd-local.pc)
+
+build_tests__: build_library $(TEST_DEP) .test .simd
+	@$(MAKE) -f.simd -fMakefile -f.test \
+	$$(for i in $(TEST_DEP); do echo -f"$${i}"; done) test_bin
+
+test__: build_tests__
+	@$(SHELL) make.sh run_test $(TEST_SRC)
+
+# Generate .test, the makefile fragment that lists the test programs
+.test__: Makefile make.sh
+	@$(SHELL) make.sh config_test $(TEST_SRC) > .test
+
+clean_test:
+	@$(SHELL) make.sh clean_test $(TEST_SRC)
+
+$(TEST_DEP): config.mk rsimd-local.pc
+	@$(CC) -std=c89 $(CFLAGS_EXE) $(RSIMD_CFLAGS) $(RSYS_CFLAGS) \
+	-MM -MT "$(@:.d=.o) $@" $(@:.d=.c) -MF $@
+
+$(TEST_OBJ): config.mk rsimd-local.pc
+	$(CC) -std=c89 $(CFLAGS_EXE) $(RSIMD_CFLAGS) $(RSYS_CFLAGS) -c $(@:.o=.c) -o $@
+
+test_aosf33 \
+test_aosf44 \
+test_aosq \
+test_soa4f2 \
+test_soa4f3 \
+test_soa4f4 \
+test_soa8f2 \
+test_soa8f3 \
+test_soa8f4 \
+test_v4f \
+test_v4i \
+: config.mk rsimd-local.pc $(LIBNAME)
+	$(CC) $(CFLAGS_EXE) -o $@ src/$@.o $(LDFLAGS_EXE) $(RSIMD_LIBS) $(RSYS_LIBS)
+
+test_math4 \
+test_math8 \
+: config.mk rsimd-local.pc $(LIBNAME)
+	$(CC) $(CFLAGS_EXE) -o $@ src/$@.o $(LDFLAGS_EXE) $(RSIMD_LIBS) $(RSYS_LIBS) -lm
diff --git a/README.md b/README.md
@@ -1,47 +1,54 @@
 # RSIMD
-This C89 library defines an interface that encapsulates and make easier the
-manipulation of SIMD instruction sets. It also provides a SIMD implementation
-of linear algebra operations for 3x3 and 4x4 matrices as well as quaternions
-arranged in an `Array of Structures` SIMD layout. Linear algebra functions on
-`Structure of Arrays` 2/3/4 dimensions vectors are also implemented.
Finally it -exposes a vectorized version of some math functions by relying on the -[Sleef](https://sleef.org/) library. - -## How to build - -The library uses [CMake](http://www.cmake.org) and the -[RCMake](https://gitlab.com/vaplv/rcmake/) package to build. It also depends on -the [RSys](https://gitlab.com/vaplv/rsys/) and the [Sleef](https://sleef.org) -library. First, install the RCMake package, the RSys and the Sleef libraries. -Then, generate the project from the cmake/CMakeLists.txt file by appending the -RCMake, RSys and Sleef install directories to the `CMAKE_PREFIX_PATH` variable. -The resulting project can be edited, built, tested and installed as any CMake -project. +RSIMD is a C library whose purpose is to facilitate the use of SIMD +instruction sets. It implements linear algebra operations for 3x3 and +4x4 matrices stored in SIMD vectors as arrays of structures. Vectorized +linear algebra operations on structures of arrays, for 2, 3 and 4 +dimensional vectors are also implemented. Finally, it provides a +vectorized version of the standard mathematical functions. + +## Requirements + +- C compiler (C99) +- POSIX make +- [RSys](https://gitlab.com/vaplv/rsys/) +- [Sleef](https://sleef.org/) + +## Installation + +Edit config.mk as needed, then run: + + make clean install ## Release notes +### Version 0.5 + +- Replace CMake by Makefile as build system. +- Provide a pkg-config file to link the library as an external + dependency. + ### Version 0.4 -- Make the sleef library an internal dependency that library users don't have - to worry about: calls to the sleef library are moved from API headers to C - files. -- Set the C standard to C99 instead of C89: this allows the use of official - (i.e. unpatched) sleef headers whose comments are written using C99-style - comments. Note that since sleef is now an internal dependency, the rsimd API - remains C89 compliant. 
-- Remove the file COPYING.LESSER from the installation target: since version - 0.3, the library is distributed under GPLv3+. +- Make the sleef library an internal dependency that library users don't + have to worry about: calls to the sleef library are moved from API + headers to C files. +- Set the C standard to C99 instead of C89: this allows the use of + official (i.e. unpatched) sleef headers whose comments are written + using C99-style comments. Note that since sleef is now an internal + dependency, the rsimd API remains C89 compliant. +- Remove the file COPYING.LESSER from the installation target: since + version 0.3, the library is distributed under GPLv3+. ### Version 0.3 - Add 8-way vector API for the float and int32 types. - Add the `v<4|8>i_[reduce_]<min|max>` functions. - Add the `v4i_minus` function. -- Rely on the [Sleef](https://sleef.org) library to replace the hand-crafted - implementation of the trigonometric functions. -- Add math functions for both 4-way and 8-way vectors. Provided math functions are: - copysign, floor, pow, exp[<2|10>] and log[<2|10>]. +- Rely on the [Sleef](https://sleef.org) library to replace the + hand-crafted implementation of the trigonometric functions. +- Add math functions for both 4-way and 8-way vectors. Provided math + functions are: copysign, floor, pow, exp[<2|10>] and log[<2|10>]. ### Version 0.2.1 @@ -51,7 +58,8 @@ project. ## License -Copyright (C) 2014-2019, 2021, 2023 Vincent Forest (vaplv@free.fr). RSIMD is -free software released under the GPL v3+ license: GNU GPL version 3 or later. -You are welcome to redistribute it under certain conditions; refer to the -COPYING file for details. +Copyright (C) 2014-2019, 2021, 2023 Vincent Forest (vaplv@free.fr) + +RSIMD is free software released under the GPL v3+ license: GNU GPL +version 3 or later. You are welcome to redistribute it under certain +conditions; refer to the COPYING file for details. 
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -1,239 +0,0 @@ -# Copyright (C) 2014-2019, 2021, 2023 Vincent Forest (vaplv@free.fr) -# -# The RSIMD CMake is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# The RSIMD CMake is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>. - -cmake_minimum_required(VERSION 3.1) -project(rsimd C) -enable_testing() - -option(NO_TEST "Disable the tests" OFF) -set(RSIMD_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../src) - -################################################################################ -# Check dependencies -################################################################################ -set(Sleef_DIR ${PROJECT_SOURCE_DIR}) - -find_package(RCMake REQUIRED) -find_package(RSys 0.12 REQUIRED) -find_package(PkgConfig REQUIRED) -find_package(Sleef REQUIRED) - -include_directories(${RSys_INCLUDE_DIR} ${Sleef_INCLUDE_DIR}) -include(CheckCCompilerFlag) -set(CMAKE_MODULE_PATH ${RCMAKE_SOURCE_DIR}) -include(rcmake) - -################################################################################ -# Check compiler features -################################################################################ -if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux" -OR NOT CMAKE_COMPILER_IS_GNUCC) - message(STATUS ${CMAKE_SYSTEM_NAME}) - message(FATAL_ERROR "Unsupported platform") -endif() - -execute_process(COMMAND cat "/proc/cpuinfo" - OUTPUT_VARIABLE CPUINFO_OUT - ERROR_VARIABLE CPUINFO_ERR - RESULT_VARIABLE CPUINFO_RES) -if(NOT 
CPUINFO_RES EQUAL 0) - message(FATAL_ERROR "${CPUINFO_ERR}") -endif() - -string(REGEX MATCH "[ \t\r\n]+sse2[ \t\r\n]+" SSE2 ${CPUINFO_OUT}) -string(REGEX MATCH "[ \t\r\n]+sse4_1[ \t\r\n]+" SSE4_1 ${CPUINFO_OUT}) -string(REGEX MATCH "[ \t\r\n]+avx[ \t\r\n]+" AVX ${CPUINFO_OUT}) -string(REGEX MATCH "[ \t\r\n]+fma[ \t\r\n]+" FMA ${CPUINFO_OUT}) - -set(CFLAGS "-std=c99") # Required by the sleef library's c99 style comments - -if(SSE2) - unset(SSE2) - CHECK_C_COMPILER_FLAG("-msse2" SSE2) - message(STATUS "Use the SSE2 instruction set ") - set(CFLAGS "${CFLAGS} -msse2") -else() - message(FATAL_ERROR "The SSE2 instruction set must be supported.") -endif() -if(SSE4_1) - unset(SSE4_1) - CHECK_C_COMPILER_FLAG("-msse4.1" SSE4_1) - message(STATUS "Use the SSE4.1 instruction set") - set(CFLAGS "${CFLAGS} -msse4.1") -endif() -if(AVX) - unset(AVX) - CHECK_C_COMPILER_FLAG("-mavx" AVX) - message(STATUS "Use the AVX instruction set") - set(CFLAGS "${CFLAGS} -mavx") -endif() -if(FMA) - unset(FMA) - CHECK_C_COMPILER_FLAG("-mfma" FMA) - message(STATUS "Use the FMA instruction set") - set(CFLAGS "${CFLAGS} -mfma") -endif() - -################################################################################ -# Configure and define targets -################################################################################ -set(VERSION_MAJOR 0) -set(VERSION_MINOR 4) -set(VERSION_PATCH 0) -set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}) - -set(RSIMD_SSE2 ${SSE2}) -set(RSIMD_SSE4_1 ${SSE4_1}) -set(RSIMD_AVX ${AVX}) -set(RSIMD_FMA ${FMA}) - -# Configure the files generic to the RSIMD version -configure_file(${RSIMD_SOURCE_DIR}/rsimd_version.h.in - ${CMAKE_CURRENT_BINARY_DIR}/rsimd_version.h @ONLY) -configure_file(${PROJECT_SOURCE_DIR}/RSIMDConfigVersion.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake @ONLY) -configure_file(${PROJECT_SOURCE_DIR}/RSIMDConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfig.cmake @ONLY) - -set(RSIMD_FILES_INC_LEGACY - aosf33.h - 
aosf44.h - aosq.h - math.h - mathX.h - math4.h - math8.h - rsimd.h - soaXfY.h - soaXfY_begin.h - soaXfY_end.h - soaXf2.h - soaXf3.h - soa4f2.h - soa4f3.h - soa4f4.h - soa8f2.h - soa8f3.h - soa8f4.h - vXf_begin.h - vXf_end.h) -set(RSIMD_FILES_INC_SSE - sse/sse.h - sse/ssef.h - sse/ssei.h - sse/sse_swz.h) -set(RSIMD_FILES_INC_AVX - avx/avx.h - avx/avxf.h - avx/avxi.h) -set(RSIMD_FILES_SRC - math4.c - aosf44.c - aosq.c) -if(AVX) - set(RSIMD_FILES_SRC ${RSIMD_FILES_SRC} math8.c) -endif() -set(RSIMD_FILES_DOC COPYING README.md) -set(RSIMD_FILES_CMAKE - RSIMDConfig.cmake - RSIMDConfigVersion.cmake) -rcmake_prepend_path(RSIMD_FILES_INC_LEGACY ${RSIMD_SOURCE_DIR}) -rcmake_prepend_path(RSIMD_FILES_INC_SSE ${RSIMD_SOURCE_DIR}) -rcmake_prepend_path(RSIMD_FILES_INC_AVX ${RSIMD_SOURCE_DIR}) -rcmake_prepend_path(RSIMD_FILES_SRC ${RSIMD_SOURCE_DIR}) -rcmake_prepend_path(RSIMD_FILES_DOC ${PROJECT_SOURCE_DIR}/../) -rcmake_prepend_path(RSIMD_FILES_CMAKE ${PROJECT_SOURCE_DIR}/) -set(RSIMD_FILES_INC - ${RSIMD_FILES_INC_LEGACY} - ${RSIMD_FILES_INC_SSE} - ${RSIMD_FILES_INC_AVX}) - -add_library(rsimd SHARED ${RSIMD_FILES_INC} ${RSIMD_FILES_SRC}) -target_link_libraries(rsimd Sleef) -set_target_properties(rsimd PROPERTIES DEFINE_SYMBOL RSIMD_SHARED_BUILD) -set_target_properties(rsimd PROPERTIES COMPILE_FLAGS "${CFLAGS}") -set_target_properties(rsimd PROPERTIES - VERSION ${VERSION} - SOVERSION ${VERSION_MAJOR}) - -################################################################################ -# Add tests -################################################################################ -if(NOT NO_TEST) - function(new_test_named _name _src) - add_executable(${_name} ${RSIMD_SOURCE_DIR}/${_src}.c) - target_link_libraries(${_name} rsimd m) - add_test(${_name} ${_name}) - if(NOT "${ARGN}" STREQUAL "") - set_target_properties(${_name} PROPERTIES COMPILE_FLAGS ${ARGN}) - endif() - endfunction() - - function(new_test _name) - new_test_named(${_name} ${_name} ${ARGN}) - endfunction() - - 
new_test(test_v4f) - new_test(test_v4i) - new_test(test_aosf33) - new_test(test_aosf44) - new_test(test_aosq) - new_test(test_math4) - new_test(test_soa4f2) - new_test(test_soa4f3) - new_test(test_soa4f4) - - if(SSE4_1 AND CMAKE_COMPILER_IS_GNUCC) - new_test_named(test_v4f_sse4_1 test_v4f "-msse4.1") - new_test_named(test_v4i_sse4_1 test_v4i "-msse4.1") - endif() - - if(FMA AND CMAKE_COMPILER_IS_GNUCC) - new_test_named(test_v4f_fma test_v4f "-mfma") - new_test_named(test_soa8f2_fma test_soa8f2 "-mfma") - new_test_named(test_soa8f3_fma test_soa8f3 "-mfma") - new_test_named(test_soa8f4_fma test_soa8f4 "-mfma") - endif() - - if(AVX AND CMAKE_COMPILER_IS_GNUCC) - new_test(test_math8 "-mavx") - new_test(test_v8f "-mavx") - new_test(test_v8i "-mavx") - new_test(test_soa8f2 "-mavx") - new_test(test_soa8f3 "-mavx") - new_test(test_soa8f4 "-mavx") - endif(AVX AND CMAKE_COMPILER_IS_GNUCC) - -endif(NOT NO_TEST) - -################################################################################ -# Install directives -################################################################################ -install(TARGETS rsimd - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin) -install(FILES ${RSIMD_FILES_INC_LEGACY} DESTINATION include/rsimd) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/rsimd_version.h DESTINATION include/rsimd) -install(FILES ${RSIMD_FILES_INC_SSE} DESTINATION include/rsimd/sse) -install(FILES ${RSIMD_FILES_INC_AVX} DESTINATION include/rsimd/avx) -install(FILES ${RSIMD_FILES_DOC} DESTINATION share/doc/rsimd) -install(FILES ${Sleef_DIR}/SleefConfig.cmake DESTINATION lib/cmake/Sleef/) - -install(FILES - ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfig.cmake - ${CMAKE_CURRENT_BINARY_DIR}/RSIMDConfigVersion.cmake - DESTINATION lib/cmake/RSIMD) - diff --git a/cmake/RSIMDConfig.cmake.in b/cmake/RSIMDConfig.cmake.in @@ -1,133 +0,0 @@ -# Copyright (C) 2014-2019, 2021, 2023 Vincent Forest (vaplv@free.fr) -# -# This program is free software: you can 
redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -cmake_minimum_required(VERSION 3.1) - -# Check dependenc -find_package(Sleef REQUIRED) - -set(RSIMD_SSE2 @RSIMD_SSE2@) -set(RSIMD_SSE4_1 @RSIMD_SSE4_1@) -set(RSIMD_AVX @RSIMD_AVX@) -set(RSIMD_FMA @RSIMD_FMA@) - -# Check compiler features -if(CMAKE_COMPILER_IS_GNUCC) - include(CheckCCompilerFlag) - if(RSIMD_SSE2) - CHECK_C_COMPILER_FLAG("-msse2" SSE2) - if(SSE2) - list(APPEND _compile_flags -msse2) - endif() - endif() - if(RSIMD_SSE4_1) - CHECK_C_COMPILER_FLAG("-msse4.1" SSE4_1) - if(SSE4_1) - list(APPEND _compile_flags -msse4.1) - endif() - endif() - if(RSIMD_AVX) - CHECK_C_COMPILER_FLAG("-mavx" AVX) - if(AVX) - list(APPEND _compile_flags -mavx) - endif() - endif() - if(RSIMD_FMA) - CHECK_C_COMPILER_FLAG("-mfma" FMA) - if(FMA) - list(APPEND _compile_flags -mfma) - endif() - endif() -endif() - -# Try to find the RSIMD devel. 
Once done this will define: -# - RSIMD_FOUND: system has RSIMD -# - RSIMD_INCLUDE_DIR: the include directory -# - RSIMD Target: Link this to use rsimd - -# Look for library header -find_path(RSIMD_INCLUDE_DIR rsimd/rsimd_version.h) - -# Look for Release, Debug, RelWithDebInfo and MinSizeRel libraries -unset(RSIMD_LIBRARY CACHE) -unset(RSIMD_LIBRARY_RELEASE CACHE) -unset(RSIMD_LIBRARY_DEBUG CACHE) -unset(RSIMD_LIBRARY_RELWITHDEBINFO CACHE) -unset(RSIMD_LIBRARY_MINSIZEREL CACHE) - -# Find per configuration type libraries -find_library(RSIMD_LIBRARY_RELEASE - rsimd - PATH_SUFFIXES bin Bin BIN - DOC "Path to the library rsimd used during release builds.") -find_library(RSIMD_LIBRARY_DEBUG - rsimd-dbg - PATH_SUFFIXES bin Bin BIN - DOC "Path to the library rsimd used during debug builds.") -find_library(RSIMD_LIBRARY_RELWITHDEBINFO - rsimd-rdbg - PATH_SUFFIXES bin Bin BIN - DOC "Path to the library rsimd used during release with debug info builds.") -find_library(RSIMD_LIBRARY_MINSIZEREL - rsimd-mszr - PATH_SUFFIXES bin Bin BIN - DOC "Path to the library rsimd used during minsize builds.") - -# Define the generic rsimd library -if(RSIMD_LIBRARY_RELEASE) - set(RSIMD_LIBRARY ${RSIMD_LIBRARY_RELEASE}) -elseif(RSIMD_LIBRARY_RELWITHDEBINFO) - set(RSIMD_LIBRARY ${RSIMD_LIBRARY_RELWITHDEBINFO}) -elseif(RSIMD_LIBRARY_MINSIZEREL) - set(RSIMD_LIBRARY ${RSIMD_LIBRARY_MINSIZEREL}) -elseif(RSIMD_LIBRARY_DEBUG) - set(RSIMD_LIBRARY ${RSIMD_LIBRARY_DEBUG}) -endif() - -# Define the per configuration library fallback when not found -set(_configs RELEASE DEBUG RELWITHDEBINFO MINSIZEREL) -foreach(_cfg ${_configs}) - if(NOT RSIMD_LIBRARY_${_cfg}) - get_property(_doc CACHE RSIMD_LIBRARY_${_cfg} PROPERTY HELPSTRING) - set(RSIMD_LIBRARY_${_cfg} - ${RSIMD_LIBRARY} CACHE PATH ${_doc} FORCE) - endif() -endforeach() - -# Create the imported library target -add_library(RSIMD SHARED IMPORTED) - -# Setup the properties of the imported target -if(CMAKE_HOST_WIN32) - set(_import_prop IMPORTED_IMPLIB) 
-else() - set(_import_prop IMPORTED_LOCATION) -endif() -set_target_properties(RSIMD PROPERTIES - ${_import_prop} ${RSIMD_LIBRARY} - ${_import_prop}_RELEASE ${RSIMD_LIBRARY_RELEASE} - ${_import_prop}_DEBUG ${RSIMD_LIBRARY_DEBUG} - ${_import_prop}_RELWITHDEBINFO ${RSIMD_LIBRARY_RELWITHDEBINFO} - ${_import_prop}_MINSIZEREL ${RSIMD_LIBRARY_MINSIZEREL} - INTERFACE_INCLUDE_DIRECTORIES ${RSIMD_INCLUDE_DIR} - INTERFACE_LINK_LIBRARIES Sleef - INTERFACE_COMPILE_OPTIONS "${_compile_flags}") - -# Check the package -include(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(RSIMD DEFAULT_MSG - RSIMD_INCLUDE_DIR - RSIMD_LIBRARY) - diff --git a/cmake/RSIMDConfigVersion.cmake.in b/cmake/RSIMDConfigVersion.cmake.in @@ -1,54 +0,0 @@ -# Copyright (C) 2014-2019, 2021, 2023 Vincent Forest (vaplv@free.fr) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. 
- -set(VERSION_MAJOR @VERSION_MAJOR@) -set(VERSION_MINOR @VERSION_MINOR@) -set(VERSION_PATCH @VERSION_PATCH@) -set(PACKAGE_VERSION "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}") - -if(NOT PACKAGE_FIND_VERSION - OR PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION) - set(PACKAGE_VERSION_COMPATIBLE TRUE) - set(PACKAGE_VERSION_EXACT TRUE) - set(PACKAGE_VERSION_UNSUITABLE FALSE) - return() -endif() - -if(NOT VERSION_MAJOR VERSION_EQUAL PACKAGE_FIND_VERSION_MAJOR) - set(PACKAGE_VERSION_COMPATIBLE FALSE) - set(PACKAGE_VERSION_EXACT FALSE) - set(PACKAGE_VERSION_UNSUITABLE TRUE) - return() -endif() - -if(VERSION_MINOR VERSION_LESS PACKAGE_FIND_VERSION_MINOR) - set(PACKAGE_VERSION_COMPATIBLE FALSE) - set(PACKAGE_VERSION_EXACT FALSE) - set(PACKAGE_VERSION_UNSUITABLE TRUE) - return() -endif() - -if(VERSION_MINOR VERSION_EQUAL PACKAGE_FIND_VERSION_MINOR) - if(VERSION_PATCH VERSION_LESS PACKAGE_FIND_VERSION_PATCH) - set(PACKAGE_VERSION_COMPATIBLE FALSE) - set(PACKAGE_VERSION_EXACT FALSE) - set(PACKAGE_VERSION_UNSUITABLE TRUE) - return() - endif() -endif() - -set(PACKAGE_VERSION_COMPATIBLE TRUE) -set(PACKAGE_VERSION_EXACT FALSE) -set(PACKAGE_VERSION_UNSUITABLE FALSE) diff --git a/cmake/SleefConfig.cmake b/cmake/SleefConfig.cmake @@ -1,35 +0,0 @@ -# Copyright (C) 2014-2019, 2021, 2023 Vincent Forest (vaplv@free.fr) -# -# The RSIMD CMake is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# The RSIMD CMake is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with the RSIMD CMake. If not, see <http://www.gnu.org/licenses/>. 
-
-cmake_minimum_required(VERSION 3.1)
-
-# Look for library header
-find_path(Sleef_INCLUDE_DIR sleef.h)
-
-find_library(Sleef_LIBRARY sleef PATH_SUFFIXES lib64
-  DOC "Path to the sleef library")
-
-# Create the imported library target
-add_library(Sleef SHARED IMPORTED)
-set_target_properties(Sleef PROPERTIES
-  IMPORTED_LOCATION ${Sleef_LIBRARY}
-  INTERFACE_INCLUDE_DIRECTORIES ${Sleef_INCLUDE_DIR})
-
-# Check the package
-include(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(Sleef DEFAULT_MSG
-  Sleef_INCLUDE_DIR
-  Sleef_LIBRARY)
-
diff --git a/config.mk b/config.mk
@@ -0,0 +1,94 @@
+# Library version
+VERSION = 0.5.0
+PREFIX = /usr/local
+
+LIB_TYPE = SHARED
+#LIB_TYPE = STATIC
+
+BUILD_TYPE = RELEASE
+#BUILD_TYPE = DEBUG
+
+# If not set, SIMD_WIDTH is retrieved from host CPU
+#SIMD_WIDTH = 128
+#SIMD_WIDTH = 256
+
+################################################################################
+# Tools
+################################################################################
+AR = ar
+CC = cc
+LD = ld
+PKG_CONFIG = pkg-config
+OBJCOPY = objcopy
+RANLIB = ranlib
+
+################################################################################
+# Dependencies
+################################################################################
+PCFLAGS_SHARED =
+PCFLAGS_STATIC = --static
+PCFLAGS = $(PCFLAGS_$(LIB_TYPE))
+
+RSYS_VERSION = 0.14
+RSYS_CFLAGS = $$($(PKG_CONFIG) $(PCFLAGS) --cflags rsys)
+RSYS_LIBS = $$($(PKG_CONFIG) $(PCFLAGS) --libs rsys)
+
+SLEEF_VERSION = 3.6
+SLEEF_CFLAGS = $$($(PKG_CONFIG) $(PCFLAGS) --cflags sleef)
+SLEEF_LIBS = $$($(PKG_CONFIG) $(PCFLAGS) --libs sleef)
+
+# Flags of all the dependencies, used to compile and link the library
+DPDC_CFLAGS = $(RSYS_CFLAGS) $(SLEEF_CFLAGS)
+DPDC_LIBS = $(RSYS_LIBS) $(SLEEF_LIBS)
+
+################################################################################
+# Compilation options
+################################################################################
+WFLAGS =\
+ -Wall\
+ -Wcast-align\
+ -Wconversion\
+ -Wextra\
+ -Wmissing-declarations\
+ -Wmissing-prototypes\
+ -Wshadow
+
+CFLAGS_HARDENED =\
+ -D_FORTIFY_SOURCE=2\
+ -fcf-protection=full\
+ -fstack-clash-protection\
+ -fstack-protector-strong
+
+CFLAGS_SIMD = -march=native
+
+CFLAGS_COMMON =\
+ -pedantic\
+ -fPIC\
+ -fvisibility=hidden\
+ -fstrict-aliasing\
+ $(WFLAGS)\
+ $(CFLAGS_HARDENED)\
+ $(CFLAGS_SIMD)
+
+CFLAGS_DEBUG = -g $(CFLAGS_COMMON)
+CFLAGS_RELEASE = -O2 -DNDEBUG $(CFLAGS_COMMON)
+CFLAGS = $(CFLAGS_$(BUILD_TYPE))
+
+CFLAGS_SO = $(CFLAGS) -fPIC
+CFLAGS_EXE = $(CFLAGS) -fPIE
+
+################################################################################
+# Linker options
+################################################################################
+LDFLAGS_HARDENED = -Wl,-z,relro,-z,now
+LDFLAGS_DEBUG = $(LDFLAGS_HARDENED)
+LDFLAGS_RELEASE = -s $(LDFLAGS_HARDENED)
+LDFLAGS = $(LDFLAGS_$(BUILD_TYPE))
+
+LDFLAGS_SO = $(LDFLAGS) -shared -Wl,--no-undefined
+LDFLAGS_EXE = $(LDFLAGS) -pie
+
+# Options of objcopy, run on the object the static library is made of
+OCPFLAGS_DEBUG = --localize-hidden
+OCPFLAGS_RELEASE = --localize-hidden --strip-unneeded
+OCPFLAGS = $(OCPFLAGS_$(BUILD_TYPE))
diff --git a/make.sh b/make.sh
@@ -0,0 +1,117 @@
+#!/bin/sh
+
+# Copyright (C) 2014-2019, 2021, 2023 Vincent Forest (vaplv@free.fr)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+set -e
+
+################################################################################
+# Helper functions
+################################################################################
+# Print the value of a variable in config.mk
+showvar()
+{
+  var="$1"
+  shift 1
+
+  # To avoid messages from Make being displayed instead of the value of the
+  # queried variable, we redirect its output to /dev/null and open a new file
+  # descriptor to stdout to print the variable
+<< EOF "$@" -f 3>&1 1>/dev/null 2>&1 - || kill -HUP $$
+.POSIX:
+include config.mk
+showvar:
+	@1>&3 echo \$(${var})
+EOF
+  exec 3<&- # Close file descriptor 3
+}
+
+# Print the CPU flag "$1" if /proc/cpuinfo lists it, and nothing otherwise
+check_cpuflag()
+{
+  sed -n "/^flags[[:blank:]]\{1,\}:/{p;q}" /proc/cpuinfo \
+  | sed -n "s/.*[[:blank:]]\($1\)\([[:blank:]].*\)\{0,1\}\$/\1/p"
+}
+
+################################################################################
+# Main functions
+################################################################################
+config_simd()
+{
+  simd_width="$(showvar SIMD_WIDTH "$@")"
+  avx="$(check_cpuflag avx)"
+  if [ -z "${simd_width}" ]; then
+    simd_width=128 # Used when config.mk sets nothing and AVX is not found
+    [ -z "${avx}" ] || simd_width=256
+  fi
+  printf "SIMD_WIDTH = %s\n" "${simd_width}"
+}
+
+config_test()
+{
+  for i in "$@"; do
+    test=$(basename "${i}" ".c")
+    test_list="${test_list} ${test}"
+    printf "%s: src/%s.o\n" "${test}" "${test}"
+  done
+  printf "test_bin: %s\n" "${test_list}"
+}
+
+run_test()
+{
+  n=0 # Number of failed tests
+  for i in "$@"; do
+    test=$(basename "${i}" ".c")
+    printf "%s " "${test}"
+    if ./"${test}" > /dev/null 2>&1; then
+      printf "\033[1;32mOK\033[m\n"
+    else
+      printf "\033[1;31mError\033[m\n"
+      n=$((n+1))
+    fi
+  done 2> /dev/null
+  [ "${n}" -eq 0 ] # Return an error if at least one test failed
+}
+
+clean_test()
+{
+  for i in "$@"; do
+    rm -f "$(basename "${i}" ".c")"
+  done
+}
+
+install()
+{
+  prefix=$1
+  shift 1
+
+  for i in "$@"; do
+    # Remove the "src" directory and append the "prefix"
+    dst="${prefix}/${i#*/}"
+
+    # Create the install directory if required
+    dir="${dst%/*}"
+    mkdir -p "${dir}"
+
+    if cmp -s "${i}" "${dst}"; then
+      printf "Up to date %s\n" "${dst}"
+    else
+      printf "Installing %s\n" "${dst}"
+      cp "${i}" "${dst}"
+    fi
+  done
+}
+
+"$@"
diff --git a/rsimd.pc.in b/rsimd.pc.in
@@ -0,0 +1,12 @@
+prefix=@PREFIX@
+includedir=${prefix}/include
+libdir=${prefix}/lib
+cflags_simd=@CFLAGS_SIMD@
+
+Requires: rsys >= @RSYS_VERSION@
+Requires.private: sleef >= @SLEEF_VERSION@
+Name: RSIMD
+Description: RSIMD library
+Version: @VERSION@
+Libs: -L${libdir} -lrsimd
+CFlags: -I${includedir} ${cflags_simd}
diff --git a/src/rsimd.h b/src/rsimd.h
@@ -34,4 +34,3 @@
 #endif
 
 #endif /* RSIMD_H */
-
diff --git a/src/soaXfY_begin.h b/src/soaXfY_begin.h
@@ -43,7 +43,7 @@
   #error "Unexpected macro definition"
 #endif
 
-/* Macros genric to RSIMD_WIDTH__ and RSIMD_SOA_DIMENSION__ */
+/* Macros generic to RSIMD_WIDTH__ and RSIMD_SOA_DIMENSION__ */
 #define RSIMD_soaXfY_PREFIX__ \
   CONCAT(CONCAT(CONCAT(soa, RSIMD_WIDTH__), f), RSIMD_SOA_DIMENSION__)
 #define RSIMD_soaXfY__(Func) CONCAT(CONCAT(RSIMD_soaXfY_PREFIX__, _), Func)
diff --git a/src/sse/ssei.h b/src/sse/ssei.h
@@ -221,7 +221,7 @@ v4i_min(const v4i_T v0, const v4i_T v1)
   v4i_store(a, v0);
   v4i_store(b, v1);
   return v4i_set
-    (MMIN(a[0], b[0]), 
+    (MMIN(a[0], b[0]),
      MMIN(a[1], b[1]),
      MMIN(a[2], b[2]),
      MMIN(a[3], b[3]));
@@ -239,7 +239,7 @@ v4i_max(const v4i_T v0, const v4i_T v1)
   v4i_store(a, v0);
   v4i_store(b, v1);
   return v4i_set
-    (MMAX(a[0], b[0]), 
+    (MMAX(a[0], b[0]),
      MMAX(a[1], b[1]),
      MMAX(a[2], b[2]),
      MMAX(a[3], b[3]));