commit c5b97ca8fc48acbd9fd8cb487c165cc732cf6532
parent 7087e6b6dd782bd4034d8a8394926a8bb3e8a4ed
Author: vaplv <vaplv@free.fr>
Date: Sun, 24 Feb 2019 16:32:32 +0100
Make the soa vector functions generic to the SIMD width
Diffstat:
6 files changed, 402 insertions(+), 362 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -53,7 +53,7 @@ set(RSIMD_FILES_INC_LEGACY
aosf44.h
aosq.h
rsimd.h
- soa4fX.h
+ soaXfY.h
soa4f2.h
soa4f3.h
soa4f4.h)
diff --git a/src/soa4f2.h b/src/soa4f2.h
@@ -16,9 +16,10 @@
#ifndef SOA4F2_H
#define SOA4F2_H
-/* Generate the common soa4fX funcs */
-#define SOA4FX_DIMENSION__ 2
-#include "soa4fX.h"
+/* Generate the common soa4f2 funcs */
+#define RSIMD_WIDTH__ 4
+#define RSIMD_SOA_DIMENSION__ 2
+#include "soaXfY.h"
static FINLINE v4f_T
soa4f2_cross(const v4f_T a[2], const v4f_T b[2])
diff --git a/src/soa4f3.h b/src/soa4f3.h
@@ -16,9 +16,10 @@
#ifndef SOA4F3_H
#define SOA4F3_H
-/* Generate the common soa4fX functions */
-#define SOA4FX_DIMENSION__ 3
-#include "soa4fX.h"
+/* Generate the common soa4f3 functions */
+#define RSIMD_WIDTH__ 4
+#define RSIMD_SOA_DIMENSION__ 3
+#include "soaXfY.h"
static FINLINE v4f_T*
soa4f3_cross(v4f_T dst[3], const v4f_T a[3], const v4f_T b[3])
diff --git a/src/soa4f4.h b/src/soa4f4.h
@@ -16,9 +16,10 @@
#ifndef SOA4F4_H
#define SOA4F4_H
-/* Generate the common soa4fX functions */
-#define SOA4FX_DIMENSION__ 4
-#include "soa4fX.h"
+/* Generate the common soa4f4 functions */
+#define RSIMD_WIDTH__ 4
+#define RSIMD_SOA_DIMENSION__ 4
+#include "soaXfY.h"
#endif /* SOA4F4_H */
diff --git a/src/soa4fX.h b/src/soa4fX.h
@@ -1,352 +0,0 @@
-/* Copyright (C) 2014-2018 Vincent Forest (vaplv@free.fr)
- *
- * The RSIMD library is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * The RSIMD library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
-
-/*
- * Header used to generate funcs on SoA SIMD float vectors of X dimensions
- */
-#if !defined(SOA4FX_DIMENSION__)
- #error Missing arguments
-#endif
-
-#if defined(SOA4FX_FUNC__)
- #error Unexpected SOA4FX_FUNC__ macro defintion
-#endif
-
-#include "rsimd.h"
-
-#ifdef COMPILER_GCC
- #pragma GCC push_options
- #pragma GCC optimize("unroll-loops")
-#endif
-
-STATIC_ASSERT(SOA4FX_DIMENSION__ > 1, Unexpected_value);
-
-#define SOA4FX_FUNC__(Func) \
- CONCAT(CONCAT(CONCAT(soa4f, SOA4FX_DIMENSION__), _), Func)
-
-/* Helper macro */
-#define SIZEOF_SOA4FX__ sizeof(v4f_T[SOA4FX_DIMENSION__])
-
-#if SOA4FX_DIMENSION__ <= 4
-static FINLINE v4f_T*
-CONCAT(soa4f, SOA4FX_DIMENSION__)
- (v4f_T* dst
- ,const v4f_T x
- ,const v4f_T y
-#if SOA4FX_DIMENSION__ > 2
- ,const v4f_T z
-#endif
-#if SOA4FX_DIMENSION__ > 3
- ,const v4f_T w
-#endif
- )
-{
- ASSERT(dst);
- dst[0] = x;
- dst[1] = y;
-#if SOA4FX_DIMENSION__ > 2
- dst[2] = z;
-#endif
-#if SOA4FX_DIMENSION__ > 3
- dst[3] = w;
-#endif
- return dst;
-}
-#endif
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(splat)(v4f_T* dst, const v4f_T val)
-{
- int i;
- ASSERT(dst);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- dst[i] = val;
- return dst;
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(set__)(v4f_T* dst, const v4f_T* src)
-{
- int i;
- ASSERT(dst && src);
- ASSERT(!MEM_AREA_OVERLAP(dst, SIZEOF_SOA4FX__, src, SIZEOF_SOA4FX__));
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- dst[i] = src[i];
- return dst;
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(set)(v4f_T* dst, const v4f_T* src)
-{
- ASSERT(dst && src);
- if(!MEM_AREA_OVERLAP(dst, SIZEOF_SOA4FX__, src, SIZEOF_SOA4FX__)) {
- return SOA4FX_FUNC__(set__)(dst, src);
- } else {
- v4f_T tmp[SOA4FX_DIMENSION__];
- return SOA4FX_FUNC__(set__)(dst, SOA4FX_FUNC__(set__)(tmp, src));
- }
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(dot)(const v4f_T* a, const v4f_T* b)
-{
- v4f_T dot;
- int i;
- ASSERT(a && b);
- dot = v4f_mul(a[0], b[0]);
- FOR_EACH(i, 1, SOA4FX_DIMENSION__) {
- dot = v4f_add(dot, v4f_mul(a[i], b[i]));
- }
- return dot;
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(len)(const v4f_T* a)
-{
- ASSERT(a);
- return v4f_sqrt(SOA4FX_FUNC__(dot)(a, a));
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(normalize)(v4f_T* dst, const v4f_T* a)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- v4f_T sqr_len, rcp_len;
- v4f_T mask;
- int i;
- ASSERT(dst && a);
-
- sqr_len = SOA4FX_FUNC__(dot)(a, a);
- mask = v4f_neq(sqr_len, v4f_zero());
- rcp_len = v4f_rsqrt(sqr_len);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_and(mask, v4f_mul(a[i], rcp_len));
- SOA4FX_FUNC__(set__)(dst, tmp);
- return v4f_mul(sqr_len, rcp_len);
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(is_normalized)(const v4f_T* a)
-{
- return v4f_eq_eps(SOA4FX_FUNC__(len)(a), v4f_set1(1.f), v4f_set1(1.e-6f));
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(add)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_add(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(addf)(v4f_T* dst, const v4f_T* a, const v4f_T f)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_add(a[i], f);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(sub)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_sub(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(subf)(v4f_T* dst, const v4f_T* a, const v4f_T f)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_sub(a[i], f);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(mul)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_mul(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(mulf)(v4f_T* dst, const v4f_T* a, const v4f_T f)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_mul(a[i], f);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(div)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_div(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(divf)(v4f_T* dst, const v4f_T* a, const v4f_T f)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_div(a[i], f);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(minus)(v4f_T* dst, const v4f_T* a)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_minus(a[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(sum)(const v4f_T* a)
-{
- v4f_T f;
- int i = 0;
- ASSERT(a);
- f = a[i];
- FOR_EACH(i, 1, SOA4FX_DIMENSION__)
- f = v4f_add(f, a[i]);
- return f;
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(lerp)
- (v4f_T* dst,
- const v4f_T* from,
- const v4f_T* to,
- const v4f_T t)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- v4f_T t_adjusted;
- int i;
- ASSERT(dst && from && to);
- t_adjusted = v4f_min(v4f_max(t, v4f_zero()), v4f_set1(1.f));
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_add(from[i], v4f_mul(t_adjusted, v4f_sub(to[i], from[i])));
- SOA4FX_FUNC__(set__)(dst, tmp);
- return dst;
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(eq)(const v4f_T* a, const v4f_T* b)
-{
- v4f_T is_eq;
- int i = 0;
- ASSERT(a && b);
- is_eq = v4f_eq(a[0], b[0]);
- FOR_EACH(i, 1, SOA4FX_DIMENSION__)
- is_eq = v4f_and(is_eq, v4f_eq(a[i], b[i]));
- return is_eq;
-}
-
-static FINLINE v4f_T
-SOA4FX_FUNC__(eq_eps)(const v4f_T* a, const v4f_T* b, const v4f_T eps)
-{
- v4f_T is_eq;
- int i = 0;
- ASSERT(a && b);
- is_eq = v4f_eq_eps(a[0], b[0], eps);
- FOR_EACH(i, 1, SOA4FX_DIMENSION__)
- is_eq = v4f_and(is_eq, v4f_eq_eps(a[i], b[i], eps));
- return is_eq;
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(max)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_max(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(min)(v4f_T* dst, const v4f_T* a, const v4f_T* b)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && a && b);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_min(a[i], b[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(sel)
- (v4f_T* dst, const v4f_T* vfalse, const v4f_T* vtrue, const v4f_T cond)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && vfalse && vtrue);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_sel(vfalse[i], vtrue[i], cond);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-static FINLINE v4f_T*
-SOA4FX_FUNC__(selv)
- (v4f_T* dst, const v4f_T* vfalse, const v4f_T* vtrue, const v4f_T* vcond)
-{
- v4f_T tmp[SOA4FX_DIMENSION__];
- int i;
- ASSERT(dst && vfalse && vtrue);
- FOR_EACH(i, 0, SOA4FX_DIMENSION__)
- tmp[i] = v4f_sel(vfalse[i], vtrue[i], vcond[i]);
- return SOA4FX_FUNC__(set__)(dst, tmp);
-}
-
-#undef SIZEOF_SOA4FX__
-#undef SOA4FX_DIMENSION__
-#undef SOA4FX_FUNC__
-
-#ifdef COMPILER_GCC
- #pragma GCC pop_options
-#endif
-
diff --git a/src/soaXfY.h b/src/soaXfY.h
@@ -0,0 +1,389 @@
+/* Copyright (C) 2014-2018 Vincent Forest (vaplv@free.fr)
+ *
+ * The RSIMD library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The RSIMD library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
+
+/*
+ * Header used to generate funcs on X-wide SoA SIMD float vectors of Y dimensions
+ */
+#include "rsimd.h"
+
+#if !defined(RSIMD_SOA_DIMENSION__)
+ #error "Undefined RSIMD_SOA_DIMENSION__ macro"
+#endif
+#if !defined(RSIMD_WIDTH__)
+ #error "Undefined RSIMD_WIDTH__ macro"
+#endif
+#if RSIMD_SOA_DIMENSION__ < 2 || RSIMD_SOA_DIMENSION__ > 4 /* ctor writes dst[0..1] */
+ #error "Unexpected RSIMD_SOA_DIMENSION__ value"
+#endif
+#if RSIMD_WIDTH__ != 4 && RSIMD_WIDTH__ != 8
+ #error "Unexpected RSIMD_WIDTH__ value"
+#endif
+
+/* Macros generic to RSIMD_WIDTH__ */
+#define RSIMD_vXf__(Func) \
+ CONCAT(CONCAT(CONCAT(CONCAT(v, RSIMD_WIDTH__), f), _), Func)
+#define RSIMD_vXf_T__ CONCAT(CONCAT(v, RSIMD_WIDTH__), f_T)
+#define RSIMD_soaXfY_PREFIX__ \
+ CONCAT(CONCAT(CONCAT(soa, RSIMD_WIDTH__), f), RSIMD_SOA_DIMENSION__)
+#define RSIMD_soaXfY__(Func) CONCAT(CONCAT(RSIMD_soaXfY_PREFIX__, _), Func)
+#define SIZEOF_RSIMD_soaXfY__ sizeof(RSIMD_vXf_T__[RSIMD_SOA_DIMENSION__])
+
+/* Force GCC to unroll the loops */
+#ifdef COMPILER_GCC
+ #pragma GCC push_options
+ #pragma GCC optimize("unroll-loops")
+#endif
+
+#if RSIMD_SOA_DIMENSION__ <= 4
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY_PREFIX__
+ (RSIMD_vXf_T__* dst
+ ,const RSIMD_vXf_T__ x
+ ,const RSIMD_vXf_T__ y
+#if RSIMD_SOA_DIMENSION__ > 2
+ ,const RSIMD_vXf_T__ z
+#endif
+#if RSIMD_SOA_DIMENSION__ > 3
+ ,const RSIMD_vXf_T__ w
+#endif
+ )
+{
+ ASSERT(dst);
+ dst[0] = x;
+ dst[1] = y;
+#if RSIMD_SOA_DIMENSION__ > 2
+ dst[2] = z;
+#endif
+#if RSIMD_SOA_DIMENSION__ > 3
+ dst[3] = w;
+#endif
+ return dst;
+}
+#endif
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(splat)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__ val)
+{
+ int i;
+ ASSERT(dst);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ dst[i] = val;
+ return dst;
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(set__)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src)
+{
+ int i;
+ ASSERT(dst && src);
+ ASSERT(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__));
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ dst[i] = src[i];
+ return dst;
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(set)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src)
+{
+ ASSERT(dst && src);
+ if(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__)) {
+ return RSIMD_soaXfY__(set__)(dst, src);
+ } else {
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ return RSIMD_soaXfY__(set__)(dst, RSIMD_soaXfY__(set__)(tmp, src));
+ }
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(dot)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ dot;
+ int i;
+ ASSERT(a && b);
+ dot = RSIMD_vXf__(mul)(a[0], b[0]);
+ FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) {
+ dot = RSIMD_vXf__(madd)(a[i], b[i], dot);
+ }
+ return dot;
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(len)(const RSIMD_vXf_T__* a)
+{
+ ASSERT(a);
+ return RSIMD_vXf__(sqrt)(RSIMD_soaXfY__(dot)(a, a));
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(normalize)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ RSIMD_vXf_T__ sqr_len, rcp_len;
+ RSIMD_vXf_T__ mask;
+ int i;
+ ASSERT(dst && a);
+
+ sqr_len = RSIMD_soaXfY__(dot)(a, a);
+ mask = RSIMD_vXf__(neq)(sqr_len, RSIMD_vXf__(zero)());
+ rcp_len = RSIMD_vXf__(rsqrt)(sqr_len);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(and)(mask, RSIMD_vXf__(mul)(a[i], rcp_len));
+ RSIMD_soaXfY__(set__)(dst, tmp);
+ return RSIMD_vXf__(mul)(sqr_len, rcp_len);
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(is_normalized)(const RSIMD_vXf_T__* a)
+{
+ return RSIMD_vXf__(eq_eps)
+ (RSIMD_soaXfY__(len)(a),
+ RSIMD_vXf__(set1)(1.f),
+ RSIMD_vXf__(set1)(1.e-6f));
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(add)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(add)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(addf)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(add)(a[i], f);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(sub)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(sub)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(subf)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(sub)(a[i], f);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(mul)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(mul)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(mulf)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(mul)(a[i], f);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(div)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(div)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(divf)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(div)(a[i], f);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(minus)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(minus)(a[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(sum)(const RSIMD_vXf_T__* a)
+{
+ RSIMD_vXf_T__ f;
+ int i = 0;
+ ASSERT(a);
+ f = a[i];
+ FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
+ f = RSIMD_vXf__(add)(f, a[i]);
+ return f;
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(lerp)
+ (RSIMD_vXf_T__* dst,
+ const RSIMD_vXf_T__* from,
+ const RSIMD_vXf_T__* to,
+ const RSIMD_vXf_T__ t)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ RSIMD_vXf_T__ t_adjusted;
+ int i;
+ ASSERT(dst && from && to);
+ t_adjusted = RSIMD_vXf__(min)
+ (RSIMD_vXf__(max)(t, RSIMD_vXf__(zero)()), RSIMD_vXf__(set1)(1.f));
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(madd)
+ (t_adjusted, RSIMD_vXf__(sub)(to[i], from[i]), from[i]);
+ RSIMD_soaXfY__(set__)(dst, tmp);
+ return dst;
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(eq)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ is_eq;
+ int i = 0;
+ ASSERT(a && b);
+ is_eq = RSIMD_vXf__(eq)(a[0], b[0]);
+ FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
+ is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq)(a[i], b[i]));
+ return is_eq;
+}
+
+static FINLINE RSIMD_vXf_T__
+RSIMD_soaXfY__(eq_eps)
+ (const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b, const RSIMD_vXf_T__ eps)
+{
+ RSIMD_vXf_T__ is_eq;
+ int i = 0;
+ ASSERT(a && b);
+ is_eq = RSIMD_vXf__(eq_eps)(a[0], b[0], eps);
+ FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
+ is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq_eps)(a[i], b[i], eps));
+ return is_eq;
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(max)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(max)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(min)
+ (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && a && b);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(min)(a[i], b[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(sel)
+ (RSIMD_vXf_T__* dst,
+ const RSIMD_vXf_T__* vfalse,
+ const RSIMD_vXf_T__* vtrue,
+ const RSIMD_vXf_T__ cond)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && vfalse && vtrue);
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], cond);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+static FINLINE RSIMD_vXf_T__*
+RSIMD_soaXfY__(selv)
+ (RSIMD_vXf_T__* dst,
+ const RSIMD_vXf_T__* vfalse,
+ const RSIMD_vXf_T__* vtrue,
+ const RSIMD_vXf_T__* vcond)
+{
+ RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
+ int i;
+ ASSERT(dst && vfalse && vtrue && vcond); /* vcond[i] is read for each component */
+ FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
+ tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], vcond[i]);
+ return RSIMD_soaXfY__(set__)(dst, tmp);
+}
+
+/* Restore compilation parameters */
+#ifdef COMPILER_GCC
+ #pragma GCC pop_options
+#endif
+
+/* Undef helper macros */
+#undef RSIMD_vXf__
+#undef RSIMD_vXf_T__
+#undef RSIMD_soaXfY_PREFIX__
+#undef RSIMD_soaXfY__
+#undef SIZEOF_RSIMD_soaXfY__
+
+/* Undef parameters */
+#undef RSIMD_SOA_DIMENSION__
+#undef RSIMD_WIDTH__
+