rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit 1b1ea7b04d5b5635c877b6e66ea21b168e0a9fc7
parent dacd06d46b1f09b1a8cfcf0c07802f8bd2708bf1
Author: vaplv <vaplv@free.fr>
Date:   Fri, 24 Oct 2014 14:29:47 +0200

Add and test the v4f/v4i reinterpret cast functions

Diffstat:
M src/sse/sse.h | 8 ++++++--
M src/test_v4f.c | 152 +++++++++++++++++++++++++++++++++++++------------------------------------------
2 files changed, 77 insertions(+), 83 deletions(-)

diff --git a/src/sse/sse.h b/src/sse/sse.h @@ -19,8 +19,12 @@ #include "ssef.h" #include "ssei.h" -static FINLINE v4i_T v4f_to_v4i(const v4f_T v) { return _mm_cvtps_epi32(v); } -static FINLINE v4f_T v4i_to_v4f(const v4i_T v) { return _mm_cvtepi32_ps(v); } +static FINLINE v4i_T v4f_to_v4i(const v4f_T v) {return _mm_cvtps_epi32(v);} +static FINLINE v4f_T v4i_to_v4f(const v4i_T v) {return _mm_cvtepi32_ps(v);} + +/* Reinterpret cast */ +static FINLINE v4i_T v4f_rcast_v4i(const v4f_T v) {return _mm_castps_si128(v);} +static FINLINE v4f_T v4i_rcast_v4f(const v4i_T v) {return _mm_castsi128_ps(v);} #endif /* SIMD_SSE_H */ diff --git a/src/test_v4f.c b/src/test_v4f.c @@ -18,7 +18,7 @@ int main(int argc, char** argv) { - union { int32_t i; float f; } cast; + union { int32_t i[4]; float f[4]; } cast; v4f_T i, j, k; v4i_T l; ALIGN(16) float tmp[5] = { 0.f, 1.f, 2.f, 3.f, 4.f }; @@ -69,74 +69,46 @@ main(int argc, char** argv) CHECK(v4f_w(i), 0.f); i = v4f_mask(~0, 0, ~0, ~0); - cast.f = v4f_x(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_y(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); + cast.f[0] = v4f_x(i); CHECK(cast.i[0], (int32_t)0xFFFFFFFF); + cast.f[1] = v4f_y(i); CHECK(cast.i[1], (int32_t)0x00000000); + cast.f[2] = v4f_z(i); CHECK(cast.i[2], (int32_t)0xFFFFFFFF); + cast.f[3] = v4f_w(i); CHECK(cast.i[3], (int32_t)0xFFFFFFFF); i = v4f_true(); - cast.f = v4f_x(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_y(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_z(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); + cast.f[0] = v4f_x(i); CHECK(cast.i[0], (int32_t)0xFFFFFFFF); + cast.f[1] = v4f_y(i); CHECK(cast.i[1], (int32_t)0xFFFFFFFF); + cast.f[2] = v4f_z(i); CHECK(cast.i[2], (int32_t)0xFFFFFFFF); + cast.f[3] = v4f_w(i); CHECK(cast.i[3], (int32_t)0xFFFFFFFF); i = v4f_false(); - cast.f = v4f_x(i); 
CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_y(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_w(i); CHECK(cast.i, (int32_t)0x00000000); - - i = v4f_xmask(); - cast.f = v4f_x(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_y(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_w(i); CHECK(cast.i, (int32_t)0x00000000); - - i = v4f_ymask(); - cast.f = v4f_x(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_y(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_z(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_w(i); CHECK(cast.i, (int32_t)0x00000000); - - i = v4f_zmask(); - cast.f = v4f_x(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_y(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(i); CHECK(cast.i, (int32_t)0x00000000); - - i = v4f_wmask(); - cast.f = v4f_x(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_y(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(i); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_w(i); CHECK(cast.i, (int32_t)0xFFFFFFFF); - CHECK(v4f_mask_x(i), 0); - CHECK(v4f_mask_y(i), 0); - CHECK(v4f_mask_z(i), 0); - CHECK(v4f_mask_w(i), ~0); + cast.f[0] = v4f_x(i); CHECK(cast.i[0], (int32_t)0x00000000); + cast.f[1] = v4f_y(i); CHECK(cast.i[1], (int32_t)0x00000000); + cast.f[2] = v4f_z(i); CHECK(cast.i[2], (int32_t)0x00000000); + cast.f[3] = v4f_w(i); CHECK(cast.i[3], (int32_t)0x00000000); i = v4f_mask(~0, 0, ~0, ~0); j = v4f_mask(0, 0, 0, ~0); k = v4f_or(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_y(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000); + 
cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF); CHECK(v4f_mask_x(i), ~0); CHECK(v4f_mask_y(i), 0); CHECK(v4f_mask_z(i), ~0); CHECK(v4f_mask_w(i), ~0); k = v4f_and(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_y(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0x00000000); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF); k = v4f_xor(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_y(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0x00000000); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000); i = v4f_set(1.f, 2.f, 3.f, 4.f); j = v4f_set(5.f, 6.f, 7.f, 8.f); @@ -429,49 +401,49 @@ main(int argc, char** argv) i = v4f_set(1.f, 2.f, 3.f, 4.f); j = v4f_set(-2.f, -4.f, 3.f, 6.f); k = v4f_eq(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_y(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0x00000000); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000); k = v4f_neq(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = 
v4f_y(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0xFFFFFFFF); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0x00000000); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF); k = v4f_gt(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_y(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0x00000000); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0xFFFFFFFF); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0x00000000); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000); k = v4f_lt(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_y(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0x00000000); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF); k = v4f_ge(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_y(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0x00000000); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0xFFFFFFFF); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0xFFFFFFFF); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000); k = v4f_le(i, j); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = 
v4f_y(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0x00000000); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0xFFFFFFFF); i = v4f_set(1.01f, 2.01f, 3.02f, 0.02f); j = v4f_set(1.f, 2.f, 3.f, 0.f); k = v4f_set(0.f, 0.01f, 0.02f, 0.f); k = v4f_eq_eps(i, j, k); - cast.f = v4f_x(k); CHECK(cast.i, (int32_t)0x00000000); - cast.f = v4f_y(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_z(k); CHECK(cast.i, (int32_t)0xFFFFFFFF); - cast.f = v4f_w(k); CHECK(cast.i, (int32_t)0x00000000); + cast.f[0] = v4f_x(k); CHECK(cast.i[0], (int32_t)0x00000000); + cast.f[1] = v4f_y(k); CHECK(cast.i[1], (int32_t)0xFFFFFFFF); + cast.f[2] = v4f_z(k); CHECK(cast.i[2], (int32_t)0xFFFFFFFF); + cast.f[3] = v4f_w(k); CHECK(cast.i[3], (int32_t)0x00000000); i = v4f_set(1.f, 2.f, 3.f, 4.f); j = v4f_set(-2.f, -4.f, 3.f, 6.f); @@ -499,6 +471,24 @@ main(int argc, char** argv) CHECK(v4f_z(k), 3.f); CHECK(v4f_w(k), 6.f); + cast.f[0] = 1.f; + cast.f[1] = 2.f; + cast.f[2] = 3.14f; + cast.f[3] = -9.2f; + + i = v4f_set(cast.f[0], cast.f[1], cast.f[2], cast.f[3]); + l = v4f_rcast_v4i(i); + CHECK(v4i_x(l), cast.i[0]); + CHECK(v4i_y(l), cast.i[1]); + CHECK(v4i_z(l), cast.i[2]); + CHECK(v4i_w(l), cast.i[3]); + + i = v4i_rcast_v4f(l); + CHECK(v4f_x(i), cast.f[0]); + CHECK(v4f_y(i), cast.f[1]); + CHECK(v4f_z(i), cast.f[2]); + CHECK(v4f_w(i), cast.f[3]); + k = v4f_xxxx(j); CHECK(v4f_x(k), -2.f); CHECK(v4f_y(k), -2.f);