rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

commit 0fb13274b16cf1ec799918a322615201973929a2
parent bf2c9a9410fb61d7d03c62ec70b982a0a77a7d75
Author: vaplv <vaplv@free.fr>
Date:   Sun, 26 Oct 2014 15:05:01 +0100

Add and test the v4i_<load|store> functions

Diffstat:
M src/sse/ssei.h | 15 +++++++++++++++
M src/test_v4f.c |  4 +---
M src/test_v4i.c | 14 ++++++++++++++
3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/sse/ssei.h b/src/sse/ssei.h
@@ -40,6 +40,21 @@ GENERATE_V4_SWZ_FUNCS__(v4i) /* Swizzle operations */
 /*******************************************************************************
  * Set operations
  ******************************************************************************/
+static FINLINE int32_t*
+v4i_store(int32_t dst[4], v4i_T v)
+{
+  ASSERT(dst && IS_ALIGNED(dst, 16));
+  _mm_store_si128((v4i_T*)dst, v);
+  return dst;
+}
+
+static FINLINE v4i_T
+v4i_load(const int32_t src[4])
+{
+  ASSERT(src && IS_ALIGNED(src, 16));
+  return _mm_load_si128((const v4i_T*)src);
+}
+
 static FINLINE v4i_T
 v4i_set1(const int32_t i)
 {
diff --git a/src/test_v4f.c b/src/test_v4f.c
@@ -22,7 +22,6 @@ main(int argc, char** argv)
   v4f_T i, j, k;
   v4i_T l;
   ALIGN(16) float tmp[5] = { 0.f, 1.f, 2.f, 3.f, 4.f };
-  float* ptr;
   (void)argc, (void)argv;
 
   i = v4f_loadu(tmp+1);
@@ -43,8 +42,7 @@ main(int argc, char** argv)
   CHECK(v4f_w(i), 3.f);
 
   tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0.f;
-  ptr = v4f_store(tmp, i);
-  CHECK(ptr, tmp);
+  CHECK(v4f_store(tmp, i), tmp);
   CHECK(tmp[0], 0.f);
   CHECK(tmp[1], 1.f);
   CHECK(tmp[2], 2.f);
diff --git a/src/test_v4i.c b/src/test_v4i.c
@@ -19,8 +19,22 @@ int
 main(int argc, char** argv)
 {
   v4i_T i, j, k;
+  ALIGN(16) int32_t tmp[4] = { 0, 1, 2, 3 };
   (void)argc, (void)argv;
 
+  i = v4i_load(tmp);
+  CHECK(v4i_x(i), 0);
+  CHECK(v4i_y(i), 1);
+  CHECK(v4i_z(i), 2);
+  CHECK(v4i_w(i), 3);
+
+  tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
+  CHECK(v4i_store(tmp, i), tmp);
+  CHECK(tmp[0], 0);
+  CHECK(tmp[1], 1);
+  CHECK(tmp[2], 2);
+  CHECK(tmp[3], 3);
+
   i = v4i_set(1, 2, 3, 4);
   CHECK(v4i_x(i), 1);
   CHECK(v4i_y(i), 2);