rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

test_v8i.c (6216B)


      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #include "rsimd.h"
     17 
     18 int
     19 main(int argc, char** argv)
     20 {
     21   v8i_T i, j, k;
     22   ALIGN(32) int32_t tmp[8] = {0,1,2,3,4,5,6,7};
     23   (void)argc, (void)argv;
     24 
     25   i = v8i_load(tmp);
     26   CHK(v4i_x(v8i_abcd(i)) == 0);
     27   CHK(v4i_y(v8i_abcd(i)) == 1);
     28   CHK(v4i_z(v8i_abcd(i)) == 2);
     29   CHK(v4i_w(v8i_abcd(i)) == 3);
     30   CHK(v4i_x(v8i_efgh(i)) == 4);
     31   CHK(v4i_y(v8i_efgh(i)) == 5);
     32   CHK(v4i_z(v8i_efgh(i)) == 6);
     33   CHK(v4i_w(v8i_efgh(i)) == 7);
     34 
     35   tmp[0]= tmp[1] = tmp[2] = tmp[3] = 0;
     36   tmp[4]= tmp[5] = tmp[6] = tmp[7] = 0;
     37   CHK(v8i_store(tmp, i) == tmp);
     38   CHK(tmp[0] == 0);
     39   CHK(tmp[1] == 1);
     40   CHK(tmp[2] == 2);
     41   CHK(tmp[3] == 3);
     42   CHK(tmp[4] == 4);
     43   CHK(tmp[5] == 5);
     44   CHK(tmp[6] == 6);
     45   CHK(tmp[7] == 7);
     46 
     47   i = v8i_set(1, 2, 3, 4, 5, 6, 7, 8);
     48   CHK(v4i_x(v8i_abcd(i)) == 1);
     49   CHK(v4i_y(v8i_abcd(i)) == 2);
     50   CHK(v4i_z(v8i_abcd(i)) == 3);
     51   CHK(v4i_w(v8i_abcd(i)) == 4);
     52   CHK(v4i_x(v8i_efgh(i)) == 5);
     53   CHK(v4i_y(v8i_efgh(i)) == 6);
     54   CHK(v4i_z(v8i_efgh(i)) == 7);
     55   CHK(v4i_w(v8i_efgh(i)) == 8);
     56 
     57   i = v8i_set1(-1);
     58   CHK(v4i_x(v8i_abcd(i)) == -1);
     59   CHK(v4i_y(v8i_abcd(i)) == -1);
     60   CHK(v4i_z(v8i_abcd(i)) == -1);
     61   CHK(v4i_w(v8i_abcd(i)) == -1);
     62   CHK(v4i_x(v8i_efgh(i)) == -1);
     63   CHK(v4i_y(v8i_efgh(i)) == -1);
     64   CHK(v4i_z(v8i_efgh(i)) == -1);
     65   CHK(v4i_w(v8i_efgh(i)) == -1);
     66 
     67   i = v8i_zero();
     68   CHK(v4i_x(v8i_abcd(i)) == 0);
     69   CHK(v4i_y(v8i_abcd(i)) == 0);
     70   CHK(v4i_z(v8i_abcd(i)) == 0);
     71   CHK(v4i_w(v8i_abcd(i)) == 0);
     72   CHK(v4i_x(v8i_efgh(i)) == 0);
     73   CHK(v4i_y(v8i_efgh(i)) == 0);
     74   CHK(v4i_z(v8i_efgh(i)) == 0);
     75   CHK(v4i_w(v8i_efgh(i)) == 0);
     76 
     77   i = v8i_set_v4i(v4i_set(-1,-2,3,4), v4i_set(5,6,-7,-8));
     78   CHK(v4i_x(v8i_abcd(i)) ==-1);
     79   CHK(v4i_y(v8i_abcd(i)) ==-2);
     80   CHK(v4i_z(v8i_abcd(i)) == 3);
     81   CHK(v4i_w(v8i_abcd(i)) == 4);
     82   CHK(v4i_x(v8i_efgh(i)) == 5);
     83   CHK(v4i_y(v8i_efgh(i)) == 6);
     84   CHK(v4i_z(v8i_efgh(i)) ==-7);
     85   CHK(v4i_w(v8i_efgh(i)) ==-8);
     86 
     87   i = v8i_set
     88     (0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F,
     89      0x00102030, 0x40506070, (int32_t)0x8090A0B0, (int32_t)0xC0D0E0F0);
     90   j = v8i_set
     91     (0x01020401, 0x70605040, 0x0F1F2F3F, 0x00000000,
     92      0x10204010, 0x06050400, (int32_t)0xF1F2F3F0, 0x10000000);
     93   k = v8i_or(i, j);
     94   CHK(v4i_x(v8i_abcd(k)) == (int32_t)0x01030603);
     95   CHK(v4i_y(v8i_abcd(k)) == (int32_t)0x74655647);
     96   CHK(v4i_z(v8i_abcd(k)) == (int32_t)0x0F1F2F3F);
     97   CHK(v4i_w(v8i_abcd(k)) == (int32_t)0x0C0D0E0F);
     98   CHK(v4i_x(v8i_efgh(k)) == (int32_t)0x10306030);
     99   CHK(v4i_y(v8i_efgh(k)) == (int32_t)0x46556470);
    100   CHK(v4i_z(v8i_efgh(k)) == (int32_t)0xF1F2F3F0);
    101   CHK(v4i_w(v8i_efgh(k)) == (int32_t)0xD0D0E0F0);
    102 
    103   k = v8i_and(i, j);
    104   CHK(v4i_x(v8i_abcd(k)) == (int32_t)0x00000001);
    105   CHK(v4i_y(v8i_abcd(k)) == (int32_t)0x00000000);
    106   CHK(v4i_z(v8i_abcd(k)) == (int32_t)0x08090A0B);
    107   CHK(v4i_w(v8i_abcd(k)) == (int32_t)0x00000000);
    108   CHK(v4i_x(v8i_efgh(k)) == (int32_t)0x00000010);
    109   CHK(v4i_y(v8i_efgh(k)) == (int32_t)0x00000000);
    110   CHK(v4i_z(v8i_efgh(k)) == (int32_t)0x8090A0B0);
    111   CHK(v4i_w(v8i_efgh(k)) == (int32_t)0x00000000);
    112 
    113   k = v8i_andnot(i, j);
    114   CHK(v4i_x(v8i_abcd(k)) == (int32_t)0x01020400);
    115   CHK(v4i_y(v8i_abcd(k)) == (int32_t)0x70605040);
    116   CHK(v4i_z(v8i_abcd(k)) == (int32_t)0x07162534);
    117   CHK(v4i_w(v8i_abcd(k)) == (int32_t)0x00000000);
    118   CHK(v4i_x(v8i_efgh(k)) == (int32_t)0x10204000);
    119   CHK(v4i_y(v8i_efgh(k)) == (int32_t)0x06050400);
    120   CHK(v4i_z(v8i_efgh(k)) == (int32_t)0x71625340);
    121   CHK(v4i_w(v8i_efgh(k)) == (int32_t)0x10000000);
    122 
    123   k = v8i_xor(i, j);
    124   CHK(v4i_x(v8i_abcd(k)) == (int32_t)0x01030602);
    125   CHK(v4i_y(v8i_abcd(k)) == (int32_t)0x74655647);
    126   CHK(v4i_z(v8i_abcd(k)) == (int32_t)0x07162534);
    127   CHK(v4i_w(v8i_abcd(k)) == (int32_t)0x0C0D0E0F);
    128   CHK(v4i_x(v8i_efgh(k)) == (int32_t)0x10306020);
    129   CHK(v4i_y(v8i_efgh(k)) == (int32_t)0x46556470);
    130   CHK(v4i_z(v8i_efgh(k)) == (int32_t)0x71625340);
    131   CHK(v4i_w(v8i_efgh(k)) == (int32_t)0XD0D0E0F0);
    132 
    133   i = v8i_set( 1, 2,3,4,5, 6,7,8);
    134   j = v8i_set(-2,-4,3,6,5,-1,8,8);
    135 
    136   k = v8i_eq(i, j);
    137   CHK(v4i_x(v8i_abcd(k)) == 0);
    138   CHK(v4i_y(v8i_abcd(k)) == 0);
    139   CHK(v4i_z(v8i_abcd(k)) ==~0);
    140   CHK(v4i_w(v8i_abcd(k)) == 0);
    141   CHK(v4i_x(v8i_efgh(k)) ==~0);
    142   CHK(v4i_y(v8i_efgh(k)) == 0);
    143   CHK(v4i_z(v8i_efgh(k)) == 0);
    144   CHK(v4i_w(v8i_efgh(k)) ==~0);
    145 
    146   k = v8i_neq(i, j);
    147   CHK(v4i_x(v8i_abcd(k)) ==~0);
    148   CHK(v4i_y(v8i_abcd(k)) ==~0);
    149   CHK(v4i_z(v8i_abcd(k)) == 0);
    150   CHK(v4i_w(v8i_abcd(k)) ==~0);
    151   CHK(v4i_x(v8i_efgh(k)) == 0);
    152   CHK(v4i_y(v8i_efgh(k)) ==~0);
    153   CHK(v4i_z(v8i_efgh(k)) ==~0);
    154   CHK(v4i_w(v8i_efgh(k)) == 0);
    155 
    156   k = v8i_sel(i, j, v8i_set(~0,~0,0,~0,0,0,~0,0));
    157   CHK(v4i_x(v8i_abcd(k)) ==-2);
    158   CHK(v4i_y(v8i_abcd(k)) ==-4);
    159   CHK(v4i_z(v8i_abcd(k)) == 3);
    160   CHK(v4i_w(v8i_abcd(k)) == 6);
    161   CHK(v4i_x(v8i_efgh(k)) == 5);
    162   CHK(v4i_y(v8i_efgh(k)) == 6);
    163   CHK(v4i_z(v8i_efgh(k)) == 8);
    164   CHK(v4i_w(v8i_efgh(k)) == 8);
    165 
    166   k = v8i_min(i, j);
    167   CHK(v4i_x(v8i_abcd(k)) ==-2);
    168   CHK(v4i_y(v8i_abcd(k)) ==-4);
    169   CHK(v4i_z(v8i_abcd(k)) == 3);
    170   CHK(v4i_w(v8i_abcd(k)) == 4);
    171   CHK(v4i_x(v8i_efgh(k)) == 5);
    172   CHK(v4i_y(v8i_efgh(k)) ==-1);
    173   CHK(v4i_z(v8i_efgh(k)) == 7);
    174   CHK(v4i_w(v8i_efgh(k)) == 8);
    175 
    176   k = v8i_max(i, j);
    177   CHK(v4i_x(v8i_abcd(k)) == 1);
    178   CHK(v4i_y(v8i_abcd(k)) == 2);
    179   CHK(v4i_z(v8i_abcd(k)) == 3);
    180   CHK(v4i_w(v8i_abcd(k)) == 6);
    181   CHK(v4i_x(v8i_efgh(k)) == 5);
    182   CHK(v4i_y(v8i_efgh(k)) == 6);
    183   CHK(v4i_z(v8i_efgh(k)) == 8);
    184   CHK(v4i_w(v8i_efgh(k)) == 8);
    185 
    186   CHK(v8i_reduce_min_i32(i) == 1);
    187   CHK(v8i_reduce_min_i32(j) ==-4);
    188   CHK(v8i_reduce_max_i32(i) == 8);
    189   CHK(v8i_reduce_max_i32(j) == 8);
    190 
    191   return 0;
    192 }