rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

test_v4i.c (6141B)


      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #include "rsimd.h"
     17 
     18 int
     19 main(int argc, char** argv)
     20 {
     21   v4i_T i, j, k;
     22   ALIGN(16) int32_t tmp[4] = { 0, 1, 2, 3 };
     23   (void)argc, (void)argv;
     24 
     25   i = v4i_load(tmp);
     26   CHK(v4i_x(i) == 0);
     27   CHK(v4i_y(i) == 1);
     28   CHK(v4i_z(i) == 2);
     29   CHK(v4i_w(i) == 3);
     30 
     31   tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
     32   CHK(v4i_store(tmp, i) == tmp);
     33   CHK(tmp[0] == 0);
     34   CHK(tmp[1] == 1);
     35   CHK(tmp[2] == 2);
     36   CHK(tmp[3] == 3);
     37 
     38   i = v4i_set(1, 2, 3, 4);
     39   CHK(v4i_x(i) == 1);
     40   CHK(v4i_y(i) == 2);
     41   CHK(v4i_z(i) == 3);
     42   CHK(v4i_w(i) == 4);
     43 
     44   i = v4i_set1(-1);
     45   CHK(v4i_x(i) == -1);
     46   CHK(v4i_y(i) == -1);
     47   CHK(v4i_z(i) == -1);
     48   CHK(v4i_w(i) == -1);
     49 
     50   i = v4i_zero();
     51   CHK(v4i_x(i) == 0);
     52   CHK(v4i_y(i) == 0);
     53   CHK(v4i_z(i) == 0);
     54   CHK(v4i_w(i) == 0);
     55 
     56   i = v4i_set(1, 2, 3, 4);
     57   j = v4i_set(5, 6, 7, 8);
     58   k = v4i_xayb(i, j);
     59   CHK(v4i_x(k) == 1);
     60   CHK(v4i_y(k) == 5);
     61   CHK(v4i_z(k) == 2);
     62   CHK(v4i_w(k) == 6);
     63 
     64   k = v4i_zcwd(i, j);
     65   CHK(v4i_x(k) == 3);
     66   CHK(v4i_y(k) == 7);
     67   CHK(v4i_z(k) == 4);
     68   CHK(v4i_w(k) == 8);
     69 
     70   i = v4i_set(0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F);
     71   j = v4i_set(0x01020401, 0x70605040, 0x0F1F2F3F, 0x00000000);
     72   k = v4i_or(i, j);
     73   CHK(v4i_x(k) == (int32_t)0x01030603);
     74   CHK(v4i_y(k) == (int32_t)0x74655647);
     75   CHK(v4i_z(k) == (int32_t)0x0F1F2F3F);
     76   CHK(v4i_w(k) == (int32_t)0x0C0D0E0F);
     77 
     78   k = v4i_and(i, j);
     79   CHK(v4i_x(k) == (int32_t)0x00000001);
     80   CHK(v4i_y(k) == (int32_t)0x00000000);
     81   CHK(v4i_z(k) == (int32_t)0x08090A0B);
     82   CHK(v4i_w(k) == (int32_t)0x00000000);
     83 
     84   k = v4i_andnot(i, j);
     85   CHK(v4i_x(k) == (int32_t)0x01020400);
     86   CHK(v4i_y(k) == (int32_t)0x70605040);
     87   CHK(v4i_z(k) == (int32_t)0x07162534);
     88   CHK(v4i_w(k) == (int32_t)0x00000000);
     89 
     90   k = v4i_xor(i, j);
     91   CHK(v4i_x(k) == (int32_t)0x01030602);
     92   CHK(v4i_y(k) == (int32_t)0x74655647);
     93   CHK(v4i_z(k) == (int32_t)0x07162534);
     94   CHK(v4i_w(k) == (int32_t)0x0C0D0E0F);
     95 
     96   k = v4i_not(i);
     97   CHK(v4i_x(k) == (int32_t)0xFFFEFDFC);
     98   CHK(v4i_y(k) == (int32_t)0xFBFAF9F8);
     99   CHK(v4i_z(k) == (int32_t)0xF7F6F5F4);
    100   CHK(v4i_w(k) == (int32_t)0xF3F2F1F0);
    101 
    102   i = v4i_set(32, 16, 8, 4);
    103   k = v4i_rshift(i, 4);
    104   CHK(v4i_x(k) == 2);
    105   CHK(v4i_y(k) == 1);
    106   CHK(v4i_z(k) == 0);
    107   CHK(v4i_w(k) == 0);
    108 
    109   k = v4i_rshift(i, 1);
    110   CHK(v4i_x(k) == 16);
    111   CHK(v4i_y(k) == 8);
    112   CHK(v4i_z(k) == 4);
    113   CHK(v4i_w(k) == 2);
    114 
    115   k = v4i_lshift(i, 4);
    116   CHK(v4i_x(k) == 512);
    117   CHK(v4i_y(k) == 256);
    118   CHK(v4i_z(k) == 128);
    119   CHK(v4i_w(k) == 64);
    120 
    121   i = v4i_set(1, 2, 3, 4);
    122   j = v4i_set(-2, -4, 3, 6);
    123   k = v4i_add(i, j);
    124   CHK(v4i_x(k) == -1);
    125   CHK(v4i_y(k) == -2);
    126   CHK(v4i_z(k) == 6);
    127   CHK(v4i_w(k) == 10);
    128 
    129   k = v4i_sub(i, j);
    130   CHK(v4i_x(k) == 3);
    131   CHK(v4i_y(k) == 6);
    132   CHK(v4i_z(k) == 0);
    133   CHK(v4i_w(k) == -2);
    134 
    135   k = v4i_minus(j);
    136   CHK(v4i_x(k) == -v4i_x(j));
    137   CHK(v4i_y(k) == -v4i_y(j));
    138   CHK(v4i_z(k) == -v4i_z(j));
    139   CHK(v4i_w(k) == -v4i_w(j));
    140 
    141   k = v4i_eq(i, j);
    142   CHK(v4i_x(k) == (int32_t)0x00000000);
    143   CHK(v4i_y(k) == (int32_t)0x00000000);
    144   CHK(v4i_z(k) == (int32_t)0xFFFFFFFF);
    145   CHK(v4i_w(k) == (int32_t)0x00000000);
    146 
    147   k = v4i_neq(i, j);
    148   CHK(v4i_x(k) == (int32_t)0xFFFFFFFF);
    149   CHK(v4i_y(k) == (int32_t)0xFFFFFFFF);
    150   CHK(v4i_z(k) == (int32_t)0x00000000);
    151   CHK(v4i_w(k) == (int32_t)0xFFFFFFFF);
    152 
    153   k = v4i_gt(i, j);
    154   CHK(v4i_x(k) == (int32_t)0xFFFFFFFF);
    155   CHK(v4i_y(k) == (int32_t)0xFFFFFFFF);
    156   CHK(v4i_z(k) == (int32_t)0x00000000);
    157   CHK(v4i_w(k) == (int32_t)0x00000000);
    158 
    159   k = v4i_lt(i, j);
    160   CHK(v4i_x(k) == (int32_t)0x00000000);
    161   CHK(v4i_y(k) == (int32_t)0x00000000);
    162   CHK(v4i_z(k) == (int32_t)0x00000000);
    163   CHK(v4i_w(k) == (int32_t)0xFFFFFFFF);
    164 
    165   k = v4i_ge(i, j);
    166   CHK(v4i_x(k) == (int32_t)0xFFFFFFFF);
    167   CHK(v4i_y(k) == (int32_t)0xFFFFFFFF);
    168   CHK(v4i_z(k) == (int32_t)0xFFFFFFFF);
    169   CHK(v4i_w(k) == (int32_t)0x00000000);
    170 
    171   k = v4i_le(i, j);
    172   CHK(v4i_x(k) == (int32_t)0x00000000);
    173   CHK(v4i_y(k) == (int32_t)0x00000000);
    174   CHK(v4i_z(k) == (int32_t)0xFFFFFFFF);
    175   CHK(v4i_w(k) == (int32_t)0xFFFFFFFF);
    176 
    177   k = v4i_sel(i, j, v4i_set(~0, 0, ~0, 0));
    178   CHK(v4i_x(k) == -2);
    179   CHK(v4i_y(k) == 2);
    180   CHK(v4i_z(k) == 3);
    181   CHK(v4i_w(k) == 4);
    182 
    183   k = v4i_xxxx(i);
    184   CHK(v4i_x(k) == 1);
    185   CHK(v4i_y(k) == 1);
    186   CHK(v4i_z(k) == 1);
    187   CHK(v4i_w(k) == 1);
    188 
    189   k = v4i_wwxy(i);
    190   CHK(v4i_x(k) == 4);
    191   CHK(v4i_y(k) == 4);
    192   CHK(v4i_z(k) == 1);
    193   CHK(v4i_w(k) == 2);
    194 
    195   k = v4i_xyxy(i);
    196   CHK(v4i_x(k) == 1);
    197   CHK(v4i_y(k) == 2);
    198   CHK(v4i_z(k) == 1);
    199   CHK(v4i_w(k) == 2);
    200 
    201   k = v4i_wyyz(i);
    202   CHK(v4i_x(k) == 4);
    203   CHK(v4i_y(k) == 2);
    204   CHK(v4i_z(k) == 2);
    205   CHK(v4i_w(k) == 3);
    206 
    207   i = v4i_set(1, 2, 3, 4);
    208   j = v4i_set(-2, -4, 3, 6);
    209   k = v4i_min(i, j);
    210   CHK(v4i_x(k) == -2);
    211   CHK(v4i_y(k) == -4);
    212   CHK(v4i_z(k) == 3);
    213   CHK(v4i_w(k) == 4);
    214 
    215   k = v4i_max(i, j);
    216   CHK(v4i_x(k) == 1);
    217   CHK(v4i_y(k) == 2);
    218   CHK(v4i_z(k) == 3);
    219   CHK(v4i_w(k) == 6);
    220 
    221   k = v4i_reduce_min(i);
    222   CHK(v4i_x(k) == 1);
    223   CHK(v4i_y(k) == 1);
    224   CHK(v4i_z(k) == 1);
    225   CHK(v4i_w(k) == 1);
    226   CHK(v4i_reduce_min_i32(i) == 1);
    227 
    228   k = v4i_reduce_min(j);
    229   CHK(v4i_x(k) == -4);
    230   CHK(v4i_y(k) == -4);
    231   CHK(v4i_z(k) == -4);
    232   CHK(v4i_w(k) == -4);
    233   CHK(v4i_reduce_min_i32(j) == -4);
    234 
    235   k = v4i_reduce_max(i);
    236   CHK(v4i_x(k) == 4);
    237   CHK(v4i_y(k) == 4);
    238   CHK(v4i_z(k) == 4);
    239   CHK(v4i_w(k) == 4);
    240   CHK(v4i_reduce_max_i32(i) == 4);
    241 
    242   k = v4i_reduce_max(j);
    243   CHK(v4i_x(k) == 6);
    244   CHK(v4i_y(k) == 6);
    245   CHK(v4i_z(k) == 6);
    246   CHK(v4i_w(k) == 6);
    247   CHK(v4i_reduce_max_i32(j) == 6);
    248 
    249   return 0;
    250 }
    251