rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

soaXfY.h (9318B)


      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 /*
     17  * Header used to generate functions on SoA SIMD float vectors of Y dimensions
     18  */
     19 
     20 #ifndef SOAXFY_BEGIN_H
     21   #error "The soaXfY_begin.h header must be included first"
     22 #endif
     23 
     24 /* Force GCC to unroll the loops */
     25 #ifdef COMPILER_GCC
     26   #pragma GCC push_options
     27   #pragma GCC optimize("unroll-loops")
     28 #endif
     29 
     30 #if RSIMD_SOA_DIMENSION__ <= 4
     31 static FINLINE RSIMD_vXf_T__*
     32 RSIMD_soaXfY_PREFIX__
     33   (RSIMD_vXf_T__* dst
     34   ,const RSIMD_vXf_T__ x
     35   ,const RSIMD_vXf_T__ y
     36 #if RSIMD_SOA_DIMENSION__ > 2
     37   ,const RSIMD_vXf_T__ z
     38 #endif
     39 #if RSIMD_SOA_DIMENSION__ > 3
     40   ,const RSIMD_vXf_T__ w
     41 #endif
     42   )
     43 {
     44   ASSERT(dst);
     45   dst[0] = x;
     46   dst[1] = y;
     47 #if RSIMD_SOA_DIMENSION__ > 2
     48   dst[2] = z;
     49 #endif
     50 #if RSIMD_SOA_DIMENSION__ > 3
     51   dst[3] = w;
     52 #endif
     53   return dst;
     54 }
     55 #endif
     56 
     57 static FINLINE RSIMD_vXf_T__*
     58 RSIMD_soaXfY__(splat)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__ val)
     59 {
     60   int i;
     61   ASSERT(dst);
     62   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
     63     dst[i] = val;
     64   return dst;
     65 }
     66 
     67 static FINLINE RSIMD_vXf_T__*
     68 RSIMD_soaXfY__(set__)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src)
     69 {
     70   int i;
     71   ASSERT(dst && src);
     72   ASSERT(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__));
     73   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
     74     dst[i] = src[i];
     75   return dst;
     76 }
     77 
     78 static FINLINE RSIMD_vXf_T__*
     79 RSIMD_soaXfY__(set)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* src)
     80 {
     81   ASSERT(dst && src);
     82   if(!MEM_AREA_OVERLAP(dst, SIZEOF_RSIMD_soaXfY__, src, SIZEOF_RSIMD_soaXfY__)) {
     83     return RSIMD_soaXfY__(set__)(dst, src);
     84   } else {
     85     RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
     86     return RSIMD_soaXfY__(set__)(dst, RSIMD_soaXfY__(set__)(tmp, src));
     87   }
     88 }
     89 
     90 static FINLINE RSIMD_vXf_T__
     91 RSIMD_soaXfY__(dot)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
     92 {
     93   RSIMD_vXf_T__ dot;
     94   int i;
     95   ASSERT(a && b);
     96   dot = RSIMD_vXf__(mul)(a[0], b[0]);
     97   FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__) {
     98     dot = RSIMD_vXf__(madd)(a[i], b[i], dot);
     99   }
    100   return dot;
    101 }
    102 
    103 static FINLINE RSIMD_vXf_T__
    104 RSIMD_soaXfY__(len)(const RSIMD_vXf_T__* a)
    105 {
    106   ASSERT(a);
    107   return RSIMD_vXf__(sqrt)(RSIMD_soaXfY__(dot)(a, a));
    108 }
    109 
    110 static FINLINE RSIMD_vXf_T__
    111 RSIMD_soaXfY__(normalize)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a)
    112 {
    113   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    114   RSIMD_vXf_T__ sqr_len, rcp_len;
    115   RSIMD_vXf_T__ mask;
    116   int i;
    117   ASSERT(dst && a);
    118 
    119   sqr_len = RSIMD_soaXfY__(dot)(a, a);
    120   mask = RSIMD_vXf__(neq)(sqr_len, RSIMD_vXf__(zero)());
    121   rcp_len = RSIMD_vXf__(rsqrt)(sqr_len);
    122   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    123     tmp[i] = RSIMD_vXf__(and)(mask, RSIMD_vXf__(mul)(a[i], rcp_len));
    124   RSIMD_soaXfY__(set__)(dst, tmp);
    125   return RSIMD_vXf__(mul)(sqr_len, rcp_len);
    126 }
    127 
    128 static FINLINE RSIMD_vXf_T__
    129 RSIMD_soaXfY__(is_normalized)(const RSIMD_vXf_T__* a)
    130 {
    131   return RSIMD_vXf__(eq_eps)
    132     (RSIMD_soaXfY__(len)(a),
    133      RSIMD_vXf__(set1)(1.f),
    134      RSIMD_vXf__(set1)(1.e-6f));
    135 }
    136 
    137 static FINLINE RSIMD_vXf_T__*
    138 RSIMD_soaXfY__(add)
    139   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
    140 {
    141   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    142   int i;
    143   ASSERT(dst && a && b);
    144   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    145     tmp[i] = RSIMD_vXf__(add)(a[i], b[i]);
    146   return RSIMD_soaXfY__(set__)(dst, tmp);
    147 }
    148 
    149 static FINLINE RSIMD_vXf_T__*
    150 RSIMD_soaXfY__(addf)
    151   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
    152 {
    153   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    154   int i;
    155   ASSERT(dst && a);
    156   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    157     tmp[i] = RSIMD_vXf__(add)(a[i], f);
    158   return RSIMD_soaXfY__(set__)(dst, tmp);
    159 }
    160 
    161 static FINLINE RSIMD_vXf_T__*
    162 RSIMD_soaXfY__(sub)
    163   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
    164 {
    165   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    166   int i;
    167   ASSERT(dst && a && b);
    168   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    169     tmp[i] = RSIMD_vXf__(sub)(a[i], b[i]);
    170   return RSIMD_soaXfY__(set__)(dst, tmp);
    171 }
    172 
    173 static FINLINE RSIMD_vXf_T__*
    174 RSIMD_soaXfY__(subf)
    175   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
    176 {
    177   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    178   int i;
    179   ASSERT(dst && a);
    180   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    181     tmp[i] = RSIMD_vXf__(sub)(a[i], f);
    182   return RSIMD_soaXfY__(set__)(dst, tmp);
    183 }
    184 
    185 static FINLINE RSIMD_vXf_T__*
    186 RSIMD_soaXfY__(mul)
    187   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
    188 {
    189   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    190   int i;
    191   ASSERT(dst && a && b);
    192   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    193     tmp[i] = RSIMD_vXf__(mul)(a[i], b[i]);
    194   return RSIMD_soaXfY__(set__)(dst, tmp);
    195 }
    196 
    197 static FINLINE RSIMD_vXf_T__*
    198 RSIMD_soaXfY__(mulf)
    199   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
    200 {
    201   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    202   int i;
    203   ASSERT(dst && a);
    204   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    205     tmp[i] = RSIMD_vXf__(mul)(a[i], f);
    206   return RSIMD_soaXfY__(set__)(dst, tmp);
    207 }
    208 
    209 static FINLINE RSIMD_vXf_T__*
    210 RSIMD_soaXfY__(div)
    211   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
    212 {
    213   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    214   int i;
    215   ASSERT(dst && a && b);
    216   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    217     tmp[i] = RSIMD_vXf__(div)(a[i], b[i]);
    218   return RSIMD_soaXfY__(set__)(dst, tmp);
    219 }
    220 
    221 static FINLINE RSIMD_vXf_T__*
    222 RSIMD_soaXfY__(divf)
    223   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__ f)
    224 {
    225   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    226   int i;
    227   ASSERT(dst && a);
    228   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    229     tmp[i] = RSIMD_vXf__(div)(a[i], f);
    230   return RSIMD_soaXfY__(set__)(dst, tmp);
    231 }
    232 
    233 static FINLINE RSIMD_vXf_T__*
    234 RSIMD_soaXfY__(minus)(RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a)
    235 {
    236   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    237   int i;
    238   ASSERT(dst && a);
    239   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    240     tmp[i] = RSIMD_vXf__(minus)(a[i]);
    241   return RSIMD_soaXfY__(set__)(dst, tmp);
    242 }
    243 
    244 static FINLINE RSIMD_vXf_T__
    245 RSIMD_soaXfY__(sum)(const RSIMD_vXf_T__* a)
    246 {
    247   RSIMD_vXf_T__ f;
    248   int i = 0;
    249   ASSERT(a);
    250   f = a[i];
    251   FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
    252     f = RSIMD_vXf__(add)(f, a[i]);
    253   return f;
    254 }
    255 
    256 static FINLINE RSIMD_vXf_T__*
    257 RSIMD_soaXfY__(lerp)
    258   (RSIMD_vXf_T__* dst,
    259    const RSIMD_vXf_T__* from,
    260    const RSIMD_vXf_T__* to,
    261    const RSIMD_vXf_T__ t)
    262 {
    263   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    264   int i;
    265   ASSERT(dst && from && to);
    266 
    267   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    268     tmp[i] = RSIMD_vXf__(lerp)(from[i], to[i], t);
    269   RSIMD_soaXfY__(set__)(dst, tmp);
    270   return dst;
    271 }
    272 
    273 static FINLINE RSIMD_vXf_T__
    274 RSIMD_soaXfY__(eq)(const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
    275 {
    276   RSIMD_vXf_T__ is_eq;
    277   int i = 0;
    278   ASSERT(a && b);
    279   is_eq = RSIMD_vXf__(eq)(a[0], b[0]);
    280   FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
    281     is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq)(a[i], b[i]));
    282   return is_eq;
    283 }
    284 
    285 static FINLINE RSIMD_vXf_T__
    286 RSIMD_soaXfY__(eq_eps)
    287   (const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b, const RSIMD_vXf_T__ eps)
    288 {
    289   RSIMD_vXf_T__ is_eq;
    290   int i = 0;
    291   ASSERT(a && b);
    292   is_eq = RSIMD_vXf__(eq_eps)(a[0], b[0], eps);
    293   FOR_EACH(i, 1, RSIMD_SOA_DIMENSION__)
    294     is_eq = RSIMD_vXf__(and)(is_eq, RSIMD_vXf__(eq_eps)(a[i], b[i], eps));
    295   return is_eq;
    296 }
    297 
    298 static FINLINE RSIMD_vXf_T__*
    299 RSIMD_soaXfY__(max)
    300   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
    301 {
    302   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    303   int i;
    304   ASSERT(dst && a && b);
    305   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    306     tmp[i] = RSIMD_vXf__(max)(a[i], b[i]);
    307   return RSIMD_soaXfY__(set__)(dst, tmp);
    308 }
    309 
    310 static FINLINE RSIMD_vXf_T__*
    311 RSIMD_soaXfY__(min)
    312   (RSIMD_vXf_T__* dst, const RSIMD_vXf_T__* a, const RSIMD_vXf_T__* b)
    313 {
    314   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    315   int i;
    316   ASSERT(dst && a && b);
    317   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    318     tmp[i] = RSIMD_vXf__(min)(a[i], b[i]);
    319   return RSIMD_soaXfY__(set__)(dst, tmp);
    320 }
    321 
    322 static FINLINE RSIMD_vXf_T__*
    323 RSIMD_soaXfY__(sel)
    324   (RSIMD_vXf_T__* dst,
    325    const RSIMD_vXf_T__* vfalse,
    326    const RSIMD_vXf_T__* vtrue,
    327    const RSIMD_vXf_T__ cond)
    328 {
    329   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    330   int i;
    331   ASSERT(dst && vfalse && vtrue);
    332   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    333     tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], cond);
    334   return RSIMD_soaXfY__(set__)(dst, tmp);
    335 }
    336 
    337 static FINLINE RSIMD_vXf_T__*
    338 RSIMD_soaXfY__(selv)
    339   (RSIMD_vXf_T__* dst,
    340    const RSIMD_vXf_T__* vfalse,
    341    const RSIMD_vXf_T__* vtrue,
    342    const RSIMD_vXf_T__* vcond)
    343 {
    344   RSIMD_vXf_T__ tmp[RSIMD_SOA_DIMENSION__];
    345   int i;
    346   ASSERT(dst && vfalse && vtrue);
    347   FOR_EACH(i, 0, RSIMD_SOA_DIMENSION__)
    348     tmp[i] = RSIMD_vXf__(sel)(vfalse[i], vtrue[i], vcond[i]);
    349   return RSIMD_soaXfY__(set__)(dst, tmp);
    350 }
    351 
    352 /* Restore compilation parameters */
    353 #ifdef COMPILER_GCC
    354   #pragma GCC pop_options
    355 #endif
    356