rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

aosf33.h (8252B)


      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #ifndef AOSF33_H
     17 #define AOSF33_H
     18 
     19 #include "rsimd.h"
     20 #include <math.h>
     21 
     22 /*
     23  * Functions on column major AoS float33 matrices. A 3x3 matrix is a set of 3
     24  * 4-wide SIMD float vectors, each representing a matrix column. Actually the
     25  * fourth component of each vector is ignored and its value is thus undefined.
     26  */
     27 
     28 /*******************************************************************************
     29  * Set operations
     30  ******************************************************************************/
     31 static FINLINE float*
     32 aosf33_store(float res[9]/* Column major */, const v4f_T m[3])
     33 {
     34   ALIGN(16) float tmp[4];
     35   int i;
     36   ASSERT(res && m);
     37   FOR_EACH(i, 0, 3) {
     38     v4f_store(tmp, m[i]);
     39     res[i*3 + 0] = tmp[0];
     40     res[i*3 + 1] = tmp[1];
     41     res[i*3 + 2] = tmp[2];
     42   }
     43   return res;
     44 }
     45 
     46 static FINLINE v4f_T*
     47 aosf33_load(v4f_T res[3], const float m[9]/* Column major */)
     48 {
     49   int i;
     50   ASSERT(res && m);
     51   FOR_EACH(i, 0, 3)
     52     res[i] = v4f_set(m[i*3+0], m[i*3+1], m[i*3+2], 0.f);
     53   return res;
     54 }
     55 
     56 static FINLINE v4f_T*
     57 aosf33_set(v4f_T m[3], const v4f_T c0, const v4f_T c1, const v4f_T c2)
     58 {
     59   ASSERT(m);
     60   m[0] = c0;
     61   m[1] = c1;
     62   m[2] = c2;
     63   return m;
     64 }
     65 
     66 static FINLINE v4f_T*
     67 aosf33_identity(v4f_T m[3])
     68 {
     69   ASSERT(m);
     70   m[0] = v4f_set(1.f, 0.f, 0.f, 0.f);
     71   m[1] = v4f_set(0.f, 1.f, 0.f, 0.f);
     72   m[2] = v4f_set(0.f, 0.f, 1.f, 0.f);
     73   return m;
     74 }
     75 
     76 static FINLINE v4f_T*
     77 aosf33_zero(v4f_T m[3])
     78 {
     79   ASSERT(m);
     80   m[0] = v4f_zero();
     81   m[1] = v4f_zero();
     82   m[2] = v4f_zero();
     83   return m;
     84 }
     85 
     86 static FINLINE v4f_T*
     87 aosf33_set_row0(v4f_T m[3], const v4f_T v)
     88 {
     89   ASSERT(m);
     90   m[0] = v4f_ayzw(m[0], v);
     91   m[1] = v4f_ayzw(m[1], v4f_yyww(v));
     92   m[2] = v4f_ayzw(m[2], v4f_zwzw(v));
     93   return m;
     94 }
     95 
     96 static FINLINE v4f_T*
     97 aosf33_set_row1(v4f_T m[3], const v4f_T v)
     98 {
     99   ASSERT(m);
    100   m[0] = v4f_xbzw(m[0], v4f_xxyy(v));
    101   m[1] = v4f_xbzw(m[1], v);
    102   m[2] = v4f_xbzw(m[2], v4f_zzww(v));
    103   return m;
    104 }
    105 
    106 static FINLINE v4f_T*
    107 aosf33_set_row2(v4f_T m[3], const v4f_T v)
    108 {
    109   ASSERT(m);
    110   m[0] = v4f_xyab(m[0], v4f_xyxy(v));
    111   m[1] = v4f_xyab(m[1], v4f_yyzz(v));
    112   m[2] = v4f_xyab(m[2], v4f_zzww(v));
    113   return m;
    114 }
    115 
    116 static FINLINE v4f_T*
    117 aosf33_set_row(v4f_T m[3], const v4f_T v, const int id)
    118 {
    119   const v4f_T mask = v4f_mask(-(id==0), -(id==1), -(id==2), 0);
    120   ASSERT(m && id >= 0 && id <= 2);
    121   m[0] = v4f_sel(m[0], v4f_xxxx(v), mask);
    122   m[1] = v4f_sel(m[1], v4f_yyyy(v), mask);
    123   m[2] = v4f_sel(m[2], v4f_zzzz(v), mask);
    124   return m;
    125 }
    126 
    127 static FINLINE v4f_T*
    128 aosf33_set_col(v4f_T m[3], const v4f_T v, const int id)
    129 {
    130   ASSERT(m && id >= 0 && id <= 2);
    131   m[id] = v;
    132   return m;
    133 }
    134 
    135 /*******************************************************************************
    136  * Arithmetic operations
    137  ******************************************************************************/
    138 static FINLINE v4f_T*
    139 aosf33_add(v4f_T res[3], const v4f_T m0[3], const v4f_T m1[3])
    140 {
    141   ASSERT(res && m0 && m1);
    142   res[0] = v4f_add(m0[0], m1[0]);
    143   res[1] = v4f_add(m0[1], m1[1]);
    144   res[2] = v4f_add(m0[2], m1[2]);
    145   return res;
    146 }
    147 
    148 static FINLINE v4f_T*
    149 aosf33_sub(v4f_T res[3], const v4f_T m0[3], const v4f_T m1[3])
    150 {
    151   ASSERT(res && m0 && m1);
    152   res[0] = v4f_sub(m0[0], m1[0]);
    153   res[1] = v4f_sub(m0[1], m1[1]);
    154   res[2] = v4f_sub(m0[2], m1[2]);
    155   return res;
    156 }
    157 
    158 static FINLINE v4f_T*
    159 aosf33_minus(v4f_T res[3], const v4f_T m[3])
    160 {
    161   ASSERT(res && m);
    162   res[0] = v4f_minus(m[0]);
    163   res[1] = v4f_minus(m[1]);
    164   res[2] = v4f_minus(m[2]);
    165   return res;
    166 }
    167 
    168 static FINLINE v4f_T*
    169 aosf33_abs(v4f_T res[3], const v4f_T m[3])
    170 {
    171   ASSERT(res && m);
    172   res[0] = v4f_abs(m[0]);
    173   res[1] = v4f_abs(m[1]);
    174   res[2] = v4f_abs(m[2]);
    175   return res;
    176 }
    177 
    178 static FINLINE v4f_T*
    179 aosf33_mul(v4f_T res[3], const v4f_T m[3], const v4f_T v)
    180 {
    181   ASSERT(res && m);
    182   res[0] = v4f_mul(m[0], v);
    183   res[1] = v4f_mul(m[1], v);
    184   res[2] = v4f_mul(m[2], v);
    185   return res;
    186 }
    187 
    188 static FINLINE v4f_T
    189 aosf33_mulf3(const v4f_T m[3], const v4f_T v)
    190 {
    191   v4f_T r0, r1;
    192   ASSERT(m);
    193   r0 = v4f_mul(m[0], v4f_xxxx(v));
    194   r1 = v4f_madd(m[1], v4f_yyyy(v), r0);
    195   return v4f_madd(m[2], v4f_zzzz(v), r1);
    196 }
    197 
    198 static FINLINE v4f_T
    199 aosf3_mulf33(const v4f_T v, const v4f_T m[3])
    200 {
    201   v4f_T xxxx, yyyy, zzzz, yyzz;
    202   ASSERT(m);
    203   xxxx = v4f_dot3(v, m[0]);
    204   yyyy = v4f_dot3(v, m[1]);
    205   zzzz = v4f_dot3(v, m[2]);
    206   yyzz = v4f_xyab(yyyy, zzzz);
    207   return v4f_ayzw(yyzz, xxxx);
    208 }
    209 
    210 static FINLINE v4f_T*
    211 aosf33_mulf33(v4f_T res[3], const v4f_T a[3], const v4f_T b[3])
    212 {
    213   v4f_T c0, c1, c2;
    214   ASSERT(res && a && b);
    215   c0 = aosf33_mulf3(a, b[0]);
    216   c1 = aosf33_mulf3(a, b[1]);
    217   c2 = aosf33_mulf3(a, b[2]);
    218   res[0] = c0;
    219   res[1] = c1;
    220   res[2] = c2;
    221   return res;
    222 }
    223 
    224 static FINLINE v4f_T*
    225 aosf33_transpose(v4f_T res[3], const v4f_T m[3])
    226 {
    227   v4f_T c0, c1, c2;
    228   v4f_T x0x2y0y2, z0z2w0w2, z1z1y1y1;
    229   ASSERT(res && m);
    230   c0 = m[0];
    231   c1 = m[1];
    232   c2 = m[2];
    233   x0x2y0y2 = v4f_xayb(c0, c2);
    234   z0z2w0w2 = v4f_zcwd(c0, c2);
    235   z1z1y1y1 = v4f_zzyy(c1);
    236   res[0] = v4f_xayb(x0x2y0y2, c1);
    237   res[1] = v4f_zcwd(x0x2y0y2, z1z1y1y1);
    238   res[2] = v4f_xayb(z0z2w0w2, z1z1y1y1);
    239   return res;
    240 }
    241 
    242 static FINLINE v4f_T
    243 aosf33_det(const v4f_T m[3])
    244 {
    245   ASSERT(m);
    246   return v4f_dot3(m[2], v4f_cross3(m[0], m[1]));
    247 }
    248 
    249 static FINLINE v4f_T /* Return the determinant */
    250 aosf33_invtrans(v4f_T res[3], const v4f_T m[3])
    251 {
    252   v4f_T t[3], det, invdet;
    253   ASSERT(res && m);
    254   t[0] = v4f_cross3(m[1], m[2]);
    255   t[1] = v4f_cross3(m[2], m[0]);
    256   t[2] = v4f_cross3(m[0], m[1]);
    257   det = v4f_dot3(t[2], m[2]);
    258   invdet = v4f_rcp(det);
    259   aosf33_mul(res, t, invdet);
    260   return det;
    261 }
    262 
    263 static FINLINE v4f_T
    264 aosf33_inverse(v4f_T res[3], const v4f_T m[3])
    265 {
    266   v4f_T det;
    267   ASSERT(res && m);
    268   det = aosf33_invtrans(res, m);
    269   aosf33_transpose(res, res);
    270   return det;
    271 }
    272 
    273 /*******************************************************************************
    274  * Get operations
    275  ******************************************************************************/
    276 static FINLINE v4f_T
    277 aosf33_row0(const v4f_T m[3])
    278 {
    279   ASSERT(m);
    280   return v4f_ayzw(v4f_xyab(v4f_xxzz(m[1]), v4f_xxzz(m[2])), m[0]);
    281 }
    282 
    283 static FINLINE v4f_T
    284 aosf33_row1(const v4f_T m[3])
    285 {
    286   ASSERT(m);
    287   return v4f_ayzw(v4f_xyab(v4f_yyww(m[1]), v4f_yyww(m[2])), v4f_yyww(m[0]));
    288 }
    289 
    290 static FINLINE v4f_T
    291 aosf33_row2(const v4f_T m[3])
    292 {
    293   ASSERT(m);
    294   return v4f_ayzw(v4f_xyab(v4f_zzww(m[1]), v4f_zzww(m[2])), v4f_zzww(m[0]));
    295 }
    296 
    297 static FINLINE v4f_T
    298 aosf33_row(const v4f_T m[3], int id)
    299 {
    300   v4f_T t[3];
    301   ASSERT(m && id >= 0 && id <= 2);
    302   aosf33_transpose(t, m);
    303   return t[id];
    304 }
    305 
    306 static FINLINE v4f_T
    307 aosf33_col(const v4f_T m[3], int id)
    308 {
    309   ASSERT(m && id >= 0 && id <= 2);
    310   return m[id];
    311 }
    312 
    313 /*******************************************************************************
    314  * Build functions
    315  ******************************************************************************/
    316 static FINLINE v4f_T* /* XYZ norm */
    317 aosf33_rotation(v4f_T res[3], float pitch, float yaw, float roll)
    318 {
    319   float c1, c2, c3, s1, s2, s3;
    320   ASSERT(res);
    321   c1 = (float)cos(pitch);
    322   c2 = (float)cos(yaw);
    323   c3 = (float)cos(roll);
    324   s1 = (float)sin(pitch);
    325   s2 = (float)sin(yaw);
    326   s3 = (float)sin(roll);
    327   res[0] = v4f_set(c2*c3, c1*s3 + c3*s1*s2, s1*s3 - c1*c3*s2, 0.f);
    328   res[1] = v4f_set(-c2*s3, c1*c3 - s1*s2*s3, c1*s2*s3 + c3*s1, 0.f);
    329   res[2] = v4f_set(s2, -c2*s1, c1*c2, 0.f);
    330   return res;
    331 }
    332 
    333 static FINLINE v4f_T* /* rotation around the Y axis */
    334 aosf33_yaw_rotation(v4f_T res[3], float yaw)
    335 {
    336   float c, s;
    337   ASSERT(res);
    338   c = (float)cos(yaw);
    339   s = (float)sin(yaw);
    340   res[0] = v4f_set(c, 0.f, -s, 0.f);
    341   res[1] = v4f_set(0.f, 1.f, 0.f, 0.f);
    342   res[2] = v4f_set(s, 0.f, c, 0.f);
    343   return res;
    344 }
    345 
    346 #endif /* AOSF33_H */
    347