rsimd

Make SIMD instruction sets easier to use
git clone git://git.meso-star.fr/rsimd.git
Log | Files | Refs | README | LICENSE

aosf44.c (4065B)


      1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr)
      2  *
      3  * The RSIMD library is free software: you can redistribute it and/or modify
      4  * it under the terms of the GNU General Public License as published
      5  * by the Free Software Foundation, either version 3 of the License, or
      6  * (at your option) any later version.
      7  *
      8  * The RSIMD library is distributed in the hope that it will be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     11  * GNU General Public License for more details.
     12  *
     13  * You should have received a copy of the GNU General Public License
     14  * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */
     15 
     16 #include "aosf44.h"
     17 
     18 v4f_T
     19 aosf44_inverse(v4f_T res[4], const v4f_T m[4])
     20 {
     21   v4f_T c0, c1, c2, c3, r3;
     22   v4f_T f33_023_c0, f33_023_c1, f33_023_c2, f33_023_c3;
     23   v4f_T f33_123_c0, f33_123_c1, f33_123_c2, f33_123_c3;
     24   v4f_T f33_013_c0, f33_013_c1, f33_013_c2, f33_013_c3;
     25   v4f_T f33_012_012[3], f33_012_013[3], f33_012_023[3], f33_012_123[3];
     26   v4f_T f33_023_012[3], f33_023_013[3], f33_023_023[3], f33_023_123[3];
     27   v4f_T f33_123_012[3], f33_123_013[3], f33_123_023[3], f33_123_123[3];
     28   v4f_T f33_013_012[3], f33_013_013[3], f33_013_023[3], f33_013_123[3];
     29   v4f_T det_012, det_023, det_123, det_013;
     30   v4f_T cofacts, det, idet, mpmp_idet, pmpm_idet;
     31   ASSERT(res && m);
     32 
     33   /* Retrieve the columns 0, 1, 2 and 3 and the row 3 of the "m" matrix. */
     34   c0 = m[0];
     35   c1 = m[1];
     36   c2 = m[2];
     37   c3 = m[3];
     38   r3 = aosf44_row3(m);
     39 
     40   /* Define the 3x3 sub-matrix and compute their determinant */
     41   aosf33_set(f33_012_012, c0, c1, c2);
     42   aosf33_set(f33_012_013, c0, c1, c3);
     43   aosf33_set(f33_012_023, c0, c2, c3);
     44   aosf33_set(f33_012_123, c1, c2, c3);
     45   det_012 = v4f_048C
     46     (aosf33_det(f33_012_123),
     47      aosf33_det(f33_012_023),
     48      aosf33_det(f33_012_013),
     49      aosf33_det(f33_012_012));
     50 
     51   f33_023_c0 = v4f_xzww(c0);
     52   f33_023_c1 = v4f_xzww(c1);
     53   f33_023_c2 = v4f_xzww(c2);
     54   f33_023_c3 = v4f_xzww(c3);
     55   aosf33_set(f33_023_012, f33_023_c0, f33_023_c1, f33_023_c2);
     56   aosf33_set(f33_023_013, f33_023_c0, f33_023_c1, f33_023_c3);
     57   aosf33_set(f33_023_023, f33_023_c0, f33_023_c2, f33_023_c3);
     58   aosf33_set(f33_023_123, f33_023_c1, f33_023_c2, f33_023_c3);
     59   det_023 = v4f_048C
     60     (aosf33_det(f33_023_123),
     61      aosf33_det(f33_023_023),
     62      aosf33_det(f33_023_013),
     63      aosf33_det(f33_023_012));
     64 
     65   f33_123_c0 = v4f_yzww(c0);
     66   f33_123_c1 = v4f_yzww(c1);
     67   f33_123_c2 = v4f_yzww(c2);
     68   f33_123_c3 = v4f_yzww(c3);
     69   aosf33_set(f33_123_012, f33_123_c0, f33_123_c1, f33_123_c2);
     70   aosf33_set(f33_123_013, f33_123_c0, f33_123_c1, f33_123_c3);
     71   aosf33_set(f33_123_023, f33_123_c0, f33_123_c2, f33_123_c3);
     72   aosf33_set(f33_123_123, f33_123_c1, f33_123_c2, f33_123_c3);
     73   det_123 = v4f_048C
     74     (aosf33_det(f33_123_123),
     75      aosf33_det(f33_123_023),
     76      aosf33_det(f33_123_013),
     77      aosf33_det(f33_123_012));
     78 
     79   f33_013_c0 = v4f_xyww(c0);
     80   f33_013_c1 = v4f_xyww(c1);
     81   f33_013_c2 = v4f_xyww(c2);
     82   f33_013_c3 = v4f_xyww(c3);
     83   aosf33_set(f33_013_012, f33_013_c0, f33_013_c1, f33_013_c2);
     84   aosf33_set(f33_013_013, f33_013_c0, f33_013_c1, f33_013_c3);
     85   aosf33_set(f33_013_023, f33_013_c0, f33_013_c2, f33_013_c3);
     86   aosf33_set(f33_013_123, f33_013_c1, f33_013_c2, f33_013_c3);
     87   det_013 = v4f_048C
     88     (aosf33_det(f33_013_123),
     89      aosf33_det(f33_013_023),
     90      aosf33_det(f33_013_013),
     91      aosf33_det(f33_013_012));
     92 
     93   /* Compute the cofactors of the column 3 */
     94   cofacts = v4f_mul(det_012, v4f_set(-1.f, 1.f, -1.f, 1.f));
     95 
     96   /* Compute the determinant of the "m" matrix */
     97   det = v4f_dot(cofacts, r3);
     98 
     99   /* Invert the matrix */
    100   idet = v4f_rcp(det);
    101   mpmp_idet = v4f_xor
    102     (idet, v4f_mask((int32_t)0x80000000, 0, (int32_t)0x80000000, 0));
    103   pmpm_idet = v4f_xor
    104     (idet, v4f_mask(0, (int32_t)0x80000000, 0, (int32_t)0x80000000));
    105   res[0] = v4f_mul(det_123, pmpm_idet);
    106   res[1] = v4f_mul(det_023, mpmp_idet);
    107   res[2] = v4f_mul(det_013, pmpm_idet);
    108   res[3] = v4f_mul(det_012, mpmp_idet);
    109 
    110   return det;
    111 }
    112