aosf44.c (4065B)
1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr) 2 * 3 * The RSIMD library is free software: you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published 5 * by the Free Software Foundation, either version 3 of the License, or 6 * (at your option) any later version. 7 * 8 * The RSIMD library is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ 15 16 #include "aosf44.h" 17 18 v4f_T 19 aosf44_inverse(v4f_T res[4], const v4f_T m[4]) 20 { 21 v4f_T c0, c1, c2, c3, r3; 22 v4f_T f33_023_c0, f33_023_c1, f33_023_c2, f33_023_c3; 23 v4f_T f33_123_c0, f33_123_c1, f33_123_c2, f33_123_c3; 24 v4f_T f33_013_c0, f33_013_c1, f33_013_c2, f33_013_c3; 25 v4f_T f33_012_012[3], f33_012_013[3], f33_012_023[3], f33_012_123[3]; 26 v4f_T f33_023_012[3], f33_023_013[3], f33_023_023[3], f33_023_123[3]; 27 v4f_T f33_123_012[3], f33_123_013[3], f33_123_023[3], f33_123_123[3]; 28 v4f_T f33_013_012[3], f33_013_013[3], f33_013_023[3], f33_013_123[3]; 29 v4f_T det_012, det_023, det_123, det_013; 30 v4f_T cofacts, det, idet, mpmp_idet, pmpm_idet; 31 ASSERT(res && m); 32 33 /* Retrieve the columns 0, 1, 2 and 3 and the row 3 of the "m" matrix. */ 34 c0 = m[0]; 35 c1 = m[1]; 36 c2 = m[2]; 37 c3 = m[3]; 38 r3 = aosf44_row3(m); 39 40 /* Define the 3x3 sub-matrix and compute their determinant */ 41 aosf33_set(f33_012_012, c0, c1, c2); 42 aosf33_set(f33_012_013, c0, c1, c3); 43 aosf33_set(f33_012_023, c0, c2, c3); 44 aosf33_set(f33_012_123, c1, c2, c3); 45 det_012 = v4f_048C 46 (aosf33_det(f33_012_123), 47 aosf33_det(f33_012_023), 48 aosf33_det(f33_012_013), 49 aosf33_det(f33_012_012)); 50 51 f33_023_c0 = v4f_xzww(c0); 52 f33_023_c1 = v4f_xzww(c1); 53 f33_023_c2 = v4f_xzww(c2); 54 f33_023_c3 = v4f_xzww(c3); 55 aosf33_set(f33_023_012, f33_023_c0, f33_023_c1, f33_023_c2); 56 aosf33_set(f33_023_013, f33_023_c0, f33_023_c1, f33_023_c3); 57 aosf33_set(f33_023_023, f33_023_c0, f33_023_c2, f33_023_c3); 58 aosf33_set(f33_023_123, f33_023_c1, f33_023_c2, f33_023_c3); 59 det_023 = v4f_048C 60 (aosf33_det(f33_023_123), 61 aosf33_det(f33_023_023), 62 aosf33_det(f33_023_013), 63 aosf33_det(f33_023_012)); 64 65 f33_123_c0 = v4f_yzww(c0); 66 f33_123_c1 = v4f_yzww(c1); 67 f33_123_c2 = v4f_yzww(c2); 68 f33_123_c3 = v4f_yzww(c3); 69 aosf33_set(f33_123_012, f33_123_c0, f33_123_c1, f33_123_c2); 70 aosf33_set(f33_123_013, f33_123_c0, f33_123_c1, f33_123_c3); 71 aosf33_set(f33_123_023, f33_123_c0, f33_123_c2, f33_123_c3); 72 aosf33_set(f33_123_123, f33_123_c1, f33_123_c2, f33_123_c3); 73 det_123 = v4f_048C 74 (aosf33_det(f33_123_123), 75 aosf33_det(f33_123_023), 76 aosf33_det(f33_123_013), 77 aosf33_det(f33_123_012)); 78 79 f33_013_c0 = v4f_xyww(c0); 80 f33_013_c1 = v4f_xyww(c1); 81 f33_013_c2 = v4f_xyww(c2); 82 f33_013_c3 = v4f_xyww(c3); 83 aosf33_set(f33_013_012, f33_013_c0, f33_013_c1, f33_013_c2); 84 aosf33_set(f33_013_013, f33_013_c0, f33_013_c1, f33_013_c3); 85 aosf33_set(f33_013_023, f33_013_c0, f33_013_c2, f33_013_c3); 86 aosf33_set(f33_013_123, f33_013_c1, f33_013_c2, f33_013_c3); 87 det_013 = v4f_048C 88 (aosf33_det(f33_013_123), 89 aosf33_det(f33_013_023), 90 aosf33_det(f33_013_013), 91 aosf33_det(f33_013_012)); 92 93 /* Compute the cofactors of the column 3 */ 94 cofacts = v4f_mul(det_012, v4f_set(-1.f, 1.f, -1.f, 1.f)); 95 96 /* Compute the determinant of the "m" matrix */ 97 det = v4f_dot(cofacts, r3); 98 99 /* Invert the matrix */ 100 idet = v4f_rcp(det); 101 mpmp_idet = v4f_xor 102 (idet, v4f_mask((int32_t)0x80000000, 0, (int32_t)0x80000000, 0)); 103 pmpm_idet = v4f_xor 104 (idet, v4f_mask(0, (int32_t)0x80000000, 0, (int32_t)0x80000000)); 105 res[0] = v4f_mul(det_123, pmpm_idet); 106 res[1] = v4f_mul(det_023, mpmp_idet); 107 res[2] = v4f_mul(det_013, pmpm_idet); 108 res[3] = v4f_mul(det_012, mpmp_idet); 109 110 return det; 111 } 112