aosq.c (2871B)
1 /* Copyright (C) 2014-2019, 2021, 2023, 2025 Vincent Forest (vaplv@free.fr) 2 * 3 * The RSIMD library is free software: you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published 5 * by the Free Software Foundation, either version 3 of the License, or 6 * (at your option) any later version. 7 * 8 * The RSIMD library is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with the RSIMD library. If not, see <http://www.gnu.org/licenses/>. */ 15 16 #include "aosq.h" 17 18 v4f_T 19 aosq_slerp(const v4f_T from, const v4f_T to, const v4f_T vvvv) 20 { 21 v4f_T tmp_cos_omega, cos_omega, omega, rcp_sin_omega; 22 v4f_T one_sub_v; 23 v4f_T mask; 24 v4f_T tmp0, tmp1, tmp2; 25 v4f_T scale0, scale1; 26 float f; 27 28 f = v4f_x(vvvv); 29 if(f == 0.f) 30 return from; 31 else if(f == 1.f) 32 return to; 33 34 tmp_cos_omega = v4f_dot(from, to); 35 36 mask = v4f_lt(tmp_cos_omega, v4f_zero()); 37 tmp0 = v4f_sel(to, v4f_minus(to), mask); 38 cos_omega = v4f_sel(tmp_cos_omega, v4f_minus(tmp_cos_omega), mask); 39 40 omega = v4f_acos(cos_omega); 41 rcp_sin_omega = v4f_rcp(v4f_sin(omega)); 42 one_sub_v = v4f_sub(v4f_set1(1.f), vvvv); 43 tmp1 = v4f_mul(v4f_sin(v4f_mul(one_sub_v, omega)), rcp_sin_omega); 44 tmp2 = v4f_mul(v4f_sin(v4f_mul(omega, vvvv)), rcp_sin_omega); 45 46 mask = v4f_gt(v4f_sub(v4f_set1(1.f), cos_omega), v4f_set1(1.e-6f)); 47 scale0 = v4f_sel(one_sub_v, tmp1, mask); 48 scale1 = v4f_sel(vvvv, tmp2, mask); 49 50 return v4f_madd(from, scale0, v4f_mul(tmp0, scale1)); 51 } 52 53 void 54 aosq_to_aosf33(const v4f_T q, v4f_T out[3]) 55 { 56 const v4f_T i2j2k2_ = v4f_add(q, q); 57 58 const v4f_T r0 = /* { jj2 + kk2, ij2 + ak2, ik2 - aj2 } */ 59 v4f_madd(v4f_mul(v4f_zzyy(i2j2k2_), v4f_zwwz(q)), 60 v4f_set(1.f, 1.f, -1.f, 0.f), 61 v4f_mul(v4f_yyzz(i2j2k2_), v4f_yxxy(q))); 62 const v4f_T r1 = /* { ij2 - ak2, ii2 + kk2, jk2 + ai2 } */ 63 v4f_madd(v4f_mul(v4f_zzxx(i2j2k2_), v4f_wzwz(q)), 64 v4f_set(-1.f, 1.f, 1.f, 0.f), 65 v4f_mul(v4f_yxzw(i2j2k2_), v4f_xxyy(q))); 66 const v4f_T r2 = /* { ik2 + aj2, jk2 - ai2, ii2 + jj2 } */ 67 v4f_madd(v4f_mul(v4f_yxyx(i2j2k2_), v4f_wwyy(q)), 68 v4f_set(1.f, -1.f, 1.f, 0.f), 69 v4f_mul(v4f_zzxx(i2j2k2_), v4f_xyxy(q))); 70 71 out[0] = /* { 1 - (jj2 + kk2), ij2 + ak2, ik2 - aj2 } */ 72 v4f_madd(r0, v4f_set(-1.f, 1.f, 1.f, 0.f), v4f_set(1.f, 0.f, 0.f, 0.f)); 73 out[1] = /* { ij2 - ak2, 1 - (ii2 + kk2), jk2 + ai2 } */ 74 v4f_madd(r1, v4f_set(1.f, -1.f, 1.f, 0.f), v4f_set(0.f, 1.f, 0.f, 0.f)); 75 out[2] = /* { ik2 + aj2, jk2 - ai2, 1 - (ii2 + jj2) } */ 76 v4f_madd(r2, v4f_set(1.f, 1.f, -1.f, 0.f), v4f_set(0.f, 0.f, 1.f, 0.f)); 77 } 78