BmnRoot
Loading...
Searching...
No Matches
P4_F64vec2.h
Go to the documentation of this file.
1#ifndef L1Algo_F64vec2P4_H
2#define L1Algo_F64vec2P4_H
3
4#include <iostream>
5#include <cmath>
6#include <emmintrin.h>
9/**********************************
10 *
11 * Vector of four single doubles
12 *
13 **********************************/
15//#pragma pack(push,16)/* Must ensure class & union 16-B aligned */
16
17//typedef __m128d Vectordouble __attribute__ ((aligned(16)));
19const union
21 double d;
22 long long i;
23} __d_one = {(double)1.};
25const union
26{
27 long long i[2];
28 __m128d m;
29}
30 __f64vec2_abs_mask_cheat = {{0x7fffffffffffffffll, 0x7fffffffffffffffll}},
31 __f64vec2_sgn_mask_cheat = {{0x8000000000000000ull, 0x8000000000000000ull}},
34 __f64vec2_true_cheat = {{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}},
35 __f64vec2_false_cheat = {{0x0000000000000000, 0x0000000000000000}};
36
37#define _f64vec2_abs_mask (static_cast<F64vec2>(__f64vec2_abs_mask_cheat.m))
38#define _f64vec2_sgn_mask (static_cast<F64vec2>(__f64vec2_sgn_mask_cheat.m))
39#define _f64vec2_zero (static_cast<F64vec2>(__f64vec2_zero_cheat.m))
40#define _f64vec2_one (static_cast<F64vec2>(__f64vec2_one_cheat.m))
41#define _f64vec2_true (static_cast<F64vec2>(__f64vec2_true_cheat.m))
42#define _f64vec2_false (static_cast<F64vec2>(__f64vec2_false_cheat.m))
43
44class F64vec2
46 public:
47
48 __m128d v;
49
50 double & operator[]( int i ){ return (reinterpret_cast<double*>(&v))[i]; }
51 double operator[]( int i ) const { return (reinterpret_cast<const double*>(&v))[i]; }
53 F64vec2( ):v(_mm_set_pd1(0)){}
54 F64vec2( const __m128d &a ):v(a) {}
55 F64vec2( const double &a ):v(_mm_set_pd1(a)) {}
56
57 F64vec2( const double &f0, const double &f1 ):v(_mm_set_pd(f1,f0)) {}
59 /* Conversion function */
60 operator __m128d() const { return v; } /* Convert to __m128d */
62 /* Arithmetic Operators */
63 friend F64vec2 operator +(const F64vec2 &a, const F64vec2 &b) { return _mm_add_pd(a,b); }
64 friend F64vec2 operator -(const F64vec2 &a, const F64vec2 &b) { return _mm_sub_pd(a,b); }
65 friend F64vec2 operator *(const F64vec2 &a, const F64vec2 &b) { return _mm_mul_pd(a,b); }
66 friend F64vec2 operator /(const F64vec2 &a, const F64vec2 &b) { return _mm_div_pd(a,b); }
67
68 /* Functions */
69 friend F64vec2 min( const F64vec2 &a, const F64vec2 &b ){ return _mm_min_pd(a, b); }
70 friend F64vec2 max( const F64vec2 &a, const F64vec2 &b ){ return _mm_max_pd(a, b); }
71
72 /* Square Root */
73 friend F64vec2 sqrt ( const F64vec2 &a ){ return _mm_sqrt_pd (a); }
74
75 /* Reciprocal( inverse) Square Root */
76 friend F64vec2 rsqrt( const F64vec2 &a ){ return 1./sqrt(a); }
77
78 /* Reciprocal (inversion) */
79 // friend F64vec2 rcp ( const F64vec2 &a ){ return _mm_rcp_pd (a); }
80 /* Reciprocal (inversion) */
81 //friend F64vec2 rcp ( const F64vec2 &a ){ return 1. / a; }
82 /* NewtonRaphson Reciprocal
83 [2 * rcppd(x) - (x * rcppd(x) * rcppd(x))] */
84 friend F64vec2 rcp(const F64vec2 &a) { return 1./a; }
85
86
87 /* Absolute value */
88 friend F64vec2 fabs(const F64vec2 &a){ return _mm_and_pd(a, _f64vec2_abs_mask); }
89
90 /* Sign */
91 friend F64vec2 sgn(const F64vec2 &a){ return _mm_or_pd(_mm_and_pd(a, _f64vec2_sgn_mask),_f64vec2_one); }
92 friend F64vec2 asgnb(const F64vec2 &a, const F64vec2 &b ){
93 return _mm_or_pd(_mm_and_pd(b, _f64vec2_sgn_mask),a);
94 }
95
96 /* Logical */
97
98 friend F64vec2 operator&( const F64vec2 &a, const F64vec2 &b ){ // mask returned
99 return _mm_and_pd(a, b);
100 }
101 friend F64vec2 operator|( const F64vec2 &a, const F64vec2 &b ){ // mask returned
102 return _mm_or_pd(a, b);
103 }
104 friend F64vec2 operator^( const F64vec2 &a, const F64vec2 &b ){ // mask returned
105 return _mm_xor_pd(a, b);
107 friend F64vec2 operator!( const F64vec2 &a ){ // mask returned
108 return _mm_xor_pd(a, _f64vec2_true);
109 }
110 // friend F64vec2 operator||( const F64vec2 &a, const F64vec2 &b ){ // mask returned
111 // return _mm_or_pd(a, b);
112 // }
113
114 /* Comparison */
115
116 friend F64vec2 operator<( const F64vec2 &a, const F64vec2 &b ){ // mask returned
117 return _mm_cmplt_pd(a, b);
118 }
119 friend F64vec2 operator<=( const F64vec2 &a, const F64vec2 &b ){ // mask returned
120 return _mm_cmple_pd(a, b);
121 }
122 friend F64vec2 operator>( const F64vec2 &a, const F64vec2 &b ){ // mask returned
123 return _mm_cmpgt_pd(a, b);
124 }
125 friend F64vec2 operator>=( const F64vec2 &a, const F64vec2 &b ){ // mask returned
126 return _mm_cmpge_pd(a, b);
127 }
128 friend F64vec2 operator==( const F64vec2 &a, const F64vec2 &b ){ // mask returned
129 return _mm_cmpeq_pd(a, b);
130 }
131
132 #define if3(a, b, c) ((a)&(b)) | ((!(a))&(c)) // analog (a) ? b : c
133
134 #define NotEmpty(a) bool((a)[0])|bool((a)[1])|bool((a)[2])|bool((a)[3])
135 #define Empty(a) !(bool((a)[0])|bool((a)[1])|bool((a)[2])|bool((a)[3]))
136 // bool NotEmpty(const F64vec2 &a) { return a[0]||a[1]||a[2]||a[3]; }
137 // bool Empty(const F64vec2 &a) { return !(a[0]||a[1]||a[2]||a[3]); } // optimize
138 friend F64vec2 bool2int( const F64vec2 &a){ // mask returned
139 return if3(a,1,0);
140 }
141
142 /* Define all operators for consistensy */
143
145
146 /* Non intrinsic functions */
147
148#define _f1(A,F) F64vec2( F(A[0]), F(A[1]) )
149
150 friend F64vec2 exp( const F64vec2 &a ){ return _f1( a, exp ); }
151 friend F64vec2 log( const F64vec2 &a ){ return _f1( a, log ); }
152 friend F64vec2 sin( const F64vec2 &a ){ return _f1( a, sin ); }
153 friend F64vec2 cos( const F64vec2 &a ){ return _f1( a, cos ); }
154 friend F64vec2 acos( const F64vec2 &a ){ return _f1( a, acos ); }
155
156#undef _f1
157
158 friend F64vec2 atan2(const F64vec2 &y, const F64vec2 &x) {
159 const F64vec2 pi(3.1415926535897932);
160 const F64vec2 pi_2 = pi/2;
161 const F64vec2 zero(0);
162
163 const F64vec2 &xZero = F64vec2(x == zero);
164 const F64vec2 &yZero = F64vec2(y == zero);
165 const F64vec2 &xNeg = F64vec2(x < zero);
166 const F64vec2 &yNeg = F64vec2(y < zero);
167
168 const F64vec2 &absX = fabs(x);
169 const F64vec2 &absY = fabs(y);
170
171 F64vec2 a = absY / absX;
172 const F64vec2 pi_4 = pi/4;
173 const F64vec2 &gt_tan_3pi_8 = F64vec2(a > F64vec2(2.414213562373095));
174 const F64vec2 &gt_tan_pi_8 = F64vec2(a > F64vec2(0.4142135623730950)) & F64vec2(!gt_tan_3pi_8);
175 const F64vec2 minusOne(-1);
176 F64vec2 b(zero);
177 b = (pi_2 & gt_tan_3pi_8) + (F64vec2(!gt_tan_3pi_8) & b);
178 b = (pi_4 & gt_tan_pi_8) + (F64vec2(!gt_tan_pi_8) & b);
179 a = (gt_tan_3pi_8 & (minusOne / a)) + (F64vec2(!gt_tan_3pi_8) & a);
180 a = (gt_tan_pi_8 & ((absY - absX) / (absY + absX))) + (F64vec2(!gt_tan_pi_8) & a) ;
181 const F64vec2 &a2 = a * a;
182 b += (((8.05374449538e-2 * a2
183 - 1.38776856032E-1) * a2
184 + 1.99777106478E-1) * a2
185 - 3.33329491539E-1) * a2 * a
186 + a;
187 F64vec2 xyNeg = F64vec2(xNeg ^ yNeg);
188 b = (xyNeg & (-b) ) + (F64vec2(!xyNeg) & b);
189 xyNeg = F64vec2(xNeg & !yNeg);
190 b = (xyNeg & (b+pi)) + (F64vec2(!xyNeg) & b);
191 xyNeg = F64vec2(xNeg & yNeg);
192 b = (xyNeg & (b-pi)) + (F64vec2(!xyNeg) & b);
193 xyNeg = F64vec2(xZero & yZero);
194 b = (xyNeg & zero) + (F64vec2(!xyNeg) & b);
195 xyNeg = F64vec2(xZero & yNeg);
196 b = (xyNeg & (-pi_2)) + (F64vec2(!xyNeg) & b);
197 return b;
198 }
199
200 friend std::ostream & operator<<(std::ostream &strm, const F64vec2 &a ){
201 strm<<"["<<a[0]<<" "<<a[1]<<" "<<a[2]<<" "<<a[3]<<"]";
202 return strm;
203 }
204
205 friend std::istream & operator>>(std::istream &strm, F64vec2 &a ){
206 double tmp;
207 strm>>tmp;
208 a = tmp;
209 return strm;
210 }
211
212} __attribute__ ((aligned(16)));
213
214
215typedef F64vec2 fvec;
216typedef double fscal;
217const int fvecLen = 2;
218//#define fvec_true _f64vec2_true
219//#define fvec_false _f64vec2_false
220#define _fvecalignment __attribute__ ((aligned(16)))
221
222
223#include "std_alloc.h"
224
225
226#endif
const union @19 __f64vec2_one_cheat
const union @19 __f64vec2_zero_cheat
#define _f64vec2_sgn_mask
Definition P4_F64vec2.h:38
const int fvecLen
Definition P4_F64vec2.h:217
const union @19 __f64vec2_sgn_mask_cheat
const union @19 __f64vec2_false_cheat
const union @18 __d_one
F64vec2 fvec
Definition P4_F64vec2.h:215
double fscal
Definition P4_F64vec2.h:216
double d
Definition P4_F64vec2.h:21
__m128d m
Definition P4_F64vec2.h:28
#define _f64vec2_abs_mask
Definition P4_F64vec2.h:37
const union @19 __f64vec2_abs_mask_cheat
#define _f64vec2_true
Definition P4_F64vec2.h:41
#define _f1(A, F)
Definition P4_F64vec2.h:148
#define if3(a, b, c)
Definition P4_F64vec2.h:132
const union @19 __f64vec2_true_cheat
long long i
Definition P4_F64vec2.h:22
#define _f64vec2_one
Definition P4_F64vec2.h:40
nsL1vector __attribute__
friend F64vec2 operator!(const F64vec2 &a)
Definition P4_F64vec2.h:107
friend F64vec2 operator<(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:116
friend F64vec2 operator>(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:122
friend F64vec2 operator*(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:65
friend F64vec2 asgnb(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:92
friend F64vec2 log(const F64vec2 &a)
Definition P4_F64vec2.h:151
friend F64vec2 operator==(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:128
friend F64vec2 operator>=(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:125
vec_arithmetic(F64vec2, double)
double & operator[](int i)
Definition P4_F64vec2.h:50
friend F64vec2 cos(const F64vec2 &a)
Definition P4_F64vec2.h:153
friend F64vec2 operator/(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:66
F64vec2(const double &f0, const double &f1)
Definition P4_F64vec2.h:57
friend F64vec2 min(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:69
friend std::istream & operator>>(std::istream &strm, F64vec2 &a)
Definition P4_F64vec2.h:205
friend F64vec2 operator-(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:64
friend F64vec2 max(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:70
friend F64vec2 operator<=(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:119
friend F64vec2 fabs(const F64vec2 &a)
Definition P4_F64vec2.h:88
friend F64vec2 operator&(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:98
friend F64vec2 atan2(const F64vec2 &y, const F64vec2 &x)
Definition P4_F64vec2.h:158
friend F64vec2 rsqrt(const F64vec2 &a)
Definition P4_F64vec2.h:76
friend F64vec2 bool2int(const F64vec2 &a)
Definition P4_F64vec2.h:138
friend F64vec2 sqrt(const F64vec2 &a)
Definition P4_F64vec2.h:73
friend F64vec2 operator+(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:63
friend F64vec2 exp(const F64vec2 &a)
Definition P4_F64vec2.h:150
friend F64vec2 sin(const F64vec2 &a)
Definition P4_F64vec2.h:152
friend F64vec2 operator|(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:101
friend F64vec2 rcp(const F64vec2 &a)
Definition P4_F64vec2.h:84
friend std::ostream & operator<<(std::ostream &strm, const F64vec2 &a)
Definition P4_F64vec2.h:200
F64vec2(const __m128d &a)
Definition P4_F64vec2.h:54
__m128d v
Definition P4_F64vec2.h:48
friend F64vec2 operator^(const F64vec2 &a, const F64vec2 &b)
Definition P4_F64vec2.h:104
F64vec2(const double &a)
Definition P4_F64vec2.h:55
double operator[](int i) const
Definition P4_F64vec2.h:51
friend F64vec2 acos(const F64vec2 &a)
Definition P4_F64vec2.h:154
friend F64vec2 sgn(const F64vec2 &a)
Definition P4_F64vec2.h:91