11 #ifndef EIGEN_GENERIC_PACKET_MATH_H
12 #define EIGEN_GENERIC_PACKET_MATH_H
26 #ifndef EIGEN_DEBUG_ALIGNED_LOAD
27 #define EIGEN_DEBUG_ALIGNED_LOAD
30 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
31 #define EIGEN_DEBUG_UNALIGNED_LOAD
34 #ifndef EIGEN_DEBUG_ALIGNED_STORE
35 #define EIGEN_DEBUG_ALIGNED_STORE
38 #ifndef EIGEN_DEBUG_UNALIGNED_STORE
39 #define EIGEN_DEBUG_UNALIGNED_STORE
// Base traits type that the primary packet_traits<T> template derives from.
// NOTE(review): the member list of this struct is not visible in this excerpt.
42 struct default_packet_traits
// Primary packet_traits template for a type T; it inherits from
// default_packet_traits.
// NOTE(review): the struct body is not visible in this excerpt.
96 template<
typename T>
struct packet_traits : default_packet_traits
120 template<
typename T>
struct packet_traits<const T> : packet_traits<T> { };
// Traits template parameterized on a source type Src and a target type Tgt.
// NOTE(review): presumably describes how packets of Src are cast to Tgt, but
// the struct body is not visible in this excerpt — confirm.
122 template <
typename Src,
typename Tgt>
struct type_casting_traits {
132 template <
typename SrcPacket,
typename TgtPacket>
133 EIGEN_DEVICE_FUNC
inline TgtPacket
134 pcast(
const SrcPacket& a) {
135 return static_cast<TgtPacket
>(a);
137 template <
typename SrcPacket,
typename TgtPacket>
138 EIGEN_DEVICE_FUNC
inline TgtPacket
139 pcast(
const SrcPacket& a,
const SrcPacket& ) {
140 return static_cast<TgtPacket
>(a);
143 template <
typename SrcPacket,
typename TgtPacket>
144 EIGEN_DEVICE_FUNC
inline TgtPacket
145 pcast(
const SrcPacket& a,
const SrcPacket& ,
const SrcPacket& ,
const SrcPacket& ) {
146 return static_cast<TgtPacket
>(a);
150 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
151 padd(
const Packet& a,
152 const Packet& b) {
return a+b; }
155 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
156 psub(
const Packet& a,
157 const Packet& b) {
return a-b; }
160 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
161 pnegate(
const Packet& a) {
return -a; }
165 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
166 pconj(
const Packet& a) {
return numext::conj(a); }
169 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
170 pmul(
const Packet& a,
171 const Packet& b) {
return a*b; }
174 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
175 pdiv(
const Packet& a,
176 const Packet& b) {
return a/b; }
179 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
180 pmin(
const Packet& a,
181 const Packet& b) {
return numext::mini(a, b); }
184 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
185 pmax(
const Packet& a,
186 const Packet& b) {
return numext::maxi(a, b); }
189 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
190 pabs(
const Packet& a) {
using std::abs;
return abs(a); }
193 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
194 parg(
const Packet& a) {
using numext::arg;
return arg(a); }
197 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
198 pand(
const Packet& a,
const Packet& b) {
return a & b; }
201 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
202 por(
const Packet& a,
const Packet& b) {
return a | b; }
205 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
206 pxor(
const Packet& a,
const Packet& b) {
return a ^ b; }
209 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
210 pandnot(
const Packet& a,
const Packet& b) {
return a & (!b); }
213 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
214 pload(
const typename unpacket_traits<Packet>::type* from) {
return *from; }
217 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
218 ploadu(
const typename unpacket_traits<Packet>::type* from) {
return *from; }
221 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
222 pset1(
const typename unpacket_traits<Packet>::type& a) {
return a; }
225 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
226 pload1(
const typename unpacket_traits<Packet>::type *a) {
return pset1<Packet>(*a); }
233 template<
typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
234 ploaddup(
const typename unpacket_traits<Packet>::type* from) {
return *from; }
242 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
243 ploadquad(
const typename unpacket_traits<Packet>::type* from)
244 {
return pload1<Packet>(from); }
255 template<
typename Packet> EIGEN_DEVICE_FUNC
256 inline void pbroadcast4(
const typename unpacket_traits<Packet>::type *a,
257 Packet& a0, Packet& a1, Packet& a2, Packet& a3)
259 a0 = pload1<Packet>(a+0);
260 a1 = pload1<Packet>(a+1);
261 a2 = pload1<Packet>(a+2);
262 a3 = pload1<Packet>(a+3);
272 template<
typename Packet> EIGEN_DEVICE_FUNC
273 inline void pbroadcast2(
const typename unpacket_traits<Packet>::type *a,
274 Packet& a0, Packet& a1)
276 a0 = pload1<Packet>(a+0);
277 a1 = pload1<Packet>(a+1);
281 template<
typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
282 plset(
const typename unpacket_traits<Packet>::type& a) {
return a; }
// Takes a destination pointer `to` and a packet `from`; returns nothing.
// NOTE(review): presumably writes `from` through `to`, but the function body
// is not visible in this excerpt — confirm.
285 template<
typename Scalar,
typename Packet> EIGEN_DEVICE_FUNC
inline void pstore(Scalar* to,
const Packet& from)
// Takes a destination pointer `to` and a packet `from`; returns nothing.
// NOTE(review): presumably the unaligned counterpart of pstore, but the
// function body is not visible in this excerpt — confirm.
289 template<
typename Scalar,
typename Packet> EIGEN_DEVICE_FUNC
inline void pstoreu(Scalar* to,
const Packet& from)
292 template<
typename Scalar,
typename Packet> EIGEN_DEVICE_FUNC
inline Packet pgather(
const Scalar* from,
Index )
293 {
return ploadu<Packet>(from); }
295 template<
typename Scalar,
typename Packet> EIGEN_DEVICE_FUNC
inline void pscatter(Scalar* to,
const Packet& from,
Index )
296 { pstore(to, from); }
// Issues a prefetch hint for the address `addr`.
// NOTE(review): the braces and some of the surrounding #if/#else/#endif lines
// are not visible in this excerpt, so the exact nesting of the branches below
// cannot be confirmed from here.
299 template<
typename Scalar> EIGEN_DEVICE_FUNC
inline void prefetch(
const Scalar* addr)
// When __LP64__ is defined, the inline asm uses the "l" operand constraint.
302 #if defined(__LP64__)
304 asm(
" prefetch.L1 [ %1 ];" :
"=l"(addr) :
"l"(addr));
// Same instruction string with the "r" operand constraint.
// NOTE(review): presumably the non-LP64 branch — confirm.
307 asm(
" prefetch.L1 [ %1 ];" :
"=r"(addr) :
"r"(addr));
// Non-MSVC GNU-compatible, Clang or ICC compilers: use the compiler builtin.
309 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
310 __builtin_prefetch(addr);
// Takes a packet and returns a value of its scalar type
// (unpacket_traits<Packet>::type).
// NOTE(review): presumably the first element; body not visible here — confirm.
315 template<
typename Packet> EIGEN_DEVICE_FUNC
inline typename unpacket_traits<Packet>::type pfirst(
const Packet& a)
319 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
320 preduxp(
const Packet* vecs) {
return vecs[0]; }
// Reduces a packet to a single value of its scalar type.
// NOTE(review): presumably the sum of the elements; body not visible in this
// excerpt — confirm.
323 template<
typename Packet> EIGEN_DEVICE_FUNC
inline typename unpacket_traits<Packet>::type predux(
const Packet& a)
// The return type is unpacket_traits<Packet>::half when the packet size is a
// multiple of 8, and Packet itself otherwise.
// NOTE(review): the function body is not visible in this excerpt.
330 template<
typename Packet> EIGEN_DEVICE_FUNC
inline
331 typename conditional<(unpacket_traits<Packet>::size%8)==0,
typename unpacket_traits<Packet>::half,Packet>::type
332 predux_downto4(
const Packet& a)
// Reduces a packet to a single value of its scalar type.
// NOTE(review): presumably the product of the elements; body not visible in
// this excerpt — confirm.
336 template<
typename Packet> EIGEN_DEVICE_FUNC
inline typename unpacket_traits<Packet>::type predux_mul(
const Packet& a)
// Reduces a packet to a single value of its scalar type.
// NOTE(review): presumably the minimum of the elements; body not visible in
// this excerpt — confirm.
340 template<
typename Packet> EIGEN_DEVICE_FUNC
inline typename unpacket_traits<Packet>::type predux_min(
const Packet& a)
// Reduces a packet to a single value of its scalar type.
// NOTE(review): presumably the maximum of the elements; body not visible in
// this excerpt — confirm.
344 template<
typename Packet> EIGEN_DEVICE_FUNC
inline typename unpacket_traits<Packet>::type predux_max(
const Packet& a)
// Takes a packet and returns a packet of the same type.
// NOTE(review): presumably the elements in reversed order; body not visible
// in this excerpt — confirm.
348 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet preverse(
const Packet& a)
352 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet pcplxflip(
const Packet& a)
354 return Packet(a.imag(),a.real());
362 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
363 Packet psin(
const Packet& a) {
using std::sin;
return sin(a); }
366 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
367 Packet pcos(
const Packet& a) {
using std::cos;
return cos(a); }
370 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
371 Packet ptan(
const Packet& a) {
using std::tan;
return tan(a); }
374 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
375 Packet pasin(
const Packet& a) {
using std::asin;
return asin(a); }
378 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
379 Packet pacos(
const Packet& a) {
using std::acos;
return acos(a); }
382 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
383 Packet patan(
const Packet& a) {
using std::atan;
return atan(a); }
386 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
387 Packet psinh(
const Packet& a) {
using std::sinh;
return sinh(a); }
390 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
391 Packet pcosh(
const Packet& a) {
using std::cosh;
return cosh(a); }
394 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
395 Packet ptanh(
const Packet& a) {
using std::tanh;
return tanh(a); }
398 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
399 Packet pexp(
const Packet& a) {
using std::exp;
return exp(a); }
402 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
403 Packet plog(
const Packet& a) {
using std::log;
return log(a); }
406 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
407 Packet plog1p(
const Packet& a) {
return numext::log1p(a); }
410 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
411 Packet plog10(
const Packet& a) {
using std::log10;
return log10(a); }
414 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
415 Packet psqrt(
const Packet& a) {
using std::sqrt;
return sqrt(a); }
418 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
419 Packet prsqrt(
const Packet& a) {
420 return pdiv(pset1<Packet>(1), psqrt(a));
424 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
425 Packet pround(
const Packet& a) {
using numext::round;
return round(a); }
428 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
429 Packet pfloor(
const Packet& a) {
using numext::floor;
return floor(a); }
432 template<
typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
433 Packet pceil(
const Packet& a) {
using numext::ceil;
return ceil(a); }
441 template<
typename Packet>
442 inline void pstore1(
typename unpacket_traits<Packet>::type* to,
const typename unpacket_traits<Packet>::type& a)
444 pstore(to, pset1<Packet>(a));
448 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
449 pmadd(
const Packet& a,
452 {
return padd(pmul(a, b),c); }
456 template<
typename Packet,
int Alignment>
457 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(
const typename unpacket_traits<Packet>::type* from)
459 if(Alignment >= unpacket_traits<Packet>::alignment)
460 return pload<Packet>(from);
462 return ploadu<Packet>(from);
// Store whose behavior depends on the compile-time Alignment parameter.
// NOTE(review): only the condition is visible in this excerpt; the statements
// of both branches are missing, so which store each branch performs cannot be
// confirmed from here.
467 template<
typename Scalar,
typename Packet,
int Alignment>
468 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void pstoret(Scalar* to,
const Packet& from)
// Compares the requested Alignment with the alignment declared for Packet.
470 if(Alignment >= unpacket_traits<Packet>::alignment)
481 template<
typename Packet,
int LoadMode>
482 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(
const typename unpacket_traits<Packet>::type* from)
484 return ploadt<Packet, LoadMode>(from);
// Template parameterized on an integer Offset and a packet type.
// NOTE(review): the enclosing struct header is not visible in this excerpt.
488 template<
int Offset,
typename PacketType>
// The visible run() has an empty body: it leaves both arguments untouched.
492 static inline void run(PacketType&,
const PacketType&) {}
510 template<
int Offset,
typename PacketType>
511 inline void palign(PacketType& first,
const PacketType& second)
513 palign_impl<Offset,PacketType>::run(first,second);
523 template<>
inline std::complex<float> pmul(
const std::complex<float>& a,
const std::complex<float>& b)
524 {
return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
526 template<>
inline std::complex<double> pmul(
const std::complex<double>& a,
const std::complex<double>& b)
527 {
return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
// Struct template parameterized on a packet type and a count N, which
// defaults to unpacket_traits<Packet>::size.
// NOTE(review): the member list is not visible in this excerpt.
536 template <typename Packet,int N=unpacket_traits<Packet>::size>
struct PacketBlock {
// Overload taking a PacketBlock<Packet,1>; the argument is unnamed, so this
// overload does not use it.
// NOTE(review): the rest of the body is not visible in this excerpt.
540 template<
typename Packet> EIGEN_DEVICE_FUNC
inline void
541 ptranspose(PacketBlock<Packet,1>& ) {
// Struct template parameterized on a size N.
// NOTE(review): the member list is not visible in this excerpt; pblend and
// pinsertfirst/pinsertlast index a member named `select` on it.
549 template <
size_t N>
struct Selector {
553 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
554 pblend(
const Selector<unpacket_traits<Packet>::size>& ifPacket,
const Packet& thenPacket,
const Packet& elsePacket) {
555 return ifPacket.select[0] ? thenPacket : elsePacket;
559 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
560 pinsertfirst(
const Packet& a,
typename unpacket_traits<Packet>::type b)
564 Selector<unpacket_traits<Packet>::size> mask;
565 mask.select[0] =
true;
567 for(
Index i=1; i<unpacket_traits<Packet>::size; ++i)
568 mask.select[i] =
false;
569 return pblend(mask, pset1<Packet>(b), a);
573 template<
typename Packet> EIGEN_DEVICE_FUNC
inline Packet
574 pinsertlast(
const Packet& a,
typename unpacket_traits<Packet>::type b)
578 Selector<unpacket_traits<Packet>::size> mask;
580 for(
Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
581 mask.select[i] =
false;
582 mask.select[unpacket_traits<Packet>::size-1] =
true;
583 return pblend(mask, pset1<Packet>(b), a);
590 #endif // EIGEN_GENERIC_PACKET_MATH_H