Blender V3.3
sseb.h
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2013 Intel Corporation
 * Modifications Copyright 2014-2022 Blender Foundation. */

#ifndef __UTIL_SSEB_H__
#define __UTIL_SSEB_H__

CCL_NAMESPACE_BEGIN

#ifdef __KERNEL_SSE2__

struct ssei;
struct ssef;

/*! 4-wide SSE boolean type. */
struct sseb {
  typedef sseb Mask;   // mask type
  typedef ssei Int;    // int type
  typedef ssef Float;  // float type

  enum { size = 4 };  // number of SIMD elements
  union {
    __m128 m128;
    int32_t v[4];
  };  // data

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline sseb()
  {
  }
  __forceinline sseb(const sseb &other)
  {
    m128 = other.m128;
  }
  __forceinline sseb &operator=(const sseb &other)
  {
    m128 = other.m128;
    return *this;
  }

  __forceinline sseb(const __m128 input) : m128(input)
  {
  }
  __forceinline operator const __m128 &(void) const
  {
    return m128;
  }
  __forceinline operator const __m128i(void) const
  {
    return _mm_castps_si128(m128);
  }
  __forceinline operator const __m128d(void) const
  {
    return _mm_castps_pd(m128);
  }

  __forceinline sseb(bool a)
      : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)])
  {
  }
  __forceinline sseb(bool a, bool b)
      : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)])
  {
  }
  __forceinline sseb(bool a, bool b, bool c, bool d)
      : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)])
  {
  }
  __forceinline sseb(int mask)
  {
    assert(mask >= 0 && mask < 16);
    m128 = _mm_lookupmask_ps[mask];
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline sseb(FalseTy) : m128(_mm_setzero_ps())
  {
  }
  __forceinline sseb(TrueTy)
      : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())))
  {
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline bool operator[](const size_t i) const
  {
    assert(i < 4);
    return (_mm_movemask_ps(m128) >> i) & 1;
  }
  __forceinline int32_t &operator[](const size_t i)
  {
    assert(i < 4);
    return v[i];
  }
};

////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////

__forceinline const sseb operator!(const sseb &a)
{
  return _mm_xor_ps(a, sseb(True));
}

////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////

__forceinline const sseb operator&(const sseb &a, const sseb &b)
{
  return _mm_and_ps(a, b);
}
__forceinline const sseb operator|(const sseb &a, const sseb &b)
{
  return _mm_or_ps(a, b);
}
__forceinline const sseb operator^(const sseb &a, const sseb &b)
{
  return _mm_xor_ps(a, b);
}

////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////

__forceinline const sseb operator&=(sseb &a, const sseb &b)
{
  return a = a & b;
}
__forceinline const sseb operator|=(sseb &a, const sseb &b)
{
  return a = a | b;
}
__forceinline const sseb operator^=(sseb &a, const sseb &b)
{
  return a = a ^ b;
}

////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators + Select
////////////////////////////////////////////////////////////////////////////////

__forceinline const sseb operator!=(const sseb &a, const sseb &b)
{
  return _mm_xor_ps(a, b);
}
__forceinline const sseb operator==(const sseb &a, const sseb &b)
{
  return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b));
}

__forceinline const sseb select(const sseb &m, const sseb &t, const sseb &f)
{
#  if defined(__KERNEL_SSE41__)
  return _mm_blendv_ps(f, t, m);
#  else
  return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
#  endif
}

////////////////////////////////////////////////////////////////////////////////
/// Movement/Shifting/Shuffling Functions
////////////////////////////////////////////////////////////////////////////////

__forceinline const sseb unpacklo(const sseb &a, const sseb &b)
{
  return _mm_unpacklo_ps(a, b);
}
__forceinline const sseb unpackhi(const sseb &a, const sseb &b)
{
  return _mm_unpackhi_ps(a, b);
}

template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const sseb shuffle(const sseb &a)
{
#  ifdef __KERNEL_NEON__
  return shuffle_neon<int32x4_t, i0, i1, i2, i3>(a);
#  else
  return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
#  endif
}

#  ifndef __KERNEL_NEON__
template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a)
{
  return _mm_movelh_ps(a, a);
}

template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a)
{
  return _mm_movehl_ps(a, a);
}
#  endif

template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const sseb shuffle(const sseb &a, const sseb &b)
{
#  ifdef __KERNEL_NEON__
  return shuffle_neon<int32x4_t, i0, i1, i2, i3>(a, b);
#  else
  return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
#  endif
}

#  ifndef __KERNEL_NEON__
template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b)
{
  return _mm_movelh_ps(a, b);
}

template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b)
{
  return _mm_movehl_ps(b, a);
}
#  endif

#  if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__)
template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a)
{
  return _mm_moveldup_ps(a);
}
template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a)
{
  return _mm_movehdup_ps(a);
}
#  endif

#  if defined(__KERNEL_SSE41__)
template<size_t dst, size_t src, size_t clr>
__forceinline const sseb insert(const sseb &a, const sseb &b)
{
#    ifdef __KERNEL_NEON__
  sseb res = a;
  if (clr)
    res[dst] = 0;
  else
    res[dst] = b[src];
  return res;
#    else
  return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr);
#    endif
}
template<size_t dst, size_t src> __forceinline const sseb insert(const sseb &a, const sseb &b)
{
  return insert<dst, src, 0>(a, b);
}
template<size_t dst> __forceinline const sseb insert(const sseb &a, const bool b)
{
  return insert<dst, 0>(a, sseb(b));
}
#  endif

////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////

#  if defined(__KERNEL_SSE41__)
__forceinline uint32_t popcnt(const sseb &a)
{
#    if defined(__KERNEL_NEON__)
  const int32x4_t mask = {1, 1, 1, 1};
  int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask);
  return vaddvq_s32(t);
#    else
  return _mm_popcnt_u32(_mm_movemask_ps(a));
#    endif
}
#  else
__forceinline uint32_t popcnt(const sseb &a)
{
  return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]);
}
#  endif

__forceinline bool reduce_and(const sseb &a)
{
#  if defined(__KERNEL_NEON__)
  return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4;
#  else
  return _mm_movemask_ps(a) == 0xf;
#  endif
}
__forceinline bool reduce_or(const sseb &a)
{
#  if defined(__KERNEL_NEON__)
  return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0;
#  else
  return _mm_movemask_ps(a) != 0x0;
#  endif
}
__forceinline bool all(const sseb &b)
{
#  if defined(__KERNEL_NEON__)
  return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4;
#  else
  return _mm_movemask_ps(b) == 0xf;
#  endif
}
__forceinline bool any(const sseb &b)
{
#  if defined(__KERNEL_NEON__)
  return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0;
#  else
  return _mm_movemask_ps(b) != 0x0;
#  endif
}
__forceinline bool none(const sseb &b)
{
#  if defined(__KERNEL_NEON__)
  return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0;
#  else
  return _mm_movemask_ps(b) == 0x0;
#  endif
}

__forceinline uint32_t movemask(const sseb &a)
{
  return _mm_movemask_ps(a);
}

////////////////////////////////////////////////////////////////////////////////
/// Debug Functions
////////////////////////////////////////////////////////////////////////////////

ccl_device_inline void print_sseb(const char *label, const sseb &a)
{
  printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]);
}

#endif /* __KERNEL_SSE2__ */

CCL_NAMESPACE_END

#endif /* __UTIL_SSEB_H__ */
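
For context, a small hypothetical usage sketch follows. It is not part of the header; it only exercises the constructors, lane-wise operators, select() and the reduction helpers listed above. The include path, the function name sseb_usage_example, and the availability of the companion Cycles utility headers (which provide True/False, _mm_lookupmask_ps and ccl_device_inline when __KERNEL_SSE2__ is defined) are assumptions about the local build setup, not something this file guarantees.

/* Hypothetical usage sketch -- not part of util/sseb.h. */
#include "util/sseb.h" /* assumed include path inside the Cycles source tree */

CCL_NAMESPACE_BEGIN

ccl_device_inline void sseb_usage_example()
{
  /* Lane order of the 4-bool constructor is (a, b, c, d) -> lanes 0..3. */
  const sseb m0(true, false, true, false); /* lanes 0 and 2 set */
  const sseb m1(false, true, true, false); /* lanes 1 and 2 set */

  const sseb both = m0 & m1; /* lane-wise AND: only lane 2 set */

  const bool any_set = any(both);     /* true */
  const bool all_set = all(m0 | m1);  /* false: lane 3 is clear */
  const uint32_t bits = movemask(m0); /* one bit per lane: 0b0101 == 5 */

  /* select() takes lanes of t where the mask is set, lanes of f elsewhere. */
  const sseb merged = select(m0, sseb(True), sseb(False)); /* equals m0 */

  print_sseb("merged", merged);
  (void)any_set;
  (void)all_set;
  (void)bits;
}

CCL_NAMESPACE_END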