Blender  V3.3
avxb.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2013 Intel Corporation
3  * Modifications Copyright 2014-2022 Blender Foundation. */
4 
5 #ifndef __UTIL_AVXB_H__
6 #define __UTIL_AVXB_H__
7 
9 
10 struct avxf;
11 
13 struct avxb {
14  typedef avxb Mask; // mask type
15  typedef avxf Float; // float type
16 
17  enum { size = 8 }; // number of SIMD elements
18  union {
19  __m256 m256;
20  int32_t v[8];
21  }; // data
22 
26 
28  {
29  }
30  __forceinline avxb(const avxb &other)
31  {
32  m256 = other.m256;
33  }
35  {
36  m256 = other.m256;
37  return *this;
38  }
39 
40  __forceinline avxb(const __m256 input) : m256(input)
41  {
42  }
43  __forceinline avxb(const __m128 &a, const __m128 &b)
44  : m256(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1))
45  {
46  }
47  __forceinline operator const __m256 &(void) const
48  {
49  return m256;
50  }
51  __forceinline operator const __m256i(void) const
52  {
53  return _mm256_castps_si256(m256);
54  }
55  __forceinline operator const __m256d(void) const
56  {
57  return _mm256_castps_pd(m256);
58  }
59 
63 
64  __forceinline avxb(FalseTy) : m256(_mm256_setzero_ps())
65  {
66  }
67  __forceinline avxb(TrueTy) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1)))
68  {
69  }
70 
74 
75  __forceinline bool operator[](const size_t i) const
76  {
77  assert(i < 8);
78  return (_mm256_movemask_ps(m256) >> i) & 1;
79  }
80  __forceinline int32_t &operator[](const size_t i)
81  {
82  assert(i < 8);
83  return v[i];
84  }
85 };
86 
90 
92 {
93  return _mm256_xor_ps(a, avxb(True));
94 }
95 
99 
100 __forceinline const avxb operator&(const avxb &a, const avxb &b)
101 {
102  return _mm256_and_ps(a, b);
103 }
104 __forceinline const avxb operator|(const avxb &a, const avxb &b)
105 {
106  return _mm256_or_ps(a, b);
107 }
108 __forceinline const avxb operator^(const avxb &a, const avxb &b)
109 {
110  return _mm256_xor_ps(a, b);
111 }
112 
116 
118 {
119  return a = a & b;
120 }
122 {
123  return a = a | b;
124 }
126 {
127  return a = a ^ b;
128 }
129 
133 
134 __forceinline const avxb operator!=(const avxb &a, const avxb &b)
135 {
136  return _mm256_xor_ps(a, b);
137 }
138 __forceinline const avxb operator==(const avxb &a, const avxb &b)
139 {
140 #ifdef __KERNEL_AVX2__
141  return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
142 #else
143  __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
144  __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
145  __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
146  __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
147  __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
148  __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
149  __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
150  return _mm256_castsi256_ps(result);
151 #endif
152 }
153 
154 __forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
155 {
156 #if defined(__KERNEL_SSE41__)
157  return _mm256_blendv_ps(f, t, m);
158 #else
159  return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f));
160 #endif
161 }
162 
166 
167 __forceinline const avxb unpacklo(const avxb &a, const avxb &b)
168 {
169  return _mm256_unpacklo_ps(a, b);
170 }
171 __forceinline const avxb unpackhi(const avxb &a, const avxb &b)
172 {
173  return _mm256_unpackhi_ps(a, b);
174 }
175 
179 
180 #if defined(__KERNEL_SSE41__)
182 {
183  return _mm_popcnt_u32(_mm256_movemask_ps(a));
184 }
185 #else
187 {
188  return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) +
189  bool(a[7]);
190 }
191 #endif
192 
194 {
195  return _mm256_movemask_ps(a) == 0xf;
196 }
198 {
199  return _mm256_movemask_ps(a) != 0x0;
200 }
201 __forceinline bool all(const avxb &b)
202 {
203  return _mm256_movemask_ps(b) == 0xf;
204 }
205 __forceinline bool any(const avxb &b)
206 {
207  return _mm256_movemask_ps(b) != 0x0;
208 }
209 __forceinline bool none(const avxb &b)
210 {
211  return _mm256_movemask_ps(b) == 0x0;
212 }
213 
215 {
216  return _mm256_movemask_ps(a);
217 }
218 
222 
223 ccl_device_inline void print_avxb(const char *label, const avxb &a)
224 {
225  printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
226 }
227 
229 
230 #endif
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
__forceinline const avxb operator|=(avxb &a, const avxb &b)
Definition: avxb.h:121
__forceinline uint32_t movemask(const avxb &a)
Definition: avxb.h:214
__forceinline bool any(const avxb &b)
Definition: avxb.h:205
__forceinline bool all(const avxb &b)
Definition: avxb.h:201
__forceinline const avxb operator==(const avxb &a, const avxb &b)
Definition: avxb.h:138
__forceinline bool reduce_and(const avxb &a)
Definition: avxb.h:193
__forceinline const avxb operator&(const avxb &a, const avxb &b)
Binary Operators.
Definition: avxb.h:100
__forceinline const avxb operator^=(avxb &a, const avxb &b)
Definition: avxb.h:125
__forceinline const avxb operator&=(avxb &a, const avxb &b)
Assignment Operators.
Definition: avxb.h:117
__forceinline const avxb unpacklo(const avxb &a, const avxb &b)
Movement/Shifting/Shuffling Functions.
Definition: avxb.h:167
__forceinline const avxb operator|(const avxb &a, const avxb &b)
Definition: avxb.h:104
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
Definition: avxb.h:154
ccl_device_inline void print_avxb(const char *label, const avxb &a)
Debug Functions.
Definition: avxb.h:223
__forceinline const avxb operator^(const avxb &a, const avxb &b)
Definition: avxb.h:108
__forceinline const avxb operator!(const avxb &a)
Unary Operators.
Definition: avxb.h:91
__forceinline const avxb unpackhi(const avxb &a, const avxb &b)
Definition: avxb.h:171
__forceinline bool reduce_or(const avxb &a)
Definition: avxb.h:197
__forceinline const avxb operator!=(const avxb &a, const avxb &b)
Comparison Operators + Select.
Definition: avxb.h:134
__forceinline uint32_t popcnt(const avxb &a)
Reduction Operations.
Definition: avxb.h:186
__forceinline bool none(const avxb &b)
Definition: avxb.h:209
#define ccl_device_inline
Definition: cuda/compat.h:34
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
const char * label
SyclQueue void void size_t num_bytes void
ccl_global KernelShaderEvalInput * input
static unsigned a[3]
Definition: RandGen.cpp:78
static const pxr::TfToken b("b", pxr::TfToken::Immortal)
#define __forceinline
unsigned int uint32_t
Definition: stdint.h:80
signed int int32_t
Definition: stdint.h:77
Definition: avxb.h:13
__forceinline avxb(const __m256 input)
Definition: avxb.h:40
__forceinline avxb(TrueTy)
Definition: avxb.h:67
__forceinline avxb(const __m128 &a, const __m128 &b)
Definition: avxb.h:43
int32_t v[8]
Definition: avxb.h:20
__forceinline avxb & operator=(const avxb &other)
Definition: avxb.h:34
@ size
Definition: avxb.h:17
__forceinline avxb(FalseTy)
Constants.
Definition: avxb.h:64
__forceinline avxb()
Constructors, Assignment & Cast Operators.
Definition: avxb.h:27
__forceinline bool operator[](const size_t i) const
Array Access.
Definition: avxb.h:75
avxf Float
Definition: avxb.h:15
__forceinline int32_t & operator[](const size_t i)
Definition: avxb.h:80
__m256 m256
Definition: avxb.h:19
avxb Mask
Definition: avxb.h:14
__forceinline avxb(const avxb &other)
Definition: avxb.h:30
Definition: avxf.h:11