ssef.h
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2013 Intel Corporation
 * Modifications Copyright 2014-2022 Blender Foundation. */

#ifndef __UTIL_SSEF_H__
#define __UTIL_SSEF_H__

#include <math.h>

#include "util/ssei.h"

CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__

struct sseb;
struct ssef;

/*! 4-wide SSE float type. */
struct ssef {
  typedef sseb Mask;   // mask type
  typedef ssei Int;    // int type
  typedef ssef Float;  // float type

  enum { size = 4 };  // number of SIMD elements
  union {
    __m128 m128;
    float f[4];
    int i[4];
  };  // data

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline ssef()
  {
  }
  __forceinline ssef(const ssef &other)
  {
    m128 = other.m128;
  }
  __forceinline ssef &operator=(const ssef &other)
  {
    m128 = other.m128;
    return *this;
  }

  __forceinline ssef(const __m128 a) : m128(a)
  {
  }
  __forceinline operator const __m128 &() const
  {
    return m128;
  }
  __forceinline operator __m128 &()
  {
    return m128;
  }

  __forceinline ssef(float a) : m128(_mm_set1_ps(a))
  {
  }
  __forceinline ssef(float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d))
  {
  }

  __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a))
  {
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

#  if defined(__KERNEL_AVX__)
  static __forceinline ssef broadcast(const void *const a)
  {
    return _mm_broadcast_ss((float *)a);
  }
#  else
  static __forceinline ssef broadcast(const void *const a)
  {
    return _mm_set1_ps(*(float *)a);
  }
#  endif

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline const float &operator[](const size_t i) const
  {
    assert(i < 4);
    return f[i];
  }
  __forceinline float &operator[](const size_t i)
  {
    assert(i < 4);
    return f[i];
  }
};
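
/* Editorial note (not part of the original header): a minimal usage sketch of
 * the constructors and lane accessors above, assuming a __KERNEL_SSE2__ build.
 *
 *   ssef v(1.0f, 2.0f, 3.0f, 4.0f);  // per-lane construction
 *   ssef s(2.0f);                    // broadcast one value to all four lanes
 *   float third = v[2];              // read lane 2, third == 3.0f
 *   v[0] = 5.0f;                     // lanes are writable via operator[]
 */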

////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////

__forceinline const ssef cast(const __m128i &a)
{
  return _mm_castsi128_ps(a);
}
__forceinline const ssef operator+(const ssef &a)
{
  return a;
}
__forceinline const ssef operator-(const ssef &a)
{
  return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
}
__forceinline const ssef abs(const ssef &a)
{
  return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)));
}
#  if defined(__KERNEL_SSE41__)
__forceinline const ssef sign(const ssef &a)
{
  return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a, ssef(0.0f)));
}
#  endif
__forceinline const ssef signmsk(const ssef &a)
{
  return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
}

__forceinline const ssef rcp(const ssef &a)
{
  const ssef r = _mm_rcp_ps(a.m128);
  return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
}
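
/* Editorial note: rcp() refines the ~12-bit estimate r from _mm_rcp_ps() with
 * one Newton-Raphson step for 1/a: r' = r * (2 - a * r) = 2r - a * r^2, which
 * is exactly the (r + r) - (r * r) * a computed above and roughly doubles the
 * number of correct bits. */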
__forceinline const ssef sqr(const ssef &a)
{
  return _mm_mul_ps(a, a);
}
__forceinline const ssef mm_sqrt(const ssef &a)
{
  return _mm_sqrt_ps(a.m128);
}
__forceinline const ssef rsqrt(const ssef &a)
{
  const ssef r = _mm_rsqrt_ps(a.m128);
  return _mm_add_ps(
      _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r),
      _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r),
                 _mm_mul_ps(r, r)));
}
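
/* Editorial note: rsqrt() likewise applies one Newton-Raphson step to the
 * _mm_rsqrt_ps() estimate r for 1/sqrt(a): r' = r * (1.5 - 0.5 * a * r^2),
 * expanded above as 1.5 * r + ((a * -0.5) * r) * (r * r). */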

////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////

__forceinline const ssef operator+(const ssef &a, const ssef &b)
{
  return _mm_add_ps(a.m128, b.m128);
}
__forceinline const ssef operator+(const ssef &a, const float &b)
{
  return a + ssef(b);
}
__forceinline const ssef operator+(const float &a, const ssef &b)
{
  return ssef(a) + b;
}

__forceinline const ssef operator-(const ssef &a, const ssef &b)
{
  return _mm_sub_ps(a.m128, b.m128);
}
__forceinline const ssef operator-(const ssef &a, const float &b)
{
  return a - ssef(b);
}
__forceinline const ssef operator-(const float &a, const ssef &b)
{
  return ssef(a) - b;
}

__forceinline const ssef operator*(const ssef &a, const ssef &b)
{
  return _mm_mul_ps(a.m128, b.m128);
}
__forceinline const ssef operator*(const ssef &a, const float &b)
{
  return a * ssef(b);
}
__forceinline const ssef operator*(const float &a, const ssef &b)
{
  return ssef(a) * b;
}

__forceinline const ssef operator/(const ssef &a, const ssef &b)
{
  return _mm_div_ps(a.m128, b.m128);
}
__forceinline const ssef operator/(const ssef &a, const float &b)
{
  return a / ssef(b);
}
__forceinline const ssef operator/(const float &a, const ssef &b)
{
  return ssef(a) / b;
}

__forceinline const ssef operator^(const ssef &a, const ssef &b)
{
  return _mm_xor_ps(a.m128, b.m128);
}
__forceinline const ssef operator^(const ssef &a, const ssei &b)
{
  return _mm_xor_ps(a.m128, _mm_castsi128_ps(b.m128));
}

__forceinline const ssef operator&(const ssef &a, const ssef &b)
{
  return _mm_and_ps(a.m128, b.m128);
}
__forceinline const ssef operator&(const ssef &a, const ssei &b)
{
  return _mm_and_ps(a.m128, _mm_castsi128_ps(b.m128));
}

__forceinline const ssef operator|(const ssef &a, const ssef &b)
{
  return _mm_or_ps(a.m128, b.m128);
}
__forceinline const ssef operator|(const ssef &a, const ssei &b)
{
  return _mm_or_ps(a.m128, _mm_castsi128_ps(b.m128));
}

__forceinline const ssef andnot(const ssef &a, const ssef &b)
{
  return _mm_andnot_ps(a.m128, b.m128);
}

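/* Editorial note: following _mm_andnot_ps() semantics, andnot(a, b) computes
 * (~a) & b per lane; the complement applies to the FIRST argument. */
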
__forceinline const ssef min(const ssef &a, const ssef &b)
{
  return _mm_min_ps(a.m128, b.m128);
}
__forceinline const ssef min(const ssef &a, const float &b)
{
  return _mm_min_ps(a.m128, ssef(b));
}
__forceinline const ssef min(const float &a, const ssef &b)
{
  return _mm_min_ps(ssef(a), b.m128);
}

__forceinline const ssef max(const ssef &a, const ssef &b)
{
  return _mm_max_ps(a.m128, b.m128);
}
__forceinline const ssef max(const ssef &a, const float &b)
{
  return _mm_max_ps(a.m128, ssef(b));
}
__forceinline const ssef max(const float &a, const ssef &b)
{
  return _mm_max_ps(ssef(a), b.m128);
}

#  if defined(__KERNEL_SSE41__)
__forceinline ssef mini(const ssef &a, const ssef &b)
{
  const ssei ai = _mm_castps_si128(a);
  const ssei bi = _mm_castps_si128(b);
  const ssei ci = _mm_min_epi32(ai, bi);
  return _mm_castsi128_ps(ci);
}
#  endif

#  if defined(__KERNEL_SSE41__)
__forceinline ssef maxi(const ssef &a, const ssef &b)
{
  const ssei ai = _mm_castps_si128(a);
  const ssei bi = _mm_castps_si128(b);
  const ssei ci = _mm_max_epi32(ai, bi);
  return _mm_castsi128_ps(ci);
}
#  endif
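
/* Editorial note: mini()/maxi() reinterpret the float lanes as int32 and take
 * the integer min/max. Non-negative IEEE-754 floats order the same way as
 * their bit patterns, so this is a valid, cheaper min/max for non-negative
 * inputs; it is not safe when both inputs may be negative, nor for NaNs. */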

////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////

__forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c)
{
#  if defined(__KERNEL_NEON__)
  return vfmaq_f32(c, a, b);
#  elif defined(__KERNEL_AVX2__)
  return _mm_fmadd_ps(a, b, c);
#  else
  return a * b + c;
#  endif
}
__forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c)
{
#  if defined(__KERNEL_NEON__)
  return vfmaq_f32(vnegq_f32(c), a, b);
#  elif defined(__KERNEL_AVX2__)
  return _mm_fmsub_ps(a, b, c);
#  else
  return a * b - c;
#  endif
}
__forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c)
{
#  if defined(__KERNEL_NEON__)
  return vfmsq_f32(c, a, b);
#  elif defined(__KERNEL_AVX2__)
  return _mm_fnmadd_ps(a, b, c);
#  else
  return c - a * b;
#  endif
}
__forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c)
{
#  if defined(__KERNEL_NEON__)
  return vfmsq_f32(vnegq_f32(c), a, b);
#  elif defined(__KERNEL_AVX2__)
  return _mm_fnmsub_ps(a, b, c);
#  else
  return -a * b - c;
#  endif
}
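
/* Editorial note, summarizing the four ternary operators above per lane:
 *   madd(a, b, c)  =  a * b + c
 *   msub(a, b, c)  =  a * b - c
 *   nmadd(a, b, c) = -a * b + c
 *   nmsub(a, b, c) = -a * b - c
 * With __KERNEL_NEON__ or __KERNEL_AVX2__ they lower to single fused
 * multiply-add instructions; otherwise they fall back to separate mul/add. */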

////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////

__forceinline ssef &operator+=(ssef &a, const ssef &b)
{
  return a = a + b;
}
__forceinline ssef &operator+=(ssef &a, const float &b)
{
  return a = a + b;
}

__forceinline ssef &operator-=(ssef &a, const ssef &b)
{
  return a = a - b;
}
__forceinline ssef &operator-=(ssef &a, const float &b)
{
  return a = a - b;
}

__forceinline ssef &operator*=(ssef &a, const ssef &b)
{
  return a = a * b;
}
__forceinline ssef &operator*=(ssef &a, const float &b)
{
  return a = a * b;
}

__forceinline ssef &operator/=(ssef &a, const ssef &b)
{
  return a = a / b;
}
__forceinline ssef &operator/=(ssef &a, const float &b)
{
  return a = a / b;
}

////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators + Select
////////////////////////////////////////////////////////////////////////////////

__forceinline const sseb operator==(const ssef &a, const ssef &b)
{
  return _mm_cmpeq_ps(a.m128, b.m128);
}
__forceinline const sseb operator==(const ssef &a, const float &b)
{
  return a == ssef(b);
}
__forceinline const sseb operator==(const float &a, const ssef &b)
{
  return ssef(a) == b;
}

__forceinline const sseb operator!=(const ssef &a, const ssef &b)
{
  return _mm_cmpneq_ps(a.m128, b.m128);
}
__forceinline const sseb operator!=(const ssef &a, const float &b)
{
  return a != ssef(b);
}
__forceinline const sseb operator!=(const float &a, const ssef &b)
{
  return ssef(a) != b;
}

__forceinline const sseb operator<(const ssef &a, const ssef &b)
{
  return _mm_cmplt_ps(a.m128, b.m128);
}
__forceinline const sseb operator<(const ssef &a, const float &b)
{
  return a < ssef(b);
}
__forceinline const sseb operator<(const float &a, const ssef &b)
{
  return ssef(a) < b;
}

__forceinline const sseb operator>=(const ssef &a, const ssef &b)
{
  return _mm_cmpnlt_ps(a.m128, b.m128);
}
__forceinline const sseb operator>=(const ssef &a, const float &b)
{
  return a >= ssef(b);
}
__forceinline const sseb operator>=(const float &a, const ssef &b)
{
  return ssef(a) >= b;
}

__forceinline const sseb operator>(const ssef &a, const ssef &b)
{
  return _mm_cmpnle_ps(a.m128, b.m128);
}
__forceinline const sseb operator>(const ssef &a, const float &b)
{
  return a > ssef(b);
}
__forceinline const sseb operator>(const float &a, const ssef &b)
{
  return ssef(a) > b;
}

__forceinline const sseb operator<=(const ssef &a, const ssef &b)
{
  return _mm_cmple_ps(a.m128, b.m128);
}
__forceinline const sseb operator<=(const ssef &a, const float &b)
{
  return a <= ssef(b);
}
__forceinline const sseb operator<=(const float &a, const ssef &b)
{
  return ssef(a) <= b;
}

__forceinline const ssef select(const sseb &m, const ssef &t, const ssef &f)
{
#  ifdef __KERNEL_SSE41__
  return _mm_blendv_ps(f, t, m);
#  else
  return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
#  endif
}

__forceinline const ssef select(const ssef &m, const ssef &t, const ssef &f)
{
#  ifdef __KERNEL_SSE41__
  return _mm_blendv_ps(f, t, m);
#  else
  return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
#  endif
}

__forceinline const ssef select(const int mask, const ssef &t, const ssef &f)
{
#  if defined(__KERNEL_SSE41__) && \
      ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER))
  return _mm_blend_ps(f, t, mask);
#  else
  return select(sseb(mask), t, f);
#  endif
}
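
/* Editorial note: a small usage sketch for select(), not part of the header.
 *
 *   ssef a(1.0f), b(2.0f);
 *   sseb m = a < b;            // per-lane mask (all lanes true here)
 *   ssef r = select(m, a, b);  // take a where m is set, b elsewhere
 *
 * The int-mask overload expects a compile-time constant when it lowers to
 * _mm_blend_ps, since that blend encodes the mask as an instruction
 * immediate; hence the compiler guards above. */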

////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////

#  if defined(__KERNEL_SSE41__)
__forceinline const ssef round_even(const ssef &a)
{
#    ifdef __KERNEL_NEON__
  return vrndnq_f32(a);
#    else
  return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT);
#    endif
}
__forceinline const ssef round_down(const ssef &a)
{
#    ifdef __KERNEL_NEON__
  return vrndmq_f32(a);
#    else
  return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF);
#    endif
}
__forceinline const ssef round_up(const ssef &a)
{
#    ifdef __KERNEL_NEON__
  return vrndpq_f32(a);
#    else
  return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
#    endif
}
__forceinline const ssef round_zero(const ssef &a)
{
#    ifdef __KERNEL_NEON__
  return vrndq_f32(a);
#    else
  return _mm_round_ps(a, _MM_FROUND_TO_ZERO);
#    endif
}
__forceinline const ssef floor(const ssef &a)
{
#    ifdef __KERNEL_NEON__
  return vrndmq_f32(a);
#    else
  return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF);
#    endif
}
__forceinline const ssef ceil(const ssef &a)
{
#    ifdef __KERNEL_NEON__
  return vrndpq_f32(a);
#    else
  return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
#    endif
}
#  else
/* Non-SSE4.1 fallback, needed for floorfrac. */
__forceinline const ssef floor(const ssef &a)
{
  return _mm_set_ps(floorf(a.f[3]), floorf(a.f[2]), floorf(a.f[1]), floorf(a.f[0]));
}
#  endif

__forceinline ssei truncatei(const ssef &a)
{
  return _mm_cvttps_epi32(a.m128);
}

__forceinline ssef floorfrac(const ssef &x, ssei *i)
{
  ssef f = floor(x);
  *i = truncatei(f);
  return x - f;
}
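
/* Editorial note: floorfrac() splits x into integer and fractional parts,
 * e.g. x = 2.7 gives *i = 2 and returns 0.7, while x = -1.25 gives *i = -2
 * and returns 0.75; the returned fraction is always in [0, 1). */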

__forceinline ssef mix(const ssef &a, const ssef &b, const ssef &t)
{
  return madd(t, b, (ssef(1.0f) - t) * a);
}
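
/* Editorial note: mix() is the usual linear interpolation
 * mix(a, b, t) = (1 - t) * a + t * b, arranged as madd(t, b, (1 - t) * a) so
 * the final multiply-add can fuse. */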

////////////////////////////////////////////////////////////////////////////////
/// Movement/Shifting/Shuffling Functions
////////////////////////////////////////////////////////////////////////////////

__forceinline ssef unpacklo(const ssef &a, const ssef &b)
{
  return _mm_unpacklo_ps(a.m128, b.m128);
}
__forceinline ssef unpackhi(const ssef &a, const ssef &b)
{
  return _mm_unpackhi_ps(a.m128, b.m128);
}

template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssef shuffle(const ssef &b)
{
#  ifdef __KERNEL_NEON__
  return shuffle_neon<float32x4_t, i0, i1, i2, i3>(b.m128);
#  else
  return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0)));
#  endif
}

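/* Editorial note: an index sketch for the one-input shuffle above; template
 * indices pick source lanes, with lane 0 listed first:
 *
 *   ssef v(10.0f, 11.0f, 12.0f, 13.0f);
 *   ssef r = shuffle<3, 2, 1, 0>(v);  // r = (13, 12, 11, 10), full reverse
 *   ssef s = shuffle<0, 0, 2, 2>(v);  // s = (10, 10, 12, 12)
 */
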
template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a)
{
  return _mm_movelh_ps(a, a);
}

template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a)
{
  return _mm_movehl_ps(a, a);
}

template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssef shuffle(const ssef &a, const ssef &b)
{
#  ifdef __KERNEL_NEON__
  return shuffle_neon<float32x4_t, i0, i1, i2, i3>(a, b);
#  else
  return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
#  endif
}

template<size_t i0> __forceinline const ssef shuffle(const ssef &a, const ssef &b)
{
#  ifdef __KERNEL_NEON__
  return shuffle_neon<float32x4_t, i0, i0, i0, i0>(a, b);
#  else
  return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0));
#  endif
}

#  ifndef __KERNEL_NEON__
template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a, const ssef &b)
{
  return _mm_movelh_ps(a, b);
}

template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a, const ssef &b)
{
  return _mm_movehl_ps(b, a);
}
#  endif

#  if defined(__KERNEL_SSSE3__)
__forceinline const ssef shuffle8(const ssef &a, const ssei &shuf)
{
  return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf));
}
#  endif

#  if defined(__KERNEL_SSE3__)
template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef &b)
{
  return _mm_moveldup_ps(b);
}
template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef &b)
{
  return _mm_movehdup_ps(b);
}
#  endif

template<size_t i0> __forceinline const ssef shuffle(const ssef &b)
{
  return shuffle<i0, i0, i0, i0>(b);
}

#  if defined(__KERNEL_AVX__)
__forceinline const ssef shuffle(const ssef &a, const ssei &shuf)
{
  return _mm_permutevar_ps(a, shuf);
}
#  endif

template<size_t i> __forceinline float extract(const ssef &a)
{
  return _mm_cvtss_f32(shuffle<i, i, i, i>(a));
}
template<> __forceinline float extract<0>(const ssef &a)
{
  return _mm_cvtss_f32(a);
}

#  if defined(__KERNEL_SSE41__)
template<size_t dst, size_t src, size_t clr>
__forceinline const ssef insert(const ssef &a, const ssef &b)
{
#    ifdef __KERNEL_NEON__
  ssef res = a;
  if (clr)
    res[dst] = 0;
  else
    res[dst] = b[src];
  return res;
#    else
  return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr);
#    endif
}
template<size_t dst, size_t src> __forceinline const ssef insert(const ssef &a, const ssef &b)
{
  return insert<dst, src, 0>(a, b);
}
template<size_t dst> __forceinline const ssef insert(const ssef &a, const float b)
{
  return insert<dst, 0>(a, _mm_set_ss(b));
}
#  else
template<size_t dst> __forceinline const ssef insert(const ssef &a, const float b)
{
  ssef c = a;
  c[dst] = b;
  return c;
}
#  endif

////////////////////////////////////////////////////////////////////////////////
/// Transpose
////////////////////////////////////////////////////////////////////////////////

__forceinline void transpose(const ssef &r0,
                             const ssef &r1,
                             const ssef &r2,
                             const ssef &r3,
                             ssef &c0,
                             ssef &c1,
                             ssef &c2,
                             ssef &c3)
{
  ssef l02 = unpacklo(r0, r2);
  ssef h02 = unpackhi(r0, r2);
  ssef l13 = unpacklo(r1, r3);
  ssef h13 = unpackhi(r1, r3);
  c0 = unpacklo(l02, l13);
  c1 = unpackhi(l02, l13);
  c2 = unpacklo(h02, h13);
  c3 = unpackhi(h02, h13);
}
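
/* Editorial note: a usage sketch for the 4x4 transpose above.
 *
 *   ssef r0(1.0f, 2.0f, 3.0f, 4.0f), r1(5.0f, 6.0f, 7.0f, 8.0f);
 *   ssef r2(9.0f, 10.0f, 11.0f, 12.0f), r3(13.0f, 14.0f, 15.0f, 16.0f);
 *   ssef c0, c1, c2, c3;
 *   transpose(r0, r1, r2, r3, c0, c1, c2, c3);
 *   // c0 = (1, 5, 9, 13), c1 = (2, 6, 10, 14), c2 = (3, 7, 11, 15), ...
 */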

__forceinline void transpose(
    const ssef &r0, const ssef &r1, const ssef &r2, const ssef &r3, ssef &c0, ssef &c1, ssef &c2)
{
  ssef l02 = unpacklo(r0, r2);
  ssef h02 = unpackhi(r0, r2);
  ssef l13 = unpacklo(r1, r3);
  ssef h13 = unpackhi(r1, r3);
  c0 = unpacklo(l02, l13);
  c1 = unpackhi(l02, l13);
  c2 = unpacklo(h02, h13);
}

////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////

__forceinline const ssef vreduce_min(const ssef &v)
{
#  ifdef __KERNEL_NEON__
  return vdupq_n_f32(vminvq_f32(v));
#  else
  ssef h = min(shuffle<1, 0, 3, 2>(v), v);
  return min(shuffle<2, 3, 0, 1>(h), h);
#  endif
}
__forceinline const ssef vreduce_max(const ssef &v)
{
#  ifdef __KERNEL_NEON__
  return vdupq_n_f32(vmaxvq_f32(v));
#  else
  ssef h = max(shuffle<1, 0, 3, 2>(v), v);
  return max(shuffle<2, 3, 0, 1>(h), h);
#  endif
}
__forceinline const ssef vreduce_add(const ssef &v)
{
#  ifdef __KERNEL_NEON__
  return vdupq_n_f32(vaddvq_f32(v));
#  else
  ssef h = shuffle<1, 0, 3, 2>(v) + v;
  return shuffle<2, 3, 0, 1>(h) + h;
#  endif
}

__forceinline float reduce_min(const ssef &v)
{
#  ifdef __KERNEL_NEON__
  return vminvq_f32(v);
#  else
  return _mm_cvtss_f32(vreduce_min(v));
#  endif
}
__forceinline float reduce_max(const ssef &v)
{
#  ifdef __KERNEL_NEON__
  return vmaxvq_f32(v);
#  else
  return _mm_cvtss_f32(vreduce_max(v));
#  endif
}
__forceinline float reduce_add(const ssef &v)
{
#  ifdef __KERNEL_NEON__
  return vaddvq_f32(v);
#  else
  return _mm_cvtss_f32(vreduce_add(v));
#  endif
}
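
/* Editorial note: the vreduce_* variants splat the reduction across all four
 * lanes, which is convenient for comparing against the original vector (see
 * select_min()/select_max() below); the reduce_* variants return it as a
 * scalar, e.g. reduce_add(ssef(1.0f, 2.0f, 3.0f, 4.0f)) == 10.0f. */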

__forceinline uint32_t select_min(const ssef &v)
{
  return __bsf(movemask(v == vreduce_min(v)));
}
__forceinline uint32_t select_max(const ssef &v)
{
  return __bsf(movemask(v == vreduce_max(v)));
}

__forceinline uint32_t select_min(const sseb &valid, const ssef &v)
{
  const ssef a = select(valid, v, ssef(pos_inf));
  return __bsf(movemask(valid & (a == vreduce_min(a))));
}
__forceinline uint32_t select_max(const sseb &valid, const ssef &v)
{
  const ssef a = select(valid, v, ssef(neg_inf));
  return __bsf(movemask(valid & (a == vreduce_max(a))));
}

__forceinline uint32_t movemask(const ssef &a)
{
  return _mm_movemask_ps(a);
}
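
/* Editorial note: movemask() packs the sign bit of each lane into the low
 * four bits of the result, lane 0 in bit 0. select_min()/select_max() above
 * combine it with __bsf() to return the index of the first lane that holds
 * the extreme value. */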

////////////////////////////////////////////////////////////////////////////////
/// Memory load and store operations
////////////////////////////////////////////////////////////////////////////////

__forceinline ssef load4f(const float4 &a)
{
#  ifdef __KERNEL_WITH_SSE_ALIGN__
  return _mm_load_ps(&a.x);
#  else
  return _mm_loadu_ps(&a.x);
#  endif
}

__forceinline ssef load4f(const float3 &a)
{
#  ifdef __KERNEL_WITH_SSE_ALIGN__
  return _mm_load_ps(&a.x);
#  else
  return _mm_loadu_ps(&a.x);
#  endif
}

__forceinline ssef load4f(const void *const a)
{
  return _mm_load_ps((float *)a);
}

__forceinline ssef load1f_first(const float a)
{
  return _mm_set_ss(a);
}

__forceinline void store4f(void *ptr, const ssef &v)
{
  _mm_store_ps((float *)ptr, v);
}

__forceinline ssef loadu4f(const void *const a)
{
  return _mm_loadu_ps((float *)a);
}

__forceinline void storeu4f(void *ptr, const ssef &v)
{
  _mm_storeu_ps((float *)ptr, v);
}

__forceinline void store4f(const sseb &mask, void *ptr, const ssef &f)
{
#  if defined(__KERNEL_AVX__)
  _mm_maskstore_ps((float *)ptr, (__m128i)mask, f);
#  else
  *(ssef *)ptr = select(mask, f, *(ssef *)ptr);
#  endif
}

__forceinline ssef load4f_nt(void *ptr)
{
#  if defined(__KERNEL_SSE41__)
  return _mm_castsi128_ps(_mm_stream_load_si128((__m128i *)ptr));
#  else
  return _mm_load_ps((float *)ptr);
#  endif
}

__forceinline void store4f_nt(void *ptr, const ssef &v)
{
#  if defined(__KERNEL_SSE41__)
  _mm_stream_ps((float *)ptr, v);
#  else
  _mm_store_ps((float *)ptr, v);
#  endif
}

////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////

__forceinline float dot(const ssef &a, const ssef &b)
{
  return reduce_add(a * b);
}

/* Calculate shuffled cross product, useful when order of components does not matter. */
__forceinline ssef cross_zxy(const ssef &a, const ssef &b)
{
  const ssef a0 = a;
  const ssef b0 = shuffle<1, 2, 0, 3>(b);
  const ssef a1 = shuffle<1, 2, 0, 3>(a);
  const ssef b1 = b;
  return msub(a0, b0, a1 * b1);
}

__forceinline ssef cross(const ssef &a, const ssef &b)
{
  return shuffle<1, 2, 0, 3>(cross_zxy(a, b));
}
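
/* Editorial note: with s = shuffle<1, 2, 0, 3>, cross_zxy() returns
 * a * s(b) - s(a) * b, whose lanes hold (z, x, y) of the true cross product;
 * cross() applies s once more to rotate them back to (x, y, z). This saves
 * one shuffle versus the textbook three-shuffle formulation whenever the
 * component order does not matter. */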

ccl_device_inline const ssef dot3_splat(const ssef &a, const ssef &b)
{
#  ifdef __KERNEL_SSE41__
  return _mm_dp_ps(a.m128, b.m128, 0x7f);
#  else
  ssef t = a * b;
  return ssef(((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]);
#  endif
}

/* Squared length taking only specified axes into account. */
template<size_t X, size_t Y, size_t Z, size_t W> ccl_device_inline float len_squared(const ssef &a)
{
#  ifndef __KERNEL_SSE41__
  float4 &t = (float4 &)a;
  return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) +
         (W ? t.w * t.w : 0.0f);
#  else
  return extract<0>(
      ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf)));
#  endif
}

ccl_device_inline float dot3(const ssef &a, const ssef &b)
{
#  ifdef __KERNEL_SSE41__
  return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f)));
#  else
  ssef t = a * b;
  return ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2];
#  endif
}

ccl_device_inline const ssef len3_squared_splat(const ssef &a)
{
  return dot3_splat(a, a);
}

ccl_device_inline float len3_squared(const ssef &a)
{
  return dot3(a, a);
}

ccl_device_inline float len3(const ssef &a)
{
  return extract<0>(mm_sqrt(dot3_splat(a, a)));
}

/* SSE shuffle utility functions. */

#  ifdef __KERNEL_SSSE3__

/* Faster version for SSSE3. */
typedef ssei shuffle_swap_t;

ccl_device_inline shuffle_swap_t shuffle_swap_identity()
{
  return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
}

ccl_device_inline shuffle_swap_t shuffle_swap_swap()
{
  return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
}

ccl_device_inline const ssef shuffle_swap(const ssef &a, const shuffle_swap_t &shuf)
{
  return cast(_mm_shuffle_epi8(cast(a), shuf));
}

#  else

/* Somewhat slower version for SSE2. */
typedef int shuffle_swap_t;

ccl_device_inline shuffle_swap_t shuffle_swap_identity()
{
  return 0;
}

ccl_device_inline shuffle_swap_t shuffle_swap_swap()
{
  return 1;
}

ccl_device_inline const ssef shuffle_swap(const ssef &a, shuffle_swap_t shuf)
{
  /* Shuffle value must be a constant, so we need to branch. */
  if (shuf)
    return shuffle<1, 0, 3, 2>(a);
  else
    return shuffle<3, 2, 1, 0>(a);
}

#  endif

#  if defined(__KERNEL_SSE41__) && !defined(__KERNEL_NEON__)

ccl_device_inline void gen_idirsplat_swap(const ssef &pn,
                                          const shuffle_swap_t &shuf_identity,
                                          const shuffle_swap_t &shuf_swap,
                                          const float3 &idir,
                                          ssef idirsplat[3],
                                          shuffle_swap_t shufflexyz[3])
{
  const __m128 idirsplat_raw[] = {_mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z)};
  idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn);
  idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn);
  idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn);

  const ssef signmask = cast(ssei(0x80000000));
  const ssef shuf_identity_f = cast(shuf_identity);
  const ssef shuf_swap_f = cast(shuf_swap);

  shufflexyz[0] = _mm_castps_si128(
      _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask)));
  shufflexyz[1] = _mm_castps_si128(
      _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask)));
  shufflexyz[2] = _mm_castps_si128(
      _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask)));
}

#  else

ccl_device_inline void gen_idirsplat_swap(const ssef &pn,
                                          const shuffle_swap_t &shuf_identity,
                                          const shuffle_swap_t &shuf_swap,
                                          const float3 &idir,
                                          ssef idirsplat[3],
                                          shuffle_swap_t shufflexyz[3])
{
  idirsplat[0] = ssef(idir.x) ^ pn;
  idirsplat[1] = ssef(idir.y) ^ pn;
  idirsplat[2] = ssef(idir.z) ^ pn;

  shufflexyz[0] = (idir.x >= 0) ? shuf_identity : shuf_swap;
  shufflexyz[1] = (idir.y >= 0) ? shuf_identity : shuf_swap;
  shufflexyz[2] = (idir.z >= 0) ? shuf_identity : shuf_swap;
}

#  endif

ccl_device_inline const ssef uint32_to_float(const ssei &in)
{
  ssei a = _mm_srli_epi32(in, 16);
  ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff));
  ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000));
  ssef d = _mm_cvtepi32_ps(b);
  ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000)));
  return _mm_add_ps(e, d);
}
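
/* Editorial note: uint32_to_float() converts unsigned 32-bit lanes even
 * though SSE2 only provides a signed conversion. The low 16 bits (b) convert
 * exactly via _mm_cvtepi32_ps(). The high 16 bits (a) are OR-ed into the
 * mantissa of the float constant 0x53000000 (= 2^39), so subtracting 2^39
 * again leaves e = a * 2^16 exactly; e + d is then the full unsigned value. */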

template<size_t S1, size_t S2, size_t S3, size_t S4>
ccl_device_inline const ssef set_sign_bit(const ssef &a)
{
  return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31));
}

////////////////////////////////////////////////////////////////////////////////
/// Debug Functions
////////////////////////////////////////////////////////////////////////////////

ccl_device_inline void print_ssef(const char *label, const ssef &a)
{
  printf(
      "%s: %.8f %.8f %.8f %.8f\n", label, (double)a[0], (double)a[1], (double)a[2], (double)a[3]);
}

#endif /* __KERNEL_SSE2__ */

CCL_NAMESPACE_END

#endif /* __UTIL_SSEF_H__ */