Blender  V3.3
half.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #ifndef __UTIL_HALF_H__
5 #define __UTIL_HALF_H__
6 
7 #include "util/math.h"
8 #include "util/types.h"
9 
10 #if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__)
11 # include "util/simd.h"
12 #endif
13 
15 
16 /* Half Floats */
17 
18 #if defined(__KERNEL_METAL__)
19 
20 ccl_device_inline float half_to_float(half h_in)
21 {
22  float f;
23  union {
24  half h;
25  uint16_t s;
26  } val;
27  val.h = h_in;
28 
29  *((ccl_private int *)&f) = ((val.s & 0x8000) << 16) | (((val.s & 0x7c00) + 0x1C000) << 13) |
30  ((val.s & 0x03FF) << 13);
31 
32  return f;
33 }
34 
35 #else
36 
37 /* CUDA, HIP and oneAPI have their own half data type, so there is no need to define it. */
38 # if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) && !defined(__KERNEL_ONEAPI__)
39 /* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
40  * unsigned shorts. */
/* Storage type holding the raw 16 bits of a half float.
 *
 * It only stores and exposes the bit pattern; conversions to and from float are
 * done by the free functions in this header. */
class half {
 public:
  /* Default to an all-zero bit pattern. */
  half() : v(0)
  {
  }
  /* Wrap a raw 16-bit pattern. */
  half(const unsigned short &i) : v(i)
  {
  }
  /* Read back the raw 16-bit pattern. Const so that it is usable on const values
   * (for example the members of a `const half4`). */
  operator unsigned short() const
  {
    return v;
  }
  /* Replace the stored bit pattern. */
  half &operator=(const unsigned short &i)
  {
    v = i;
    return *this;
  }

 private:
  unsigned short v;
};
62 # endif
63 
64 struct half4 {
65  half x, y, z, w;
66 };
67 #endif
68 
69 /* Conversion to/from half float for image textures
70  *
71  * Simplified float to half for fast sampling on processor without a native
72  * instruction, and eliminating any NaN and inf values. */
73 
75 {
76 #if defined(__KERNEL_METAL__) || defined(__KERNEL_ONEAPI__)
77  return half(min(f, 65504.0f));
78 #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
79  return __float2half(min(f, 65504.0f));
80 #else
81  const uint u = __float_as_uint(f);
82  /* Sign bit, shifted to its position. */
83  uint sign_bit = u & 0x80000000;
84  sign_bit >>= 16;
85  /* Exponent. */
86  uint exponent_bits = u & 0x7f800000;
87  /* Non-sign bits. */
88  uint value_bits = u & 0x7fffffff;
89  value_bits >>= 13; /* Align mantissa on MSB. */
90  value_bits -= 0x1c000; /* Adjust bias. */
91  /* Flush-to-zero. */
92  value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits;
93  /* Clamp-to-max. */
94  value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits;
95  /* Denormals-as-zero. */
96  value_bits = (exponent_bits == 0 ? 0 : value_bits);
97  /* Re-insert sign bit and return. */
98  return (value_bits | sign_bit);
99 #endif
100 }
101 
103 {
104 #if defined(__KERNEL_METAL__)
105  return half_to_float(h);
106 #elif defined(__KERNEL_ONEAPI__)
107  return float(h);
108 #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
109  return __half2float(h);
110 #else
111  const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13);
112  return __int_as_float(x);
113 #endif
114 }
115 
117 {
118  /* Unable to use because it gives different results half_to_float_image, can we
119  * modify float_to_half_image so the conversion results are identical? */
120 #if 0 /* defined(__KERNEL_AVX2__) */
121  /* CPU: AVX. */
122  __m128i x = _mm_castpd_si128(_mm_load_sd((const double *)&h));
123  return float4(_mm_cvtph_ps(x));
124 #endif
125 
130  return f;
131 }
132 
133 /* Conversion to half float texture for display.
134  *
135  * Simplified float to half for fast display texture conversion on processors
136  * without a native instruction. Assumes no negative, no NaN, no inf, and sets
137  * denormal to 0. */
138 
140 {
141 #if defined(__KERNEL_METAL__) || defined(__KERNEL_ONEAPI__)
142  return half(min(f, 65504.0f));
143 #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
144  return __float2half(min(f, 65504.0f));
145 #else
146  const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f);
147  const int absolute = x & 0x7FFFFFFF;
148  const int Z = absolute + 0xC8000000;
149  const int result = (absolute < 0x38800000) ? 0 : Z;
150  const int rshift = (result >> 13);
151  return (rshift & 0x7FFF);
152 #endif
153 }
154 
156 {
157 #ifdef __KERNEL_SSE2__
158  /* CPU: SSE and AVX. */
159  ssef x = min(max(load4f(f), 0.0f), 65504.0f);
160 # ifdef __KERNEL_AVX2__
161  ssei rpack = _mm_cvtps_ph(x, 0);
162 # else
163  ssei absolute = cast(x) & 0x7FFFFFFF;
164  ssei Z = absolute + 0xC8000000;
165  ssei result = andnot(absolute < 0x38800000, Z);
166  ssei rshift = (result >> 13) & 0x7FFF;
167  ssei rpack = _mm_packs_epi32(rshift, rshift);
168 # endif
169  half4 h;
170  _mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack));
171  return h;
172 #else
173  /* GPU and scalar fallback. */
174  const half4 h = {float_to_half_display(f.x),
177  float_to_half_display(f.w)};
178  return h;
179 #endif
180 }
181 
183 
184 #endif /* __UTIL_HALF_H__ */
typedef float(TangentPoint)[2]
unsigned int uint
Definition: BLI_sys_types.h:67
float float4[4]
#define Z
Definition: GeomUtils.cpp:201
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:1028
Definition: half.h:41
half()
Definition: half.h:43
half(const unsigned short &i)
Definition: half.h:46
half & operator=(const unsigned short &i)
Definition: half.h:53
#define ccl_private
Definition: cuda/compat.h:48
unsigned short half
Definition: cuda/compat.h:110
__device__ float __half2float(const half h)
Definition: cuda/compat.h:119
#define ccl_device_inline
Definition: cuda/compat.h:34
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
ccl_device_inline half float_to_half_display(const float f)
Definition: half.h:139
ccl_device_inline half float_to_half_image(float f)
Definition: half.h:74
ccl_device_inline float4 half4_to_float4_image(const half4 h)
Definition: half.h:116
ccl_device_inline float half_to_float_image(half h)
Definition: half.h:102
ccl_device_inline half4 float4_to_half4_display(const float4 f)
Definition: half.h:155
#define __float2half(x)
Definition: metal/compat.h:217
#define make_float4(x, y, z, w)
Definition: metal/compat.h:205
U * cast(T *in)
Definition: Cast.h:13
#define min(a, b)
Definition: sort.c:35
unsigned short uint16_t
Definition: stdint.h:79
Definition: half.h:64
half x
Definition: half.h:65
half w
Definition: half.h:65
half z
Definition: half.h:65
half y
Definition: half.h:65
float max
ccl_device_inline uint __float_as_uint(float f)
Definition: util/math.h:263
ccl_device_inline int __float_as_int(float f)
Definition: util/math.h:243
ccl_device_inline float __int_as_float(int i)
Definition: util/math.h:253