4 #ifndef __UTIL_HALF_H__
5 #define __UTIL_HALF_H__
10 #if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__)
18 #if defined(__KERNEL_METAL__)
29 *((
ccl_private int *)&f) = ((val.s & 0x8000) << 16) | (((val.s & 0x7c00) + 0x1C000) << 13) |
30 ((val.s & 0x03FF) << 13);
38 # if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) && !defined(__KERNEL_ONEAPI__)
46 half(
const unsigned short &i) : v(i)
49 operator unsigned short()
76 #if defined(__KERNEL_METAL__) || defined(__KERNEL_ONEAPI__)
78 #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
83 uint sign_bit = u & 0x80000000;
86 uint exponent_bits = u & 0x7f800000;
88 uint value_bits = u & 0x7fffffff;
90 value_bits -= 0x1c000;
92 value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits;
94 value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits;
96 value_bits = (exponent_bits == 0 ? 0 : value_bits);
98 return (value_bits | sign_bit);
104 #if defined(__KERNEL_METAL__)
105 return half_to_float(h);
106 #elif defined(__KERNEL_ONEAPI__)
108 #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
111 const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13);
122 __m128i
x = _mm_castpd_si128(_mm_load_sd((
const double *)&h));
123 return float4(_mm_cvtph_ps(
x));
141 #if defined(__KERNEL_METAL__) || defined(__KERNEL_ONEAPI__)
143 #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
146 const int x =
__float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f);
150 const int rshift = (
result >> 13);
151 return (rshift & 0x7FFF);
157 #ifdef __KERNEL_SSE2__
159 ssef
x =
min(
max(load4f(f), 0.0f), 65504.0f);
160 # ifdef __KERNEL_AVX2__
161 ssei rpack = _mm_cvtps_ph(
x, 0);
166 ssei rshift = (
result >> 13) & 0x7FFF;
167 ssei rpack = _mm_packs_epi32(rshift, rshift);
170 _mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack));
typedef float(TangentPoint)[2]
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
half(const unsigned short &i)
half & operator=(const unsigned short &i)
__device__ float __half2float(const half h)
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_inline half float_to_half_display(const float f)
ccl_device_inline half float_to_half_image(float f)
ccl_device_inline float4 half4_to_float4_image(const half4 h)
ccl_device_inline float half_to_float_image(half h)
ccl_device_inline half4 float4_to_half4_display(const float4 f)
ccl_device_inline uint __float_as_uint(float f)
ccl_device_inline int __float_as_int(float f)
ccl_device_inline float __int_as_float(int i)