Blender  V3.3
oneapi/compat.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2021-2022 Intel Corporation */
3 
4 #pragma once
5 
6 #define __KERNEL_GPU__
7 #define __KERNEL_ONEAPI__
8 
9 #define CCL_NAMESPACE_BEGIN
10 #define CCL_NAMESPACE_END
11 
12 #include <cstdint>
13 
14 #ifndef __NODES_MAX_GROUP__
15 # define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
16 #endif
17 #ifndef __NODES_FEATURES__
18 # define __NODES_FEATURES__ NODE_FEATURE_ALL
19 #endif
20 
21 /* This one does not have an abstraction.
22  * It's used by other devices directly.
23  */
24 
25 #define __device__
26 
27 /* Qualifier wrappers for different names on different devices */
28 
29 #define ccl_device
30 #define ccl_global
31 #define ccl_always_inline __attribute__((always_inline))
32 #define ccl_device_inline inline
33 #define ccl_noinline __attribute__((noinline))
34 #define ccl_inline_constant const constexpr
35 #define ccl_static_constant const
36 #define ccl_device_forceinline __attribute__((always_inline))
37 #define ccl_device_noinline ccl_device ccl_noinline
38 #define ccl_device_noinline_cpu ccl_device
39 #define ccl_device_inline_method ccl_device
40 #define ccl_restrict __restrict__
41 #define ccl_loop_no_unroll
42 #define ccl_optional_struct_init
43 #define ccl_private
44 #define ATTR_FALLTHROUGH __attribute__((fallthrough))
45 #define ccl_constant const
46 #define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
47 #define ccl_align(n) __attribute__((aligned(n)))
48 #define kernel_assert(cond)
49 #define ccl_may_alias
50 
51 /* clang-format off */
52 
53 /* kernel.h adapters */
54 #define ccl_gpu_kernel(block_num_threads, thread_num_registers)
55 #define ccl_gpu_kernel_threads(block_num_threads)
56 
57 #ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
58 # define KG_ND_ITEMS \
59  kg->nd_item_local_id_0 = item.get_local_id(0); \
60  kg->nd_item_local_range_0 = item.get_local_range(0); \
61  kg->nd_item_group_0 = item.get_group(0); \
62  kg->nd_item_group_range_0 = item.get_group_range(0); \
63  kg->nd_item_global_id_0 = item.get_global_id(0); \
64  kg->nd_item_global_range_0 = item.get_global_range(0);
65 #else
66 # define KG_ND_ITEMS
67 #endif
68 
69 #define ccl_gpu_kernel_signature(name, ...) \
70 void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
71  size_t kernel_global_size, \
72  size_t kernel_local_size, \
73  sycl::handler &cgh, \
74  __VA_ARGS__) { \
75  (kg); \
76  cgh.parallel_for<class kernel_##name>( \
77  sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
78  [=](sycl::nd_item<1> item) { \
79  KG_ND_ITEMS
80 
81 #define ccl_gpu_kernel_postfix \
82  }); \
83  }
84 
85 #define ccl_gpu_kernel_call(x) ((ONEAPIKernelContext*)kg)->x
86 
87 #define ccl_gpu_kernel_lambda(func, ...) \
88  struct KernelLambda \
89  { \
90  KernelLambda(const ONEAPIKernelContext *_kg) : kg(_kg) {} \
91  ccl_private const ONEAPIKernelContext *kg; \
92  __VA_ARGS__; \
93  int operator()(const int state) const { return (func); } \
94  } ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg)
95 
96 /* GPU thread, block, grid size and index */
97 #ifndef WITH_ONEAPI_SYCL_HOST_ENABLED
98 # define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
99 # define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
100 # define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
101 # define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
102 # define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
103 # define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
104 
105 # define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
106 # define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
107 #else
108 # define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0)
109 # define ccl_gpu_block_dim_x (kg->nd_item_local_range_0)
110 # define ccl_gpu_block_idx_x (kg->nd_item_group_0)
111 # define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0)
112 # define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
113 # define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
114 
115 # define ccl_gpu_global_id_x() (kg->nd_item_global_id_0)
116 # define ccl_gpu_global_size_x() (kg->nd_item_global_range_0)
117 #endif
118 
119 
120 /* GPU warp synchronization */
121 
122 #define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier()
123 #define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space)
124 #ifdef __SYCL_DEVICE_ONLY__
125  #define ccl_gpu_ballot(predicate) (sycl::ext::oneapi::group_ballot(sycl::ext::oneapi::experimental::this_sub_group(), predicate).count())
126 #else
127  #define ccl_gpu_ballot(predicate) (predicate ? 1 : 0)
128 #endif
129 
130 /* Debug defines */
131 #if defined(__SYCL_DEVICE_ONLY__)
132 # define CONSTANT __attribute__((opencl_constant))
133 #else
134 # define CONSTANT
135 #endif
136 
137 #define sycl_printf(format, ...) { \
138  static const CONSTANT char fmt[] = format; \
139  sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
140  }
141 
142 #define sycl_printf_(format) { \
143  static const CONSTANT char fmt[] = format; \
144  sycl::ext::oneapi::experimental::printf(fmt); \
145  }
146 
147 /* GPU texture objects */
148 
149 /* clang-format on */
150 
151 /* Types */
152 /* It's not possible to use sycl types like sycl::float3, sycl::int3, etc
153  * because these types have different interfaces from blender version */
154 
155 using uchar = unsigned char;
156 using sycl::half;
157 
158 struct float3 {
159  float x, y, z;
160 };
161 
162 ccl_always_inline float3 make_float3(float x, float y, float z)
163 {
164  return {x, y, z};
165 }
167 {
168  return {x, x, x};
169 }
170 
171 /* math functions */
172 #define fabsf(x) sycl::fabs((x))
173 #define copysignf(x, y) sycl::copysign((x), (y))
174 #define asinf(x) sycl::asin((x))
175 #define acosf(x) sycl::acos((x))
176 #define atanf(x) sycl::atan((x))
177 #define floorf(x) sycl::floor((x))
178 #define ceilf(x) sycl::ceil((x))
179 #define sinhf(x) sycl::sinh((x))
180 #define coshf(x) sycl::cosh((x))
181 #define tanhf(x) sycl::tanh((x))
182 #define hypotf(x, y) sycl::hypot((x), (y))
183 #define atan2f(x, y) sycl::atan2((x), (y))
184 #define fmaxf(x, y) sycl::fmax((x), (y))
185 #define fminf(x, y) sycl::fmin((x), (y))
186 #define fmodf(x, y) sycl::fmod((x), (y))
187 #define lgammaf(x) sycl::lgamma((x))
188 
189 #define __forceinline __attribute__((always_inline))
190 
191 /* Types */
192 #include "util/half.h"
193 #include "util/types.h"
194 
195 /* NOTE(@nsirgien): Declaring these functions after types headers is very important because they
196  * include oneAPI headers, which transitively include math.h headers which will cause redefinitions
197  * of the math defines because math.h also uses them and having them defined before math.h include
198  * is actually UB. */
199 /* sycl::native::cos precision is not sufficient and -ffast-math lets
200  * the current DPC++ compiler overload sycl::cos with it.
201  * We work around this issue by directly calling the spirv implementation which
202  * provides greater precision. */
203 #if defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__)
204 # define cosf(x) __spirv_ocl_cos(((float)(x)))
205 #else
206 # define cosf(x) sycl::cos(((float)(x)))
207 #endif
208 #define sinf(x) sycl::native::sin(((float)(x)))
209 #define powf(x, y) sycl::native::powr(((float)(x)), ((float)(y)))
210 #define tanf(x) sycl::native::tan(((float)(x)))
211 #define logf(x) sycl::native::log(((float)(x)))
212 #define expf(x) sycl::native::exp(((float)(x)))
unsigned char uchar
Definition: BLI_sys_types.h:70
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble z
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
unsigned short half
Definition: cuda/compat.h:110
ccl_always_inline float3 make_float3(float x, float y, float z)
#define ccl_always_inline
Definition: oneapi/compat.h:31
float z
float y
float x