kernel_arch_impl.h (Blender 3.3)
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

/* Templated common implementation part of all CPU kernels.
 *
 * The idea is that particular .cpp files set the needed optimization flags and
 * simply include this file, without worrying about copying the actual implementation over.
 */

#pragma once
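
/* Illustrative sketch (added note, not part of the original file): roughly how an
 * architecture-specific translation unit is expected to use this header. The exact
 * macro spellings (the KERNEL_ARCH value and the KERNEL_FUNCTION_FULL_NAME definition)
 * live in the per-architecture kernel sources and are assumed here for illustration only:
 *
 *   // kernel_avx2.cpp (hypothetical excerpt)
 *   #define KERNEL_ARCH cpu_avx2
 *   #define KERNEL_FUNCTION_FULL_NAME(name) kernel_cpu_avx2_##name
 *   #include "kernel/device/cpu/kernel_arch_impl.h"
 *
 * A build without support for the given instruction set would instead define
 * KERNEL_STUB before the include, so every kernel body collapses to a
 * STUB_ASSERT() call. */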

// clang-format off
#include "kernel/device/cpu/compat.h"

#ifndef KERNEL_STUB
#  include "kernel/device/cpu/globals.h"
#  include "kernel/device/cpu/image.h"

#  include "kernel/integrator/state.h"
#  include "kernel/integrator/state_flow.h"
#  include "kernel/integrator/state_util.h"

#  include "kernel/integrator/init_from_camera.h"
#  include "kernel/integrator/init_from_bake.h"
#  include "kernel/integrator/intersect_closest.h"
#  include "kernel/integrator/intersect_shadow.h"
#  include "kernel/integrator/intersect_subsurface.h"
#  include "kernel/integrator/intersect_volume_stack.h"
#  include "kernel/integrator/shade_background.h"
#  include "kernel/integrator/shade_light.h"
#  include "kernel/integrator/shade_shadow.h"
#  include "kernel/integrator/shade_surface.h"
#  include "kernel/integrator/shade_volume.h"
#  include "kernel/integrator/megakernel.h"

#  include "kernel/film/adaptive_sampling.h"
#  include "kernel/film/id_passes.h"
#  include "kernel/film/read.h"

#  include "kernel/bake/bake.h"

#else
#  define STUB_ASSERT(arch, name) \
    assert(!(#name " kernel stub for architecture " #arch " was called!"))
#endif /* KERNEL_STUB */
// clang-format on

CCL_NAMESPACE_BEGIN

/* --------------------------------------------------------------------
 * Integrator.
 */

#ifdef KERNEL_STUB
#  define KERNEL_INVOKE(name, ...) (STUB_ASSERT(KERNEL_ARCH, name), 0)
#else
#  define KERNEL_INVOKE(name, ...) integrator_##name(__VA_ARGS__)
#endif
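
/* Illustrative expansion (added note, not in the original source):
 *
 *   KERNEL_INVOKE(shade_surface, kg, state, render_buffer)
 *     // regular build:  integrator_shade_surface(kg, state, render_buffer)
 *     // stub build:     (STUB_ASSERT(KERNEL_ARCH, shade_surface), 0)
 *
 * The comma expression in the stub case still yields a value, which lets the
 * `return KERNEL_INVOKE(...)` in DEFINE_INTEGRATOR_INIT_KERNEL compile. */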

/* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so
 * that it does not contain unused fields. */
#define DEFINE_INTEGRATOR_INIT_KERNEL(name) \
  bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state, \
                                                    KernelWorkTile *tile, \
                                                    ccl_global float *render_buffer) \
  { \
    return KERNEL_INVOKE( \
        name, kg, state, tile, render_buffer, tile->x, tile->y, tile->start_sample); \
  }

#define DEFINE_INTEGRATOR_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state) \
  { \
    KERNEL_INVOKE(name, kg, state); \
  }

#define DEFINE_INTEGRATOR_SHADE_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
      const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \
  { \
    KERNEL_INVOKE(name, kg, state, render_buffer); \
  }

#define DEFINE_INTEGRATOR_SHADOW_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state) \
  { \
    KERNEL_INVOKE(name, kg, &state->shadow); \
  }

#define DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
      const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \
  { \
    KERNEL_INVOKE(name, kg, &state->shadow, render_buffer); \
  }

DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)
DEFINE_INTEGRATOR_SHADE_KERNEL(shade_light)
DEFINE_INTEGRATOR_SHADE_KERNEL(shade_surface)
DEFINE_INTEGRATOR_SHADE_KERNEL(shade_volume)
DEFINE_INTEGRATOR_SHADE_KERNEL(megakernel)
DEFINE_INTEGRATOR_SHADOW_KERNEL(intersect_shadow)
DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(shade_shadow)
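
/* Illustrative expansion (added note, not in the original source): assuming a
 * per-architecture definition along the lines of
 * #define KERNEL_FUNCTION_FULL_NAME(name) kernel_cpu_avx2_##name (the exact
 * spelling lives in the per-architecture kernel headers), the line
 * DEFINE_INTEGRATOR_SHADE_KERNEL(shade_surface) above generates roughly:
 *
 *   void kernel_cpu_avx2_integrator_shade_surface(const KernelGlobalsCPU *kg,
 *                                                 IntegratorStateCPU *state,
 *                                                 ccl_global float *render_buffer)
 *   {
 *     integrator_shade_surface(kg, state, render_buffer);
 *   }
 */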

/* --------------------------------------------------------------------
 * Shader evaluation.
 */

void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg,
                                                     const KernelShaderEvalInput *input,
                                                     float *output,
                                                     const int offset)
{
#ifdef KERNEL_STUB
  STUB_ASSERT(KERNEL_ARCH, shader_eval_displace);
#else
  kernel_displace_evaluate(kg, input, output, offset);
#endif
}

void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg,
                                                       const KernelShaderEvalInput *input,
                                                       float *output,
                                                       const int offset)
{
#ifdef KERNEL_STUB
  STUB_ASSERT(KERNEL_ARCH, shader_eval_background);
#else
  kernel_background_evaluate(kg, input, output, offset);
#endif
}

void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
    const KernelGlobalsCPU *kg,
    const KernelShaderEvalInput *input,
    float *output,
    const int offset)
{
#ifdef KERNEL_STUB
  STUB_ASSERT(KERNEL_ARCH, shader_eval_curve_shadow_transparency);
#else
  kernel_curve_shadow_transparency_evaluate(kg, input, output, offset);
#endif
}
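
/* Illustrative usage sketch (added note, not in the original source): the host
 * side is assumed to fill an array of KernelShaderEvalInput entries and call
 * the generated entry point once per element, with `offset` indexing into both
 * the input and output arrays, e.g.:
 *
 *   for (int i = 0; i < num_points; i++) {
 *     kernel_cpu_avx2_shader_eval_displace(kg, input, output, i);
 *   }
 *
 * The kernel_cpu_avx2_ prefix is only an example of what
 * KERNEL_FUNCTION_FULL_NAME may resolve to; the actual dispatch goes through
 * the CPU device's kernel function table rather than a direct call. */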

/* --------------------------------------------------------------------
 * Adaptive sampling.
 */

bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
    const KernelGlobalsCPU *kg,
    ccl_global float *render_buffer,
    int x,
    int y,
    float threshold,
    bool reset,
    int offset,
    int stride)
{
#ifdef KERNEL_STUB
  STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_convergence_check);
  return false;
#else
  return kernel_adaptive_sampling_convergence_check(
      kg, render_buffer, x, y, threshold, reset, offset, stride);
#endif
}

void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg,
                                                           ccl_global float *render_buffer,
                                                           int y,
                                                           int start_x,
                                                           int width,
                                                           int offset,
                                                           int stride)
{
#ifdef KERNEL_STUB
  STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_x);
#else
  kernel_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
#endif
}

void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg,
                                                           ccl_global float *render_buffer,
                                                           int x,
                                                           int start_y,
                                                           int height,
                                                           int offset,
                                                           int stride)
{
#ifdef KERNEL_STUB
  STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_y);
#else
  kernel_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
#endif
}
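
/* Illustrative note (added for clarity, not in the original source): the host
 * is assumed to run the convergence check per pixel against the noise
 * threshold, then run the x/y filter passes over rows and columns so that
 * unconverged pixels also keep their immediate neighbors sampling, roughly:
 *
 *   bool all_converged = true;
 *   for (int y = 0; y < h; y++)
 *     for (int x = 0; x < w; x++)
 *       all_converged &= convergence_check(kg, buffer, x, y, threshold, reset, offset, stride);
 *   for (int y = 0; y < h; y++)
 *     filter_x(kg, buffer, y, 0, w, offset, stride);
 *   for (int x = 0; x < w; x++)
 *     filter_y(kg, buffer, x, 0, h, offset, stride);
 *
 * The names here are shorthand for the KERNEL_FUNCTION_FULL_NAME() wrappers
 * above; the actual scheduling lives in the CPU device / path tracer code,
 * not in this header. */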

/* --------------------------------------------------------------------
 * Cryptomatte.
 */

void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg,
                                                        ccl_global float *render_buffer,
                                                        int pixel_index)
{
#ifdef KERNEL_STUB
  STUB_ASSERT(KERNEL_ARCH, cryptomatte_postprocess);
#else
  kernel_cryptomatte_post(kg, render_buffer, pixel_index);
#endif
}

/* --------------------------------------------------------------------
 * Film Convert.
 */

#ifdef KERNEL_STUB

#  define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
                                                        const float *buffer, \
                                                        float *pixel, \
                                                        const int width, \
                                                        const int buffer_stride, \
                                                        const int pixel_stride) \
    { \
      STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
    } \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
        const KernelFilmConvert *kfilm_convert, \
        const float *buffer, \
        half4 *pixel, \
        const int width, \
        const int buffer_stride) \
    { \
      STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
    }

#else

#  define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
                                                        const float *buffer, \
                                                        float *pixel, \
                                                        const int width, \
                                                        const int buffer_stride, \
                                                        const int pixel_stride) \
    { \
      for (int i = 0; i < width; i++, buffer += buffer_stride, pixel += pixel_stride) { \
        film_get_pass_pixel_##name(kfilm_convert, buffer, pixel); \
      } \
    } \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
        const KernelFilmConvert *kfilm_convert, \
        const float *buffer, \
        half4 *pixel, \
        const int width, \
        const int buffer_stride) \
    { \
      for (int i = 0; i < width; i++, buffer += buffer_stride, pixel++) { \
        float pixel_rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; \
        film_get_pass_pixel_##name(kfilm_convert, buffer, pixel_rgba); \
        if (is_float) { \
          pixel_rgba[1] = pixel_rgba[0]; \
          pixel_rgba[2] = pixel_rgba[0]; \
        } \
        film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba); \
        *pixel = float4_to_half4_display( \
            make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3])); \
      } \
    }

#endif
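
/* Added note (not in the original source): each KERNEL_FILM_CONVERT_FUNCTION
 * use below generates two entry points, a float variant that steps through the
 * destination by pixel_stride floats per pixel, and a half_rgba variant that
 * always outputs half4 display pixels. The is_float flag only affects the
 * half_rgba variant: for scalar passes (depth, sample_count, ...) the single
 * value read into pixel_rgba[0] is replicated to the G and B channels so the
 * pass displays as grayscale. */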

KERNEL_FILM_CONVERT_FUNCTION(depth, true)
KERNEL_FILM_CONVERT_FUNCTION(mist, true)
KERNEL_FILM_CONVERT_FUNCTION(sample_count, true)
KERNEL_FILM_CONVERT_FUNCTION(float, true)

KERNEL_FILM_CONVERT_FUNCTION(light_path, false)
KERNEL_FILM_CONVERT_FUNCTION(float3, false)

KERNEL_FILM_CONVERT_FUNCTION(motion, false)
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte, false)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false)
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false)
KERNEL_FILM_CONVERT_FUNCTION(combined, false)
KERNEL_FILM_CONVERT_FUNCTION(float4, false)

#undef KERNEL_FILM_CONVERT_FUNCTION

#undef KERNEL_INVOKE
#undef DEFINE_INTEGRATOR_KERNEL
#undef DEFINE_INTEGRATOR_SHADE_KERNEL
#undef DEFINE_INTEGRATOR_INIT_KERNEL

#undef KERNEL_STUB
#undef STUB_ASSERT
#undef KERNEL_ARCH

CCL_NAMESPACE_END