Blender V3.3
path_trace_work_cpu.cpp
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

#include "integrator/path_trace_work_cpu.h"

#include "device/cpu/kernel.h"
#include "device/device.h"

#include "kernel/integrator/path_state.h"

#include "integrator/pass_accessor_cpu.h"
#include "integrator/path_trace_display.h"

#include "scene/scene.h"
#include "session/buffers.h"

#include "util/atomic.h"
#include "util/log.h"
#include "util/tbb.h"

CCL_NAMESPACE_BEGIN

/* Create TBB arena for execution of path tracing and rendering tasks. */
static inline tbb::task_arena local_tbb_arena_create(const Device *device)
{
  /* TODO: limit this to number of threads of CPU device, it may be smaller than
   * the system number of threads when we reduce the number of CPU threads in
   * CPU + GPU rendering to dedicate some cores to handling the GPU device. */
  return tbb::task_arena(device->info.cpu_threads);
}

/* Get CPUKernelThreadGlobals for the current thread. */
static inline CPUKernelThreadGlobals *kernel_thread_globals_get(
    vector<CPUKernelThreadGlobals> &kernel_thread_globals)
{
  const int thread_index = tbb::this_task_arena::current_thread_index();
  DCHECK_GE(thread_index, 0);
  DCHECK_LE(thread_index, kernel_thread_globals.size());

  return &kernel_thread_globals[thread_index];
}

PathTraceWorkCPU::PathTraceWorkCPU(Device *device,
                                   Film *film,
                                   DeviceScene *device_scene,
                                   bool *cancel_requested_flag)
    : PathTraceWork(device, film, device_scene, cancel_requested_flag),
      kernels_(Device::get_cpu_kernels())
{
  DCHECK_EQ(device->info.type, DEVICE_CPU);
}

void PathTraceWorkCPU::init_execution()
{
  /* Cache per-thread kernel globals. */
  device_->get_cpu_kernel_thread_globals(kernel_thread_globals_);
}

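/* Render the requested number of samples for this work's slice of the image.
 * Work is distributed inside a TBB arena sized to the CPU device: each task builds a 1x1
 * KernelWorkTile for a single pixel and runs it through the full per-sample pipeline. */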
void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
                                      int start_sample,
                                      int samples_num,
                                      int sample_offset)
{
  const int64_t image_width = effective_buffer_params_.width;
  const int64_t image_height = effective_buffer_params_.height;
  const int64_t total_pixels_num = image_width * image_height;

  if (device_->profiler.active()) {
    for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
      kernel_globals.start_profiling();
    }
  }

  tbb::task_arena local_arena = local_tbb_arena_create(device_);
  local_arena.execute([&]() {
    parallel_for(int64_t(0), total_pixels_num, [&](int64_t work_index) {
      if (is_cancel_requested()) {
        return;
      }

      const int y = work_index / image_width;
      const int x = work_index - y * image_width;

      KernelWorkTile work_tile;
      work_tile.x = effective_buffer_params_.full_x + x;
      work_tile.y = effective_buffer_params_.full_y + y;
      work_tile.w = 1;
      work_tile.h = 1;
      work_tile.start_sample = start_sample;
      work_tile.sample_offset = sample_offset;
      work_tile.num_samples = 1;
      work_tile.offset = effective_buffer_params_.offset;
      work_tile.stride = effective_buffer_params_.stride;

      CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_get(kernel_thread_globals_);

      render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
    });
  });
  if (device_->profiler.active()) {
    for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
      kernel_globals.stop_profiling();
    }
  }

  statistics.occupancy = 1.0f;
}

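/* Render all requested samples for a single pixel described by the work tile.
 * Every sample is initialized either from the camera or from bake data and then traced to
 * completion with the megakernel; when a shadow catcher is present, the dedicated
 * shadow-catcher state is traced with a second megakernel invocation. */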
void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobalsCPU *kernel_globals,
                                                    const KernelWorkTile &work_tile,
                                                    const int samples_num)
{
  const bool has_bake = device_scene_->data.bake.use;

  IntegratorStateCPU integrator_states[2];

  IntegratorStateCPU *state = &integrator_states[0];
  IntegratorStateCPU *shadow_catcher_state = nullptr;

  if (device_scene_->data.integrator.has_shadow_catcher) {
    shadow_catcher_state = &integrator_states[1];
    path_state_init_queues(shadow_catcher_state);
  }

  KernelWorkTile sample_work_tile = work_tile;
  float *render_buffer = buffers_->buffer.data();

  for (int sample = 0; sample < samples_num; ++sample) {
    if (is_cancel_requested()) {
      break;
    }

    if (has_bake) {
      if (!kernels_.integrator_init_from_bake(
              kernel_globals, state, &sample_work_tile, render_buffer)) {
        break;
      }
    }
    else {
      if (!kernels_.integrator_init_from_camera(
              kernel_globals, state, &sample_work_tile, render_buffer)) {
        break;
      }
    }

    kernels_.integrator_megakernel(kernel_globals, state, render_buffer);

    if (shadow_catcher_state) {
      kernels_.integrator_megakernel(kernel_globals, shadow_catcher_state, render_buffer);
    }

    ++sample_work_tile.start_sample;
  }
}

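/* Copy the current render result into the display texture.
 * The display buffer is mapped directly and filled by the CPU pass accessor inside the local
 * TBB arena; if mapping the texture buffer fails the update is skipped. */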
void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
                                       PassMode pass_mode,
                                       int num_samples)
{
  half4 *rgba_half = display->map_texture_buffer();
  if (!rgba_half) {
    /* TODO(sergey): Look into using copy_to_display() if mapping failed. Might be needed for
     * some implementations of PathTraceDisplay which can not map memory? */
    return;
  }

  const KernelFilm &kfilm = device_scene_->data.film;

  const PassAccessor::PassAccessInfo pass_access_info = get_display_pass_access_info(pass_mode);

  const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples);

  PassAccessor::Destination destination = get_display_destination_template(display);
  destination.pixels_half_rgba = rgba_half;

  tbb::task_arena local_arena = local_tbb_arena_create(device_);
  local_arena.execute([&]() {
    pass_accessor.get_render_tile_pixels(buffers_.get(), effective_buffer_params_, destination);
  });

  display->unmap_texture_buffer();
}

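/* The CPU work does not allocate any GPU resources for the display, so there is nothing to
 * destroy; render buffers are copied to and from the device via the buffer helpers below. */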
void PathTraceWorkCPU::destroy_gpu_resources(PathTraceDisplay * /*display*/)
{
}

bool PathTraceWorkCPU::copy_render_buffers_from_device()
{
  return buffers_->copy_from_device();
}

bool PathTraceWorkCPU::copy_render_buffers_to_device()
{
  buffers_->buffer.copy_to_device();
  return true;
}

bool PathTraceWorkCPU::zero_render_buffers()
{
  buffers_->zero();
  return true;
}

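/* Adaptive sampling: check per-pixel convergence against the noise threshold, count pixels
 * which are still active, run the x-filter on rows that contain unconverged pixels and, if any
 * pixels remain active, the y-filter over all columns. Returns the number of active pixels. */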
int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(float threshold, bool reset)
{
  const int full_x = effective_buffer_params_.full_x;
  const int full_y = effective_buffer_params_.full_y;
  const int width = effective_buffer_params_.width;
  const int height = effective_buffer_params_.height;
  const int offset = effective_buffer_params_.offset;
  const int stride = effective_buffer_params_.stride;

  float *render_buffer = buffers_->buffer.data();

  uint num_active_pixels = 0;

  tbb::task_arena local_arena = local_tbb_arena_create(device_);

  /* Check convergence and do x-filter in a single `parallel_for`, to reduce threading overhead. */
  local_arena.execute([&]() {
    parallel_for(full_y, full_y + height, [&](int y) {
      CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0];

      bool row_converged = true;
      uint num_row_pixels_active = 0;
      for (int x = 0; x < width; ++x) {
        if (!kernels_.adaptive_sampling_convergence_check(
                kernel_globals, render_buffer, full_x + x, y, threshold, reset, offset, stride)) {
          ++num_row_pixels_active;
          row_converged = false;
        }
      }

      atomic_fetch_and_add_uint32(&num_active_pixels, num_row_pixels_active);

      if (!row_converged) {
        kernels_.adaptive_sampling_filter_x(
            kernel_globals, render_buffer, y, full_x, width, offset, stride);
      }
    });
  });

  if (num_active_pixels) {
    local_arena.execute([&]() {
      parallel_for(full_x, full_x + width, [&](int x) {
        CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0];
        kernels_.adaptive_sampling_filter_y(
            kernel_globals, render_buffer, x, full_y, height, offset, stride);
      });
    });
  }

  return num_active_pixels;
}

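/* Run the cryptomatte postprocess kernel over every pixel of the render buffer,
 * parallelized over rows inside the local TBB arena. */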
void PathTraceWorkCPU::cryptomatte_postproces()
{
  const int width = effective_buffer_params_.width;
  const int height = effective_buffer_params_.height;

  float *render_buffer = buffers_->buffer.data();

  tbb::task_arena local_arena = local_tbb_arena_create(device_);

  /* Postprocess all pixels in a single `parallel_for` over rows, to reduce threading overhead. */
  local_arena.execute([&]() {
    parallel_for(0, height, [&](int y) {
      CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0];
      int pixel_index = y * width;

      for (int x = 0; x < width; ++x, ++pixel_index) {
        kernels_.cryptomatte_postprocess(kernel_globals, render_buffer, pixel_index);
      }
    });
  });
}

CCL_NAMESPACE_END