state_flow.h (Blender 3.3, Cycles kernel)
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

#pragma once

#include "kernel/types.h"
#include "util/atomic.h"

CCL_NAMESPACE_BEGIN
/* Control Flow
 *
 * Utilities for control flow between kernels. The implementation differs between CPU and
 * GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
 *
 * There is a main path for regular path tracing of camera rays. Shadow rays for next
 * event estimation branch off from this into their own path, which may be computed in
 * parallel while the main path continues. Additionally, shading kernels are sorted using
 * a key for coherence.
 *
 * Each kernel on the main path must call exactly one of these functions; they may not be
 * called multiple times from the same kernel:
 *
 * integrator_path_init(kg, state, next_kernel)
 * integrator_path_next(kg, state, current_kernel, next_kernel)
 * integrator_path_terminate(kg, state, current_kernel)
 *
 * Similar functions exist for the shadow path, and again each shadow kernel must call
 * exactly one of them, and only once. */
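To make the protocol concrete, here is a hedged sketch of how a main-path kernel might
use these functions. The kernel body and the continuation decision are hypothetical, and
the DEVICE_KERNEL_INTEGRATOR_* values are assumed to be members of the DeviceKernel enum:

ccl_device void example_shade_surface(KernelGlobals kg, IntegratorState state)
{
  /* Hypothetical continuation decision (russian roulette, max bounces, ...). */
  const bool continue_path = true;

  if (continue_path) {
    /* Hand the path off to the next kernel, exactly once. */
    integrator_path_next(kg,
                         state,
                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
                         DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
  }
  else {
    /* Or end the path, also exactly once. */
    integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE);
  }
}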
ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
{
  return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
}

ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
{
  return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
}
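A queued_kernel value of 0 doubles as the "no kernel queued" marker, which is what these
predicates test. As a hedged illustration (not the actual Cycles scheduler), a CPU-style
driver loop could use the predicate like this, dispatching whatever kernel is queued
until the path ends:

/* Hypothetical dispatcher that runs one kernel body for this path. */
ccl_device void example_dispatch(KernelGlobals kg, IntegratorState state, DeviceKernel kernel);

ccl_device void example_path_loop(KernelGlobals kg, IntegratorState state)
{
  while (!integrator_path_is_terminated(state)) {
    const DeviceKernel kernel = (DeviceKernel)INTEGRATOR_STATE(state, path, queued_kernel);
    /* Each kernel body queues the next kernel or terminates the path,
     * so the loop condition eventually becomes false. */
    example_dispatch(kg, state, kernel);
  }
}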
#ifdef __KERNEL_GPU__

ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel next_kernel)
{
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}
ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel current_kernel,
                                                 const DeviceKernel next_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}
ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
                                                      IntegratorState state,
                                                      const DeviceKernel current_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
}
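On the GPU these atomic increments and decrements maintain a per-kernel count of queued
paths, which the host reads to size and order wavefront launches. A hedged host-side
sketch of that idea (hypothetical code, not the actual Cycles scheduler):

int example_pick_next_kernel(const uint32_t *num_queued, const int num_kernels)
{
  /* Launch the kernel with the most queued paths next; -1 means all paths done. */
  int best_kernel = -1;
  uint32_t best_count = 0;
  for (int i = 0; i < num_kernels; i++) {
    if (num_queued[i] > best_count) {
      best_count = num_queued[i];
      best_kernel = i;
    }
  }
  return best_kernel;
}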
ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
  IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
      &kernel_integrator_state.next_shadow_path_index[0], 1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
  return shadow_state;
}
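The atomic add on next_shadow_path_index is a bump allocator: on the GPU a shadow state
is an index into a preallocated pool, and each caller receives a unique slot. A generic
sketch of the same idea, with hypothetical names:

uint32_t example_bump_alloc(uint32_t *next_index)
{
  /* The returned (pre-increment) value is a slot owned by this caller alone. */
  return atomic_fetch_and_add_uint32(next_index, 1);
}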
ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
                                                        IntegratorShadowState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
}
ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
                                                             IntegratorShadowState state,
                                                             const DeviceKernel current_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
}
/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
#  define INTEGRATOR_SORT_KEY(key, state) \
    (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor))
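As a worked example with hypothetical values: if kernel_data.max_shaders is 64 and
sort_partition_divisor is 65536, a path at state index 200000 with shader key 5 maps to
5 + 64 * (200000 / 65536) = 5 + 64 * 3 = 197. All paths in the same partition of 65536
states that run the same shader share a sort key, so sorting by it groups nearby state
indices first (locality) and identical shaders within each partition second (coherence).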
ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  const int key_ = INTEGRATOR_SORT_KEY(key, state);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}
ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  const int key_ = INTEGRATOR_SORT_KEY(key, state);
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}
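The sort_key_counter histogram records how many queued paths land on each sort key.
Before a sorted shading kernel is launched, such a histogram can be turned into per-key
start offsets with an exclusive prefix sum, and path indices scattered into their
buckets, i.e. a counting sort. A hedged, host-style sketch of the offset step
(hypothetical code, not the actual Cycles implementation):

void example_counting_sort_offsets(const uint32_t *counts, uint32_t *offsets, const int num_keys)
{
  uint32_t sum = 0;
  for (int i = 0; i < num_keys; i++) {
    offsets[i] = sum; /* Where key i's bucket starts in the sorted work list. */
    sum += counts[i];
  }
}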
#else

ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}
ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)key;
}
ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel current_kernel,
                                                 const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)current_kernel;
}
ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
                                                      IntegratorState state,
                                                      const DeviceKernel current_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
  (void)current_kernel;
}
ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)key;
  (void)current_kernel;
}
ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
  IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
  return shadow_state;
}
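On the CPU there is no pool to allocate from: each path state embeds its own shadow
states, and init just returns a pointer to the right member. The member accesses above
imply a layout along these lines (a sketch only; the real definition is in state.h):

struct ExampleIntegratorStateCPU {
  /* ... main path state ... */
  IntegratorShadowStateCPU shadow; /* Shadow ray for next event estimation. */
  IntegratorShadowStateCPU ao;     /* Shadow ray for ambient occlusion. */
};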
ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
                                                        IntegratorShadowState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
  (void)current_kernel;
}
ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
                                                             IntegratorShadowState state,
                                                             const DeviceKernel current_kernel)
{
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
  (void)current_kernel;
}

#endif
CCL_NAMESPACE_END