Blender  V3.3
cpu/device_impl.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
5 
6 #include <stdlib.h>
7 #include <string.h>
8 
9 /* So ImathMath is included before our kernel_cpu_compat. */
10 #ifdef WITH_OSL
11 /* So no context pollution happens from indirectly included windows.h */
12 # include "util/windows.h"
13 # include <OSL/oslexec.h>
14 #endif
15 
16 #ifdef WITH_EMBREE
17 # include <embree3/rtcore.h>
18 #endif
19 
20 #include "device/cpu/kernel.h"
22 
23 #include "device/device.h"
24 
25 // clang-format off
29 #include "kernel/types.h"
30 
31 #include "kernel/osl/shader.h"
32 #include "kernel/osl/globals.h"
33 // clang-format on
34 
35 #include "bvh/embree.h"
36 
37 #include "session/buffers.h"
38 
39 #include "util/debug.h"
40 #include "util/foreach.h"
41 #include "util/function.h"
42 #include "util/log.h"
43 #include "util/map.h"
44 #include "util/openimagedenoise.h"
45 #include "util/optimization.h"
46 #include "util/progress.h"
47 #include "util/system.h"
48 #include "util/task.h"
49 #include "util/thread.h"
50 
52 
53 CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
54  : Device(info_, stats_, profiler_), texture_info(this, "texture_info", MEM_GLOBAL)
55 {
56  /* Pick any kernel; all of them are supposed to have the same level of microarchitecture
57  * optimization. */
59  << " CPU kernels.";
60 
61  if (info.cpu_threads == 0) {
63  }
64 
65 #ifdef WITH_OSL
66  kernel_globals.osl = &osl_globals;
67 #endif
68 #ifdef WITH_EMBREE
69  embree_device = rtcNewDevice("verbose=0");
70 #endif
71  need_texture_info = false;
72 }
73 
75 {
76 #ifdef WITH_EMBREE
77  rtcReleaseDevice(embree_device);
78 #endif
79 
81 }
82 
84 {
85  BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
86 #ifdef WITH_EMBREE
87  bvh_layout_mask |= BVH_LAYOUT_EMBREE;
88 #endif /* WITH_EMBREE */
89  return bvh_layout_mask;
90 }
91 
93 {
94  if (!need_texture_info) {
95  return false;
96  }
97 
99  need_texture_info = false;
100 
101  return true;
102 }
103 
105 {
106  if (mem.type == MEM_TEXTURE) {
107  assert(!"mem_alloc not supported for textures.");
108  }
109  else if (mem.type == MEM_GLOBAL) {
110  assert(!"mem_alloc not supported for global memory.");
111  }
112  else {
113  if (mem.name) {
114  VLOG_WORK << "Buffer allocate: " << mem.name << ", "
115  << string_human_readable_number(mem.memory_size()) << " bytes. ("
116  << string_human_readable_size(mem.memory_size()) << ")";
117  }
118 
119  if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
120  size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
121  void *data = util_aligned_malloc(mem.memory_size(), alignment);
123  }
124  else {
126  }
127 
128  mem.device_size = mem.memory_size();
130  }
131 }
132 
134 {
135  if (mem.type == MEM_GLOBAL) {
136  global_free(mem);
137  global_alloc(mem);
138  }
139  else if (mem.type == MEM_TEXTURE) {
140  tex_free((device_texture &)mem);
141  tex_alloc((device_texture &)mem);
142  }
143  else {
144  if (!mem.device_pointer) {
145  mem_alloc(mem);
146  }
147 
148  /* copy is no-op */
149  }
150 }
151 
153  device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
154 {
155  /* no-op */
156 }
157 
159 {
160  if (!mem.device_pointer) {
161  mem_alloc(mem);
162  }
163 
164  if (mem.device_pointer) {
165  memset((void *)mem.device_pointer, 0, mem.memory_size());
166  }
167 }
168 
170 {
171  if (mem.type == MEM_GLOBAL) {
172  global_free(mem);
173  }
174  else if (mem.type == MEM_TEXTURE) {
175  tex_free((device_texture &)mem);
176  }
177  else if (mem.device_pointer) {
178  if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
179  util_aligned_free((void *)mem.device_pointer);
180  }
181  mem.device_pointer = 0;
183  mem.device_size = 0;
184  }
185 }
186 
188 {
189  return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
190 }
191 
/* Copy a named block of constant data from host memory into this device's kernel globals.
 *
 * name: identifier of the constant block; "data" receives extra handling when built WITH_EMBREE.
 * host: pointer to the host-side source data.
 * size: size of the source data, forwarded unchanged to kernel_const_copy().
 *
 * Side effect: for the "data" block in WITH_EMBREE builds, the caller's buffer is modified in
 * place (its device_bvh field is overwritten with embree_scene) before it is copied. */
192 void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
193 {
194 #ifdef WITH_EMBREE
195  if (strcmp(name, "data") == 0) {
  /* The buffer is reinterpreted as a KernelData below, so it must not claim to be larger. */
196  assert(size <= sizeof(KernelData));
197 
198  // Update scene handle (since it is different for each device on multi devices)
199  KernelData *const data = (KernelData *)host;
200  data->device_bvh = embree_scene;
201  }
202 #endif
  /* Hand the (possibly patched) host buffer to the kernel-side copy routine. */
203  kernel_const_copy(&kernel_globals, name, host, size);
204 }
205 
207 {
208  VLOG_WORK << "Global memory allocate: " << mem.name << ", "
209  << string_human_readable_number(mem.memory_size()) << " bytes. ("
210  << string_human_readable_size(mem.memory_size()) << ")";
211 
213 
215  mem.device_size = mem.memory_size();
217 }
218 
220 {
221  if (mem.device_pointer) {
222  mem.device_pointer = 0;
224  mem.device_size = 0;
225  }
226 }
227 
229 {
230  VLOG_WORK << "Texture allocate: " << mem.name << ", "
231  << string_human_readable_number(mem.memory_size()) << " bytes. ("
232  << string_human_readable_size(mem.memory_size()) << ")";
233 
235  mem.device_size = mem.memory_size();
237 
238  const uint slot = mem.slot;
239  if (slot >= texture_info.size()) {
240  /* Allocate some slots in advance, to reduce the number of re-allocations. */
241  texture_info.resize(slot + 128);
242  }
243 
244  texture_info[slot] = mem.info;
245  texture_info[slot].data = (uint64_t)mem.host_pointer;
246  need_texture_info = true;
247 }
248 
250 {
251  if (mem.device_pointer) {
252  mem.device_pointer = 0;
254  mem.device_size = 0;
255  need_texture_info = true;
256  }
257 }
258 
259 void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
260 {
261 #ifdef WITH_EMBREE
262  if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
265  BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
266  if (refit) {
267  bvh_embree->refit(progress);
268  }
269  else {
270  bvh_embree->build(progress, &stats, embree_device);
271  }
272 
273  if (bvh->params.top_level) {
274  embree_scene = bvh_embree->scene;
275  }
276  }
277  else
278 #endif
279  Device::build_bvh(bvh, progress, refit);
280 }
281 
283  vector<CPUKernelThreadGlobals> &kernel_thread_globals)
284 {
285  /* Ensure the latest texture info is loaded into kernel globals before returning. */
287 
288  kernel_thread_globals.clear();
289  void *osl_memory = get_cpu_osl_memory();
290  for (int i = 0; i < info.cpu_threads; i++) {
291  kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler);
292  }
293 }
294 
296 {
297 #ifdef WITH_OSL
298  return &osl_globals;
299 #else
300  return NULL;
301 #endif
302 }
303 
/* Kernel loading hook for the CPU device.
 *
 * The requested kernel feature mask is ignored (the parameter is unnamed) and the function
 * always reports success.
 * NOTE(review): presumably nothing needs loading because the CPU kernels are compiled into the
 * binary -- confirm against the kernel build setup. */
304 bool CPUDevice::load_kernels(const uint /*kernel_features*/)
305 {
306  return true;
307 }
308 
unsigned int uint
Definition: BLI_sys_types.h:67
void util_aligned_free(void *ptr)
CCL_NAMESPACE_BEGIN void * util_aligned_malloc(size_t size, int alignment)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
BVHLayout bvh_layout
Definition: params.h:80
bool top_level
Definition: params.h:77
Definition: bvh/bvh.h:63
BVHParams params
Definition: bvh/bvh.h:65
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override
virtual bool load_kernels(uint) override
virtual BVHLayoutMask get_bvh_layout_mask() const override
bool need_texture_info
bool load_texture_info()
virtual void mem_free(device_memory &mem) override
virtual void const_copy_to(const char *name, void *host, size_t size) override
virtual void mem_alloc(device_memory &mem) override
virtual void * get_cpu_osl_memory() override
void tex_alloc(device_texture &mem)
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t) override
void tex_free(device_texture &mem)
void global_alloc(device_memory &mem)
device_vector< TextureInfo > texture_info
virtual void mem_zero(device_memory &mem) override
KernelGlobalsCPU kernel_globals
virtual void get_cpu_kernel_thread_globals(vector< CPUKernelThreadGlobals > &kernel_thread_globals) override
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
virtual void mem_copy_to(device_memory &mem) override
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
void global_free(device_memory &mem)
const char * get_uarch_name() const
IntegratorInitFunction integrator_init_from_camera
static const CPUKernels & get_cpu_kernels()
Profiler & profiler
Stats & stats
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
DeviceInfo info
void mem_free(size_t size)
Definition: util/stats.h:29
void mem_alloc(size_t size)
Definition: util/stats.h:23
static int max_concurrency()
Definition: task.cpp:99
size_t memory_elements_size(int elements)
device_ptr device_pointer
size_t size() const
T * resize(size_t width, size_t height=0, size_t depth=0)
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
@ MEM_GLOBAL
@ MEM_TEXTURE
@ MEM_DEVICE_ONLY
#define MIN_ALIGNMENT_CPU_DATA_TYPES
void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem, size_t size)
CCL_NAMESPACE_BEGIN void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t)
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
@ BVH_LAYOUT_EMBREE
@ BVH_LAYOUT_BVH2
@ BVH_LAYOUT_MULTI_METAL_EMBREE
@ BVH_LAYOUT_MULTI_OPTIX_EMBREE
#define VLOG_INFO
Definition: log.h:77
#define VLOG_WORK
Definition: log.h:80
int BVHLayoutMask
Definition: params.h:47
unsigned __int64 uint64_t
Definition: stdint.h:90
string string_human_readable_size(size_t size)
Definition: string.cpp:229
string string_human_readable_number(size_t num)
Definition: string.cpp:248
uint64_t device_ptr
Definition: util/types.h:43