Blender  V3.3
device/device.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #include <stdlib.h>
5 #include <string.h>
6 
7 #include "bvh/bvh2.h"
8 
9 #include "device/device.h"
10 #include "device/queue.h"
11 
12 #include "device/cpu/device.h"
13 #include "device/cpu/kernel.h"
14 #include "device/cuda/device.h"
15 #include "device/dummy/device.h"
16 #include "device/hip/device.h"
17 #include "device/metal/device.h"
18 #include "device/multi/device.h"
19 #include "device/oneapi/device.h"
20 #include "device/optix/device.h"
21 
22 #include "util/foreach.h"
23 #include "util/half.h"
24 #include "util/log.h"
25 #include "util/math.h"
26 #include "util/string.h"
27 #include "util/system.h"
28 #include "util/task.h"
29 #include "util/time.h"
30 #include "util/types.h"
31 #include "util/vector.h"
32 
34 
/* Definitions of the static state shared by every Device. */
35 bool Device::need_types_update = true;
36 bool Device::need_devices_update = true;
/* Locked by the device enumeration and capability-reporting code further down. */
37 thread_mutex Device::device_mutex;
/* One DeviceInfo list per backend. They are filled by the device_*_info()
 * calls in the enumeration code and emptied again by the free_memory() body. */
38 vector<DeviceInfo> Device::cuda_devices;
39 vector<DeviceInfo> Device::optix_devices;
40 vector<DeviceInfo> Device::cpu_devices;
41 vector<DeviceInfo> Device::hip_devices;
42 vector<DeviceInfo> Device::metal_devices;
43 vector<DeviceInfo> Device::oneapi_devices;
/* Bitmask of DEVICE_MASK_* flags recording which backend lists were already queried. */
44 uint Device::devices_initialized_mask = 0;
45 
46 /* Device */
47 
/* Destructor with an empty body. It is explicitly declared noexcept(false),
 * so it is not implicitly noexcept. */
48 Device::~Device() noexcept(false)
49 {
50 }
51 
/* Build or refit `bvh` through the BVH2 implementation.
 *
 * The BVH is asserted to use BVH_LAYOUT_BVH2 and is then downcast to BVH2.
 * When `refit` is true the existing tree is refitted; otherwise a full build
 * runs and the address of the `stats` member is passed along. `progress` is
 * forwarded to whichever call is made. */
52 void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
53 {
54  assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);
55 
/* static_cast performs no runtime check; the assert above is the only guard. */
56  BVH2 *const bvh2 = static_cast<BVH2 *>(bvh);
57  if (refit) {
58  bvh2->refit(progress);
59  }
60  else {
61  bvh2->build(progress, &stats);
62  }
63 }
64 
/* Factory that returns a Device for the backend described by `info`.
 *
 * NOTE(review): this listing has dropped several statements (the gutter
 * numbers skip 71, 78, 83, 89, 96, 103, 109 and 118), so the calls that
 * actually create a device are not visible here. Presumably they are the
 * device_*_create() functions named in the cross-reference index at the end
 * of the page; confirm against the real source before editing this function.
 *
 * What is visible: `device` starts out as NULL, the switch dispatches on
 * `info.type`, every GPU case is compiled in only under its WITH_* macro and
 * most of them are guarded by the backend's *_init() call, and `device` is
 * what gets returned. */
65 Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
66 {
67  if (!info.multi_devices.empty()) {
68  /* Always create a multi device when info contains multiple devices.
69  * This is done so that the type can still be e.g. DEVICE_CPU to indicate
70  * that it is a homogeneous collection of devices, which simplifies checks. */
72  }
73 
74  Device *device = NULL;
75 
76  switch (info.type) {
77  case DEVICE_CPU:
79  break;
80 #ifdef WITH_CUDA
81  case DEVICE_CUDA:
82  if (device_cuda_init())
84  break;
85 #endif
86 #ifdef WITH_OPTIX
87  case DEVICE_OPTIX:
88  if (device_optix_init())
90  break;
91 #endif
92 
93 #ifdef WITH_HIP
94  case DEVICE_HIP:
95  if (device_hip_init())
97  break;
98 #endif
99 
100 #ifdef WITH_METAL
101  case DEVICE_METAL:
102  if (device_metal_init())
104  break;
105 #endif
106 
107 #ifdef WITH_ONEAPI
108  case DEVICE_ONEAPI:
110  break;
111 #endif
112 
113  default:
114  break;
115  }
116 
/* Fallback for the case where no backend produced a device; the statement
 * inside this branch (gutter 118) is missing from the listing. */
117  if (device == NULL) {
119  }
120 
121  return device;
122 }
123 
125 {
/* NOTE(review): the signature line (gutter 124) is missing from this listing;
 * presumably this is the body of Device::type_from_string(const char *name).
 *
 * Maps a backend name to its DeviceType. The comparison uses strcmp, so the
 * match is exact and case-sensitive. Any other string yields DEVICE_NONE. */
126  if (strcmp(name, "CPU") == 0)
127  return DEVICE_CPU;
128  else if (strcmp(name, "CUDA") == 0)
129  return DEVICE_CUDA;
130  else if (strcmp(name, "OPTIX") == 0)
131  return DEVICE_OPTIX;
132  else if (strcmp(name, "MULTI") == 0)
133  return DEVICE_MULTI;
134  else if (strcmp(name, "HIP") == 0)
135  return DEVICE_HIP;
136  else if (strcmp(name, "METAL") == 0)
137  return DEVICE_METAL;
138  else if (strcmp(name, "ONEAPI") == 0)
139  return DEVICE_ONEAPI;
140 
141  return DEVICE_NONE;
142 }
143 
145 {
/* NOTE(review): the signature line (gutter 144) is missing from this listing;
 * presumably this is the body of Device::string_from_type(DeviceType type).
 *
 * Returns the upper-case backend name for `type`. Types without a branch
 * below produce an empty string. */
146  if (type == DEVICE_CPU)
147  return "CPU";
148  else if (type == DEVICE_CUDA)
149  return "CUDA";
150  else if (type == DEVICE_OPTIX)
151  return "OPTIX";
152  else if (type == DEVICE_MULTI)
153  return "MULTI";
154  else if (type == DEVICE_HIP)
155  return "HIP";
156  else if (type == DEVICE_METAL)
157  return "METAL";
158  else if (type == DEVICE_ONEAPI)
159  return "ONEAPI";
160 
161  return "";
162 }
163 
165 {
/* NOTE(review): the signature (gutter 164) and the declaration of `types`
 * (gutter 166) are missing from this listing; presumably this is the body of
 * Device::available_types().
 *
 * Collects the device types this build supports: DEVICE_CPU is always
 * appended, each GPU backend only when its WITH_* macro is defined. */
167  types.push_back(DEVICE_CPU);
168 #ifdef WITH_CUDA
169  types.push_back(DEVICE_CUDA);
170 #endif
171 #ifdef WITH_OPTIX
172  types.push_back(DEVICE_OPTIX);
173 #endif
174 #ifdef WITH_HIP
175  types.push_back(DEVICE_HIP);
176 #endif
177 #ifdef WITH_METAL
178  types.push_back(DEVICE_METAL);
179 #endif
180 #ifdef WITH_ONEAPI
181  types.push_back(DEVICE_ONEAPI);
182 #endif
183  return types;
184 }
185 
187 {
/* NOTE(review): the signature (gutter 186), the declaration of `devices`
 * (gutter 192) and the `if` that opens the CUDA/OptiX section (gutter 195)
 * are missing from this listing; presumably this is the body of
 * Device::available_devices(), with `mask` being its DEVICE_MASK_* argument.
 *
 * For every backend selected in `mask` the per-backend list is queried at
 * most once (tracked in devices_initialized_mask) and then appended to
 * `devices`. The whole body runs with device_mutex held. */
188  /* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
189  * be broken and cause crashes when only trying to get device info, so
190  * we don't want to do any initialization until the user chooses to. */
191  thread_scoped_lock lock(device_mutex);
193 
/* The CUDA list is also built for OptiX-only builds because
 * device_optix_info() below takes cuda_devices as an input. */
194 #if defined(WITH_CUDA) || defined(WITH_OPTIX)
196  if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
197  if (device_cuda_init()) {
198  device_cuda_info(cuda_devices);
199  }
/* The flag is set even when init failed, so a failing backend is not retried. */
200  devices_initialized_mask |= DEVICE_MASK_CUDA;
201  }
202  if (mask & DEVICE_MASK_CUDA) {
203  foreach (DeviceInfo &info, cuda_devices) {
204  devices.push_back(info);
205  }
206  }
/* This brace closes the `if` whose opening line is missing (gutter 195). */
207  }
208 #endif
209 
210 #ifdef WITH_OPTIX
211  if (mask & DEVICE_MASK_OPTIX) {
212  if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
213  if (device_optix_init()) {
214  device_optix_info(cuda_devices, optix_devices);
215  }
216  devices_initialized_mask |= DEVICE_MASK_OPTIX;
217  }
218  foreach (DeviceInfo &info, optix_devices) {
219  devices.push_back(info);
220  }
221  }
222 #endif
223 
224 #ifdef WITH_HIP
225  if (mask & DEVICE_MASK_HIP) {
226  if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
227  if (device_hip_init()) {
228  device_hip_info(hip_devices);
229  }
230  devices_initialized_mask |= DEVICE_MASK_HIP;
231  }
232  foreach (DeviceInfo &info, hip_devices) {
233  devices.push_back(info);
234  }
235  }
236 #endif
237 
238 #ifdef WITH_ONEAPI
239  if (mask & DEVICE_MASK_ONEAPI) {
240  if (!(devices_initialized_mask & DEVICE_MASK_ONEAPI)) {
241  if (device_oneapi_init()) {
242  device_oneapi_info(oneapi_devices);
243  }
244  devices_initialized_mask |= DEVICE_MASK_ONEAPI;
245  }
246  foreach (DeviceInfo &info, oneapi_devices) {
247  devices.push_back(info);
248  }
249  }
250 #endif
251 
/* The CPU backend has no init step and is not behind a build macro. */
252  if (mask & DEVICE_MASK_CPU) {
253  if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
254  device_cpu_info(cpu_devices);
255  devices_initialized_mask |= DEVICE_MASK_CPU;
256  }
257  foreach (DeviceInfo &info, cpu_devices) {
258  devices.push_back(info);
259  }
260  }
261 
262 #ifdef WITH_METAL
263  if (mask & DEVICE_MASK_METAL) {
264  if (!(devices_initialized_mask & DEVICE_MASK_METAL)) {
265  if (device_metal_init()) {
266  device_metal_info(metal_devices);
267  }
268  devices_initialized_mask |= DEVICE_MASK_METAL;
269  }
270  foreach (DeviceInfo &info, metal_devices) {
271  devices.push_back(info);
272  }
273  }
274 #endif
275 
276  return devices;
277 }
278 
/* Returns a DeviceInfo named `info`.
 *
 * NOTE(review): the statements that declare and fill `info` (gutter 281-283)
 * are missing from this listing, so how `error_msg` is used cannot be
 * confirmed from here. Presumably it is stored in the returned info and the
 * type is set to DEVICE_DUMMY; verify against the real source. */
279 DeviceInfo Device::dummy_device(const string &error_msg)
280 {
284  return info;
285 }
286 
288 {
/* NOTE(review): the signature line (gutter 287) is missing from this listing;
 * presumably this is the body of Device::device_capabilities(), with `mask`
 * being its DEVICE_MASK_* argument.
 *
 * Builds a human-readable report with one section per backend selected in
 * `mask`. A GPU section is added only when the backend is compiled in and its
 * *_init() call succeeds. There is no OptiX section in this body. The whole
 * body runs with device_mutex held. */
289  thread_scoped_lock lock(device_mutex);
290  string capabilities = "";
291 
292  if (mask & DEVICE_MASK_CPU) {
293  capabilities += "\nCPU device capabilities: ";
294  capabilities += device_cpu_capabilities() + "\n";
295  }
296 
297 #ifdef WITH_CUDA
298  if (mask & DEVICE_MASK_CUDA) {
299  if (device_cuda_init()) {
300  capabilities += "\nCUDA device capabilities:\n";
301  capabilities += device_cuda_capabilities();
302  }
303  }
304 #endif
305 
306 #ifdef WITH_HIP
307  if (mask & DEVICE_MASK_HIP) {
308  if (device_hip_init()) {
309  capabilities += "\nHIP device capabilities:\n";
310  capabilities += device_hip_capabilities();
311  }
312  }
313 #endif
314 
315 #ifdef WITH_ONEAPI
316  if (mask & DEVICE_MASK_ONEAPI) {
317  if (device_oneapi_init()) {
318  capabilities += "\noneAPI device capabilities:\n";
319  capabilities += device_oneapi_capabilities();
320  }
321  }
322 #endif
323 
324 #ifdef WITH_METAL
325  if (mask & DEVICE_MASK_METAL) {
326  if (device_metal_init()) {
327  capabilities += "\nMetal device capabilities:\n";
328  capabilities += device_metal_capabilities();
329  }
330  }
331 #endif
332 
333  return capabilities;
334 }
335 
337  int threads,
338  bool background)
339 {
340  assert(subdevices.size() > 0);
341 
342  if (subdevices.size() == 1) {
343  /* No multi device needed. */
344  return subdevices.front();
345  }
346 
349  info.id = "MULTI";
350  info.description = "Multi Device";
351  info.num = 0;
352 
353  info.has_nanovdb = true;
354  info.has_osl = true;
355  info.has_profiling = true;
356  info.has_peer_memory = false;
357  info.use_metalrt = false;
359 
360  foreach (const DeviceInfo &device, subdevices) {
361  /* Ensure CPU device does not slow down GPU. */
362  if (device.type == DEVICE_CPU && subdevices.size() > 1) {
363  if (background) {
364  int orig_cpu_threads = (threads) ? threads : TaskScheduler::max_concurrency();
365  int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), size_t(0));
366 
367  VLOG_INFO << "CPU render threads reduced from " << orig_cpu_threads << " to "
368  << cpu_threads << ", to dedicate to GPU.";
369 
370  if (cpu_threads >= 1) {
371  DeviceInfo cpu_device = device;
372  cpu_device.cpu_threads = cpu_threads;
373  info.multi_devices.push_back(cpu_device);
374  }
375  else {
376  continue;
377  }
378  }
379  else {
380  VLOG_INFO << "CPU render threads disabled for interactive render.";
381  continue;
382  }
383  }
384  else {
385  info.multi_devices.push_back(device);
386  }
387 
388  /* Create unique ID for this combination of devices. */
389  info.id += device.id;
390 
391  /* Set device type to MULTI if subdevices are not of a common type. */
392  if (info.type == DEVICE_NONE) {
393  info.type = device.type;
394  }
395  else if (device.type != info.type) {
397  }
398 
399  /* Accumulate device info. */
400  info.has_nanovdb &= device.has_nanovdb;
401  info.has_osl &= device.has_osl;
402  info.has_profiling &= device.has_profiling;
404  info.use_metalrt |= device.use_metalrt;
405  info.denoisers &= device.denoisers;
406  }
407 
408  return info;
409 }
410 
412 {
/* NOTE(review): the signature line (gutter 411) is missing from this listing;
 * presumably this is the body of Device::tag_update().
 * It only forwards to free_memory(). */
413  free_memory();
414 }
415 
417 {
/* NOTE(review): the signature line (gutter 416) is missing from this listing;
 * presumably this is the body of Device::free_memory().
 *
 * Clears devices_initialized_mask and frees every per-backend device list.
 * With the mask back at zero, the enumeration code queries the backends again
 * the next time it runs.
 * NOTE(review): no lock is taken in this body; confirm that callers cannot
 * race with the enumeration code, which holds device_mutex. */
418  devices_initialized_mask = 0;
419  cuda_devices.free_memory();
420  optix_devices.free_memory();
421  hip_devices.free_memory();
422  oneapi_devices.free_memory();
423  cpu_devices.free_memory();
424  metal_devices.free_memory();
425 }
426 
/* Base implementation for devices that have no queue support: logs a FATAL
 * message and returns nullptr.
 * NOTE(review): with glog-style logging LOG(FATAL) normally terminates the
 * process, which would make the return unreachable; confirm for this logger. */
427 unique_ptr<DeviceQueue> Device::gpu_queue_create()
428 {
429  LOG(FATAL) << "Device does not support queues.";
430  return nullptr;
431 }
432 
434 {
/* NOTE(review): the signature line (gutter 433) is missing from this listing;
 * presumably this is the body of Device::get_cpu_kernels().
 * Returns the single function-local static CPUKernels instance. */
435  /* Initialize CPU kernels once and reuse. */
436  static CPUKernels kernels;
437  return kernels;
438 }
439 
441  vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/)
442 {
/* NOTE(review): the first signature line (gutter 440) is missing from this
 * listing; presumably this is Device::get_cpu_kernel_thread_globals().
 * Base implementation for devices without CPU kernels: the argument is
 * ignored and a FATAL message is logged. */
443  LOG(FATAL) << "Device does not support CPU kernels.";
444 }
445 
447 {
/* NOTE(review): the signature line (gutter 446) is missing from this listing;
 * presumably this is the body of Device::get_cpu_osl_memory().
 * This implementation always returns nullptr. */
448  return nullptr;
449 }
450 
451 /* DeviceInfo */
452 
unsigned int uint
Definition: BLI_sys_types.h:67
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum type
volatile int lock
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
Definition: bvh2.h:33
void refit(Progress &progress)
Definition: bvh2.cpp:89
void build(Progress &progress, Stats *stats)
Definition: bvh2.cpp:37
BVHLayout bvh_layout
Definition: params.h:80
Definition: bvh/bvh.h:63
BVHParams params
Definition: bvh/bvh.h:65
vector< DeviceInfo > multi_devices
Definition: device/device.h:75
DenoiserTypeMask denoisers
Definition: device/device.h:73
string error_msg
Definition: device/device.h:76
bool has_peer_memory
Definition: device/device.h:70
bool has_nanovdb
Definition: device/device.h:67
bool has_profiling
Definition: device/device.h:69
DeviceType type
Definition: device/device.h:62
bool use_metalrt
Definition: device/device.h:72
string description
Definition: device/device.h:63
static void free_memory()
static DeviceInfo dummy_device(const string &error_msg="")
static void tag_update()
static vector< DeviceInfo > available_devices(uint device_type_mask=DEVICE_MASK_ALL)
static const CPUKernels & get_cpu_kernels()
virtual void get_cpu_kernel_thread_globals(vector< CPUKernelThreadGlobals > &)
string error_msg
static Device * create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
virtual ~Device() noexcept(false)
virtual unique_ptr< DeviceQueue > gpu_queue_create()
Profiler & profiler
Stats & stats
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
static DeviceType type_from_string(const char *name)
virtual void * get_cpu_osl_memory()
static string device_capabilities(uint device_type_mask=DEVICE_MASK_ALL)
DeviceInfo info
static vector< DeviceType > available_types()
static string string_from_type(DeviceType type)
static DeviceInfo get_multi_device(const vector< DeviceInfo > &subdevices, int threads, bool background)
static int max_concurrency()
Definition: task.cpp:99
void free_memory()
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
@ DENOISER_ALL
Definition: denoise.h:18
void device_cpu_info(vector< DeviceInfo > &devices)
CCL_NAMESPACE_BEGIN Device * device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
string device_cpu_capabilities()
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
Device * device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
CCL_NAMESPACE_BEGIN bool device_cuda_init()
@ DEVICE_MASK_OPTIX
Definition: device/device.h:51
@ DEVICE_MASK_CPU
Definition: device/device.h:49
@ DEVICE_MASK_HIP
Definition: device/device.h:52
@ DEVICE_MASK_CUDA
Definition: device/device.h:50
@ DEVICE_MASK_METAL
Definition: device/device.h:53
@ DEVICE_MASK_ONEAPI
Definition: device/device.h:54
DeviceType
Definition: device/device.h:36
@ DEVICE_DUMMY
Definition: device/device.h:45
@ DEVICE_NONE
Definition: device/device.h:37
@ DEVICE_METAL
Definition: device/device.h:43
@ DEVICE_MULTI
Definition: device/device.h:40
@ DEVICE_CUDA
Definition: device/device.h:39
@ DEVICE_CPU
Definition: device/device.h:38
@ DEVICE_OPTIX
Definition: device/device.h:41
@ DEVICE_HIP
Definition: device/device.h:42
@ DEVICE_ONEAPI
Definition: device/device.h:44
Device * device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
void device_hip_info(vector< DeviceInfo > &devices)
Device * device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
string device_hip_capabilities()
CCL_NAMESPACE_BEGIN bool device_hip_init()
string device_metal_capabilities()
Definition: device.mm:97
Device * device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Definition: device.mm:83
bool device_metal_init()
Definition: device.mm:88
void device_metal_info(vector< DeviceInfo > &devices)
Definition: device.mm:93
Device * device_multi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device * device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
void device_oneapi_info(vector< DeviceInfo > &devices)
CCL_NAMESPACE_BEGIN bool device_oneapi_init()
string device_oneapi_capabilities()
Device * device_optix_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
CCL_NAMESPACE_BEGIN bool device_optix_init()
void device_optix_info(const vector< DeviceInfo > &cuda_devices, vector< DeviceInfo > &devices)
@ BVH_LAYOUT_BVH2
#define VLOG_INFO
Definition: log.h:77
#define LOG(severity)
Definition: log.h:36
static char ** types
Definition: makesdna.c:67
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
Definition: math_float4.h:513
Vector< CPUDevice > devices
list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
ListBase threads
list of all thread for every CPUDevice in cpudevices a thread exists.
std::unique_lock< std::mutex > thread_scoped_lock
Definition: thread.h:28
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
Definition: thread.h:27
float max