Blender  V3.3
oneapi/device_impl.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2021-2022 Intel Corporation */
3 
4 #ifdef WITH_ONEAPI
5 
7 
8 # include "util/debug.h"
9 # include "util/log.h"
10 
12 
14 
15 static void queue_error_cb(const char *message, void *user_ptr)
16 {
17  if (user_ptr) {
18  *reinterpret_cast<std::string *>(user_ptr) = message;
19  }
20 }
21 
22 OneapiDevice::OneapiDevice(const DeviceInfo &info,
23  OneAPIDLLInterface &oneapi_dll_object,
24  Stats &stats,
25  Profiler &profiler)
26  : Device(info, stats, profiler),
27  device_queue_(nullptr),
28  texture_info_(this, "texture_info", MEM_GLOBAL),
29  kg_memory_(nullptr),
30  kg_memory_device_(nullptr),
31  kg_memory_size_(0),
32  oneapi_dll_(oneapi_dll_object)
33 {
34  need_texture_info_ = false;
35 
36  oneapi_dll_.oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
37 
38  /* OneAPI calls should be initialized on this moment. */
39  assert(oneapi_dll_.oneapi_create_queue != nullptr);
40 
41  bool is_finished_ok = oneapi_dll_.oneapi_create_queue(device_queue_, info.num);
42  if (is_finished_ok == false) {
43  set_error("oneAPI queue initialization error: got runtime exception \"" +
44  oneapi_error_string_ + "\"");
45  }
46  else {
47  VLOG_DEBUG << "oneAPI queue has been successfully created for the device \""
48  << info.description << "\"";
49  assert(device_queue_);
50  }
51 
52  size_t globals_segment_size;
53  is_finished_ok = oneapi_dll_.oneapi_kernel_globals_size(device_queue_, globals_segment_size);
54  if (is_finished_ok == false) {
55  set_error("oneAPI constant memory initialization got runtime exception \"" +
56  oneapi_error_string_ + "\"");
57  }
58  else {
59  VLOG_DEBUG << "Successfully created global/constant memory segment (kernel globals object)";
60  }
61 
62  kg_memory_ = oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
63  oneapi_dll_.oneapi_usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);
64 
65  kg_memory_device_ = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, globals_segment_size);
66 
67  kg_memory_size_ = globals_segment_size;
68 
69  max_memory_on_device_ = oneapi_dll_.oneapi_get_memcapacity(device_queue_);
70 }
71 
72 OneapiDevice::~OneapiDevice()
73 {
74  texture_info_.free();
75  oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_);
76  oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_device_);
77 
78  for (ConstMemMap::iterator mt = const_mem_map_.begin(); mt != const_mem_map_.end(); mt++)
79  delete mt->second;
80 
81  if (device_queue_)
82  oneapi_dll_.oneapi_free_queue(device_queue_);
83 }
84 
85 bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
86 {
87  return false;
88 }
89 
90 BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
91 {
92  return BVH_LAYOUT_BVH2;
93 }
94 
95 bool OneapiDevice::load_kernels(const uint requested_features)
96 {
97  assert(device_queue_);
98  /* NOTE(@nsirgien): oneAPI can support compilation of kernel code with certain feature set
99  * with specialization constants, but it hasn't been implemented yet. */
100  (void)requested_features;
101 
102  bool is_finished_ok = oneapi_dll_.oneapi_run_test_kernel(device_queue_);
103  if (is_finished_ok == false) {
104  set_error("oneAPI kernel load: got runtime exception \"" + oneapi_error_string_ + "\"");
105  }
106  else {
107  VLOG_INFO << "Runtime compilation done for \"" << info.description << "\"";
108  assert(device_queue_);
109  }
110  return is_finished_ok;
111 }
112 
113 void OneapiDevice::load_texture_info()
114 {
115  if (need_texture_info_) {
116  need_texture_info_ = false;
117  texture_info_.copy_to_device();
118  }
119 }
120 
121 void OneapiDevice::generic_alloc(device_memory &mem)
122 {
123  size_t memory_size = mem.memory_size();
124 
125  /* TODO(@nsirgien): In future, if scene doesn't fit into device memory, then
126  * we can use USM host memory.
127  * Because of the expected performance impact, implementation of this has had a low priority
128  * and is not implemented yet. */
129 
130  assert(device_queue_);
131  /* NOTE(@nsirgien): There are three types of Unified Shared Memory (USM) in oneAPI: host, device
132  * and shared. For new project it maybe more beneficial to use USM shared memory, because it
133  * provides automatic migration mechanism in order to allow to use the same pointer on host and
134  * on device, without need to worry about explicit memory transfer operations. But for
135  * Blender/Cycles this type of memory is not very suitable in current application architecture,
136  * because Cycles already uses two different pointer for host activity and device activity, and
137  * also has to perform all needed memory transfer operations. So, USM device memory
138  * type has been used for oneAPI device in order to better fit in Cycles architecture. */
139  void *device_pointer = nullptr;
140  if (mem.memory_size() + stats.mem_used < max_memory_on_device_)
141  device_pointer = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, memory_size);
142  if (device_pointer == nullptr) {
143  set_error("oneAPI kernel - device memory allocation error for " +
145  ", possibly caused by lack of available memory space on the device: " +
146  string_human_readable_size(stats.mem_used) + " of " +
147  string_human_readable_size(max_memory_on_device_) + " is already allocated");
148  }
149 
150  mem.device_pointer = reinterpret_cast<ccl::device_ptr>(device_pointer);
151  mem.device_size = memory_size;
152 
153  stats.mem_alloc(memory_size);
154 }
155 
156 void OneapiDevice::generic_copy_to(device_memory &mem)
157 {
158  if (!mem.device_pointer) {
159  return;
160  }
161  size_t memory_size = mem.memory_size();
162 
163  /* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
164  assert(mem.host_pointer);
165  assert(device_queue_);
166  oneapi_dll_.oneapi_usm_memcpy(
167  device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, memory_size);
168 }
169 
170 /* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
171 SyclQueue *OneapiDevice::sycl_queue()
172 {
173  return device_queue_;
174 }
175 
176 string OneapiDevice::oneapi_error_message()
177 {
178  return string(oneapi_error_string_);
179 }
180 
181 OneAPIDLLInterface OneapiDevice::oneapi_dll_object()
182 {
183  return oneapi_dll_;
184 }
185 
186 void *OneapiDevice::kernel_globals_device_pointer()
187 {
188  return kg_memory_device_;
189 }
190 
191 void OneapiDevice::generic_free(device_memory &mem)
192 {
193  if (!mem.device_pointer) {
194  return;
195  }
196 
197  stats.mem_free(mem.device_size);
198  mem.device_size = 0;
199 
200  assert(device_queue_);
201  oneapi_dll_.oneapi_usm_free(device_queue_, (void *)mem.device_pointer);
202  mem.device_pointer = 0;
203 }
204 
205 void OneapiDevice::mem_alloc(device_memory &mem)
206 {
207  if (mem.type == MEM_TEXTURE) {
208  assert(!"mem_alloc not supported for textures.");
209  }
210  else if (mem.type == MEM_GLOBAL) {
211  assert(!"mem_alloc not supported for global memory.");
212  }
213  else {
214  if (mem.name) {
215  VLOG_DEBUG << "OneapiDevice::mem_alloc: \"" << mem.name << "\", "
216  << string_human_readable_number(mem.memory_size()) << " bytes. ("
217  << string_human_readable_size(mem.memory_size()) << ")";
218  }
219  generic_alloc(mem);
220  }
221 }
222 
223 void OneapiDevice::mem_copy_to(device_memory &mem)
224 {
225  if (mem.name) {
226  VLOG_DEBUG << "OneapiDevice::mem_copy_to: \"" << mem.name << "\", "
227  << string_human_readable_number(mem.memory_size()) << " bytes. ("
228  << string_human_readable_size(mem.memory_size()) << ")";
229  }
230 
231  if (mem.type == MEM_GLOBAL) {
232  global_free(mem);
233  global_alloc(mem);
234  }
235  else if (mem.type == MEM_TEXTURE) {
236  tex_free((device_texture &)mem);
237  tex_alloc((device_texture &)mem);
238  }
239  else {
240  if (!mem.device_pointer)
241  mem_alloc(mem);
242 
243  generic_copy_to(mem);
244  }
245 }
246 
247 void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
248 {
249  if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
250  assert(!"mem_copy_from not supported for textures.");
251  }
252  else if (mem.host_pointer) {
253  const size_t size = (w > 0 || h > 0 || elem > 0) ? (elem * w * h) : mem.memory_size();
254  const size_t offset = elem * y * w;
255 
256  if (mem.name) {
257  VLOG_DEBUG << "OneapiDevice::mem_copy_from: \"" << mem.name << "\" object of "
258  << string_human_readable_number(mem.memory_size()) << " bytes. ("
259  << string_human_readable_size(mem.memory_size()) << ") from offset " << offset
260  << " data " << size << " bytes";
261  }
262 
263  assert(device_queue_);
264 
265  assert(size != 0);
266  if (mem.device_pointer) {
267  char *shifted_host = reinterpret_cast<char *>(mem.host_pointer) + offset;
268  char *shifted_device = reinterpret_cast<char *>(mem.device_pointer) + offset;
269  bool is_finished_ok = oneapi_dll_.oneapi_usm_memcpy(
270  device_queue_, shifted_host, shifted_device, size);
271  if (is_finished_ok == false) {
272  set_error("oneAPI memory operation error: got runtime exception \"" +
273  oneapi_error_string_ + "\"");
274  }
275  }
276  }
277 }
278 
279 void OneapiDevice::mem_zero(device_memory &mem)
280 {
281  if (mem.name) {
282  VLOG_DEBUG << "OneapiDevice::mem_zero: \"" << mem.name << "\", "
283  << string_human_readable_number(mem.memory_size()) << " bytes. ("
284  << string_human_readable_size(mem.memory_size()) << ")\n";
285  }
286 
287  if (!mem.device_pointer) {
288  mem_alloc(mem);
289  }
290  if (!mem.device_pointer) {
291  return;
292  }
293 
294  assert(device_queue_);
295  bool is_finished_ok = oneapi_dll_.oneapi_usm_memset(
296  device_queue_, (void *)mem.device_pointer, 0, mem.memory_size());
297  if (is_finished_ok == false) {
298  set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
299  "\"");
300  }
301 }
302 
303 void OneapiDevice::mem_free(device_memory &mem)
304 {
305  if (mem.name) {
306  VLOG_DEBUG << "OneapiDevice::mem_free: \"" << mem.name << "\", "
307  << string_human_readable_number(mem.device_size) << " bytes. ("
308  << string_human_readable_size(mem.device_size) << ")\n";
309  }
310 
311  if (mem.type == MEM_GLOBAL) {
312  global_free(mem);
313  }
314  else if (mem.type == MEM_TEXTURE) {
315  tex_free((device_texture &)mem);
316  }
317  else {
318  generic_free(mem);
319  }
320 }
321 
322 device_ptr OneapiDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
323 {
324  return reinterpret_cast<device_ptr>(reinterpret_cast<char *>(mem.device_pointer) +
326 }
327 
328 void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
329 {
330  assert(name);
331 
332  VLOG_DEBUG << "OneapiDevice::const_copy_to \"" << name << "\" object "
333  << string_human_readable_number(size) << " bytes. ("
334  << string_human_readable_size(size) << ")";
335 
336  ConstMemMap::iterator i = const_mem_map_.find(name);
338 
339  if (i == const_mem_map_.end()) {
340  data = new device_vector<uchar>(this, name, MEM_READ_ONLY);
341  data->alloc(size);
342  const_mem_map_.insert(ConstMemMap::value_type(name, data));
343  }
344  else {
345  data = i->second;
346  }
347 
348  assert(data->memory_size() <= size);
349  memcpy(data->data(), host, size);
350  data->copy_to_device();
351 
352  oneapi_dll_.oneapi_set_global_memory(
353  device_queue_, kg_memory_, name, (void *)data->device_pointer);
354 
355  oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
356 }
357 
358 void OneapiDevice::global_alloc(device_memory &mem)
359 {
360  assert(mem.name);
361 
362  size_t size = mem.memory_size();
363  VLOG_DEBUG << "OneapiDevice::global_alloc \"" << mem.name << "\" object "
364  << string_human_readable_number(size) << " bytes. ("
365  << string_human_readable_size(size) << ")";
366 
367  generic_alloc(mem);
368  generic_copy_to(mem);
369 
370  oneapi_dll_.oneapi_set_global_memory(
371  device_queue_, kg_memory_, mem.name, (void *)mem.device_pointer);
372 
373  oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
374 }
375 
376 void OneapiDevice::global_free(device_memory &mem)
377 {
378  if (mem.device_pointer) {
379  generic_free(mem);
380  }
381 }
382 
383 void OneapiDevice::tex_alloc(device_texture &mem)
384 {
385  generic_alloc(mem);
386  generic_copy_to(mem);
387 
388  /* Resize if needed. Also, in case of resize - allocate in advance for future allocs. */
389  const uint slot = mem.slot;
390  if (slot >= texture_info_.size()) {
391  texture_info_.resize(slot + 128);
392  }
393 
394  texture_info_[slot] = mem.info;
395  need_texture_info_ = true;
396 
397  texture_info_[slot].data = (uint64_t)mem.device_pointer;
398 }
399 
400 void OneapiDevice::tex_free(device_texture &mem)
401 {
402  /* There is no texture memory in SYCL. */
403  if (mem.device_pointer) {
404  generic_free(mem);
405  }
406 }
407 
408 unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
409 {
410  return make_unique<OneapiDeviceQueue>(this);
411 }
412 
413 int OneapiDevice::get_num_multiprocessors()
414 {
415  assert(device_queue_);
416  return oneapi_dll_.oneapi_get_num_multiprocessors(device_queue_);
417 }
418 
419 int OneapiDevice::get_max_num_threads_per_multiprocessor()
420 {
421  assert(device_queue_);
422  return oneapi_dll_.oneapi_get_max_num_threads_per_multiprocessor(device_queue_);
423 }
424 
425 bool OneapiDevice::should_use_graphics_interop()
426 {
427  /* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
428  * return false. */
429  return false;
430 }
431 
432 void *OneapiDevice::usm_aligned_alloc_host(size_t memory_size, size_t alignment)
433 {
434  assert(device_queue_);
435  return oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, memory_size, alignment);
436 }
437 
438 void OneapiDevice::usm_free(void *usm_ptr)
439 {
440  assert(device_queue_);
441  return oneapi_dll_.oneapi_usm_free(device_queue_, usm_ptr);
442 }
443 
445 
446 #endif
unsigned int uint
Definition: BLI_sys_types.h:67
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
string description
Definition: device/device.h:63
size_t mem_used
Definition: util/stats.h:35
void mem_free(size_t size)
Definition: util/stats.h:29
void mem_alloc(size_t size)
Definition: util/stats.h:23
size_t memory_elements_size(int elements)
device_ptr device_pointer
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
@ MEM_GLOBAL
@ MEM_TEXTURE
@ MEM_READ_ONLY
SyclQueue void void size_t num_bytes void
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
@ BVH_LAYOUT_BVH2
#define VLOG_INFO
Definition: log.h:77
#define VLOG_DEBUG
Definition: log.h:86
int BVHLayoutMask
Definition: params.h:47
unsigned __int64 uint64_t
Definition: stdint.h:90
string string_human_readable_size(size_t size)
Definition: string.cpp:229
string string_human_readable_number(size_t num)
Definition: string.cpp:248
uint64_t data
Definition: util/texture.h:74
uint64_t device_ptr
Definition: util/types.h:43