Blender  V3.3
cycles/device/memory.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #ifndef __DEVICE_MEMORY_H__
5 #define __DEVICE_MEMORY_H__
6 
7 /* Device Memory
8  *
9  * Data types for allocating, copying and freeing device memory. */
10 
11 #include "util/array.h"
12 #include "util/half.h"
13 #include "util/string.h"
14 #include "util/texture.h"
15 #include "util/types.h"
16 #include "util/vector.h"
17 
19 
20 class Device;
21 
22 enum MemoryType {
28 };
29 
30 /* Supported Data Types */
31 
32 enum DataType {
41 };
42 
43 static constexpr size_t datatype_size(DataType datatype)
44 {
45  switch (datatype) {
46  case TYPE_UNKNOWN:
47  return 1;
48  case TYPE_UCHAR:
49  return sizeof(uchar);
50  case TYPE_FLOAT:
51  return sizeof(float);
52  case TYPE_UINT:
53  return sizeof(uint);
54  case TYPE_UINT16:
55  return sizeof(uint16_t);
56  case TYPE_INT:
57  return sizeof(int);
58  case TYPE_HALF:
59  return sizeof(half);
60  case TYPE_UINT64:
61  return sizeof(uint64_t);
62  default:
63  return 0;
64  }
65 }
66 
67 /* Traits for data types */
68 
69 template<typename T> struct device_type_traits {
71  static const size_t num_elements = sizeof(T);
72 };
73 
74 template<> struct device_type_traits<uchar> {
75  static const DataType data_type = TYPE_UCHAR;
76  static const size_t num_elements = 1;
77  static_assert(sizeof(uchar) == num_elements * datatype_size(data_type));
78 };
79 
80 template<> struct device_type_traits<uchar2> {
81  static const DataType data_type = TYPE_UCHAR;
82  static const size_t num_elements = 2;
83  static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type));
84 };
85 
86 template<> struct device_type_traits<uchar3> {
87  static const DataType data_type = TYPE_UCHAR;
88  static const size_t num_elements = 3;
89  static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type));
90 };
91 
92 template<> struct device_type_traits<uchar4> {
93  static const DataType data_type = TYPE_UCHAR;
94  static const size_t num_elements = 4;
95  static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type));
96 };
97 
98 template<> struct device_type_traits<uint> {
99  static const DataType data_type = TYPE_UINT;
100  static const size_t num_elements = 1;
101  static_assert(sizeof(uint) == num_elements * datatype_size(data_type));
102 };
103 
104 template<> struct device_type_traits<uint2> {
105  static const DataType data_type = TYPE_UINT;
106  static const size_t num_elements = 2;
107  static_assert(sizeof(uint2) == num_elements * datatype_size(data_type));
108 };
109 
110 template<> struct device_type_traits<uint3> {
111  static const DataType data_type = TYPE_UINT;
112  static const size_t num_elements = 3;
113  static_assert(sizeof(uint3) == num_elements * datatype_size(data_type));
114 };
115 
116 template<> struct device_type_traits<uint4> {
117  static const DataType data_type = TYPE_UINT;
118  static const size_t num_elements = 4;
119  static_assert(sizeof(uint4) == num_elements * datatype_size(data_type));
120 };
121 
122 template<> struct device_type_traits<int> {
123  static const DataType data_type = TYPE_INT;
124  static const size_t num_elements = 1;
125  static_assert(sizeof(int) == num_elements * datatype_size(data_type));
126 };
127 
128 template<> struct device_type_traits<int2> {
129  static const DataType data_type = TYPE_INT;
130  static const size_t num_elements = 2;
131  static_assert(sizeof(int2) == num_elements * datatype_size(data_type));
132 };
133 
134 template<> struct device_type_traits<int3> {
135  static const DataType data_type = TYPE_INT;
136  static const size_t num_elements = 4;
137  static_assert(sizeof(int3) == num_elements * datatype_size(data_type));
138 };
139 
140 template<> struct device_type_traits<int4> {
141  static const DataType data_type = TYPE_INT;
142  static const size_t num_elements = 4;
143  static_assert(sizeof(int4) == num_elements * datatype_size(data_type));
144 };
145 
146 template<> struct device_type_traits<float> {
147  static const DataType data_type = TYPE_FLOAT;
148  static const size_t num_elements = 1;
149  static_assert(sizeof(float) == num_elements * datatype_size(data_type));
150 };
151 
152 template<> struct device_type_traits<float2> {
153  static const DataType data_type = TYPE_FLOAT;
154  static const size_t num_elements = 2;
155  static_assert(sizeof(float2) == num_elements * datatype_size(data_type));
156 };
157 
158 template<> struct device_type_traits<float3> {
159  /* float3 has different size depending on the device, can't use it for interchanging
160  * memory between CPU and GPU.
161  *
162  * Leave body empty to trigger a compile error if used. */
163 };
164 
165 template<> struct device_type_traits<packed_float3> {
166  static const DataType data_type = TYPE_FLOAT;
167  static const size_t num_elements = 3;
168  static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
169 };
170 
171 template<> struct device_type_traits<float4> {
172  static const DataType data_type = TYPE_FLOAT;
173  static const size_t num_elements = 4;
174  static_assert(sizeof(float4) == num_elements * datatype_size(data_type));
175 };
176 
177 template<> struct device_type_traits<half> {
178  static const DataType data_type = TYPE_HALF;
179  static const size_t num_elements = 1;
180  static_assert(sizeof(half) == num_elements * datatype_size(data_type));
181 };
182 
183 template<> struct device_type_traits<ushort4> {
185  static const size_t num_elements = 4;
186  static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type));
187 };
188 
189 template<> struct device_type_traits<uint16_t> {
191  static const size_t num_elements = 1;
192  static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type));
193 };
194 
195 template<> struct device_type_traits<half4> {
196  static const DataType data_type = TYPE_HALF;
197  static const size_t num_elements = 4;
198  static_assert(sizeof(half4) == num_elements * datatype_size(data_type));
199 };
200 
201 template<> struct device_type_traits<uint64_t> {
203  static const size_t num_elements = 1;
204  static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type));
205 };
206 
207 /* Device Memory
208  *
209  * Base class for all device memory. This should not be allocated directly,
210  * instead the appropriate subclass can be used. */
211 
213  public:
214  size_t memory_size()
215  {
217  }
218  size_t memory_elements_size(int elements)
219  {
220  return elements * data_elements * datatype_size(data_type);
221  }
222 
223  /* Data information. */
226  size_t data_size;
227  size_t device_size;
228  size_t data_width;
229  size_t data_height;
230  size_t data_depth;
232  const char *name;
233 
234  /* Pointers. */
239  /* reference counter for shared_pointer */
241 
242  virtual ~device_memory();
243 
244  void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
245  void restore_device();
246 
247  bool is_resident(Device *sub_device) const;
248 
249  protected:
250  friend class CUDADevice;
251  friend class OptiXDevice;
252  friend class HIPDevice;
253  friend class MetalDevice;
254 
255  /* Only create through subclasses. */
256  device_memory(Device *device, const char *name, MemoryType type);
257 
258  /* No copying or moving allowed.
259  *
260  * This is because device implementation might need to register device memory in an allocation
261  * map of some sort and use pointer as a key to identify blocks. Moving data from one place to
262  * another bypassing device allocation routines will make those maps hard to maintain. */
263  device_memory(const device_memory &) = delete;
264  device_memory(device_memory &&other) noexcept = delete;
267 
268  /* Host allocation on the device. All host_pointer memory should be
269  * allocated with these functions, for devices that support using
270  * the same pointer for host and device. */
271  void *host_alloc(size_t size);
272  void host_free();
273 
274  /* Device memory allocation and copying. */
275  void device_alloc();
276  void device_free();
277  void device_copy_to();
278  void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
279  void device_zero();
280 
281  bool device_is_cpu();
282 
287  bool modified;
288 };
289 
290 /* Device Only Memory
291  *
292  * Working memory only needed by the device, with no corresponding allocation
293  * on the host. Only used internally in the device implementations. */
294 
295 template<typename T> class device_only_memory : public device_memory {
296  public:
297  device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback = false)
298  : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
299  {
302  }
303 
304  device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other))
305  {
306  }
307 
309  {
310  free();
311  }
312 
313  void alloc_to_device(size_t num, bool shrink_to_fit = true)
314  {
315  size_t new_size = num;
316  bool reallocate;
317 
318  if (shrink_to_fit) {
319  reallocate = (data_size != new_size);
320  }
321  else {
322  reallocate = (data_size < new_size);
323  }
324 
325  if (reallocate) {
326  device_free();
327  data_size = new_size;
328  device_alloc();
329  }
330  }
331 
332  void free()
333  {
334  device_free();
335  data_size = 0;
336  }
337 
339  {
340  device_zero();
341  }
342 };
343 
344 /* Device Vector
345  *
346  * Data vector to exchange data between host and device. Memory will be
347  * allocated on the host first with alloc() and resize, and then filled
348  * in and copied to the device with copy_to_device(). Or alternatively
349  * allocated and set to zero on the device with zero_to_device().
350  *
351  * When using memory type MEM_GLOBAL, a pointer to this memory will be
352  * automatically attached to kernel globals, using the provided name
353  * matching an entry in kernel/data_arrays.h. */
354 
355 template<typename T> class device_vector : public device_memory {
356  public:
359  {
362  modified = true;
363  need_realloc_ = true;
364 
365  assert(data_elements > 0);
366  }
367 
368  virtual ~device_vector()
369  {
370  free();
371  }
372 
373  /* Host memory allocation. */
374  T *alloc(size_t width, size_t height = 0, size_t depth = 0)
375  {
376  size_t new_size = size(width, height, depth);
377 
378  if (new_size != data_size) {
379  device_free();
380  host_free();
381  host_pointer = host_alloc(sizeof(T) * new_size);
382  modified = true;
383  assert(device_pointer == 0);
384  }
385 
386  data_size = new_size;
387  data_width = width;
389  data_depth = depth;
390 
391  return data();
392  }
393 
394  /* Host memory resize. Only use this if the original data needs to be
395  * preserved, it is faster to call alloc() if it can be discarded. */
396  T *resize(size_t width, size_t height = 0, size_t depth = 0)
397  {
398  size_t new_size = size(width, height, depth);
399 
400  if (new_size != data_size) {
401  void *new_ptr = host_alloc(sizeof(T) * new_size);
402 
403  if (new_size && data_size) {
404  size_t min_size = ((new_size < data_size) ? new_size : data_size);
405  memcpy((T *)new_ptr, (T *)host_pointer, sizeof(T) * min_size);
406  }
407 
408  device_free();
409  host_free();
410  host_pointer = new_ptr;
411  assert(device_pointer == 0);
412  }
413 
414  data_size = new_size;
415  data_width = width;
417  data_depth = depth;
418 
419  return data();
420  }
421 
422  /* Take over data from an existing array. */
424  {
425  device_free();
426  host_free();
427 
428  data_size = from.size();
429  data_width = 0;
430  data_height = 0;
431  data_depth = 0;
432  host_pointer = from.steal_pointer();
433  assert(device_pointer == 0);
434  }
435 
436  void give_data(array<T> &to)
437  {
438  device_free();
439 
441  data_size = 0;
442  data_width = 0;
443  data_height = 0;
444  data_depth = 0;
445  host_pointer = 0;
446  assert(device_pointer == 0);
447  }
448 
449  /* Free device and host memory. */
450  void free()
451  {
452  device_free();
453  host_free();
454 
455  data_size = 0;
456  data_width = 0;
457  data_height = 0;
458  data_depth = 0;
459  host_pointer = 0;
460  modified = true;
461  need_realloc_ = true;
462  assert(device_pointer == 0);
463  }
464 
465  void free_if_need_realloc(bool force_free)
466  {
467  if (need_realloc_ || force_free) {
468  free();
469  }
470  }
471 
472  bool is_modified() const
473  {
474  return modified;
475  }
476 
478  {
479  return need_realloc_;
480  }
481 
483  {
484  modified = true;
485  }
486 
487  void tag_realloc()
488  {
489  need_realloc_ = true;
490  tag_modified();
491  }
492 
493  size_t size() const
494  {
495  return data_size;
496  }
497 
498  T *data()
499  {
500  return (T *)host_pointer;
501  }
502 
503  const T *data() const
504  {
505  return (T *)host_pointer;
506  }
507 
508  T &operator[](size_t i)
509  {
510  assert(i < data_size);
511  return data()[i];
512  }
513 
515  {
516  if (data_size != 0) {
517  device_copy_to();
518  }
519  }
520 
522  {
523  if (!modified) {
524  return;
525  }
526 
527  copy_to_device();
528  }
529 
531  {
532  modified = false;
533  need_realloc_ = false;
534  }
535 
537  {
538  device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
539  }
540 
541  void copy_from_device(size_t y, size_t w, size_t h)
542  {
543  device_copy_from(y, w, h, sizeof(T));
544  }
545 
547  {
548  device_zero();
549  }
550 
551  void move_device(Device *new_device)
552  {
554  device_free();
555  device = new_device;
556  copy_to_device();
557  }
558 
559  protected:
560  size_t size(size_t width, size_t height, size_t depth)
561  {
562  return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
563  }
564 };
565 
566 /* Device Sub Memory
567  *
568  * Pointer into existing memory. It is not allocated separately, but created
569  * from an already allocated base memory. It is freed automatically when it
570  * goes out of scope, which should happen before base memory is freed.
571  *
572  * NOTE: some devices require offset and size of the sub_ptr to be properly
573  * aligned to device->mem_address_alingment(). */
574 
576  public:
577  device_sub_ptr(device_memory &mem, size_t offset, size_t size);
578  ~device_sub_ptr();
579 
581  {
582  return ptr;
583  }
584 
585  protected:
586  /* No copying. */
588 
591 };
592 
593 /* Device Texture
594  *
595  * 2D or 3D image texture memory. */
596 
598  public:
600  const char *name,
601  const uint slot,
602  ImageDataType image_data_type,
603  InterpolationType interpolation,
604  ExtensionType extension);
605  ~device_texture();
606 
607  void *alloc(const size_t width, const size_t height, const size_t depth = 0);
608  void copy_to_device();
609 
612 
613  protected:
614  size_t size(const size_t width, const size_t height, const size_t depth)
615  {
616  return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
617  }
618 };
619 
621 
622 #endif /* __DEVICE_MEMORY_H__ */
typedef float(TangentPoint)[2]
unsigned char uchar
Definition: BLI_sys_types.h:70
unsigned int uint
Definition: BLI_sys_types.h:67
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei height
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei width
float float4[4]
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
void set_data(T *ptr_, size_t datasize)
friend class CUDADevice
bool is_resident(Device *sub_device) const
Definition: memory.cpp:125
device_memory & operator=(const device_memory &)=delete
size_t memory_elements_size(int elements)
device_memory(const device_memory &)=delete
friend class HIPDevice
void device_zero()
Definition: memory.cpp:93
void device_alloc()
Definition: memory.cpp:67
void device_copy_to()
Definition: memory.cpp:80
void device_free()
Definition: memory.cpp:73
device_memory(Device *device, const char *name, MemoryType type)
Definition: memory.cpp:11
device_ptr original_device_ptr
device_memory & operator=(device_memory &&)=delete
device_memory(device_memory &&other) noexcept=delete
bool device_is_cpu()
Definition: memory.cpp:100
device_ptr device_pointer
void restore_device()
Definition: memory.cpp:118
void * host_alloc(size_t size)
Definition: memory.cpp:40
void host_free()
Definition: memory.cpp:58
virtual ~device_memory()
Definition: memory.cpp:34
void device_copy_from(size_t y, size_t w, size_t h, size_t elem)
Definition: memory.cpp:87
friend class MetalDevice
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr)
Definition: memory.cpp:105
friend class OptiXDevice
device_only_memory(device_only_memory &&other) noexcept
void alloc_to_device(size_t num, bool shrink_to_fit=true)
device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback=false)
device_sub_ptr(device_memory &mem, size_t offset, size_t size)
Definition: memory.cpp:132
device_sub_ptr & operator=(const device_sub_ptr &)
device_ptr operator*() const
size_t size(const size_t width, const size_t height, const size_t depth)
void copy_to_device()
Definition: memory.cpp:230
void * alloc(const size_t width, const size_t height, const size_t depth=0)
Definition: memory.cpp:207
device_texture(Device *device, const char *name, const uint slot, ImageDataType image_data_type, InterpolationType interpolation, ExtensionType extension)
Definition: memory.cpp:144
T * alloc(size_t width, size_t height=0, size_t depth=0)
size_t size() const
device_vector(Device *device, const char *name, MemoryType type)
void copy_to_device_if_modified()
size_t size(size_t width, size_t height, size_t depth)
void move_device(Device *new_device)
T & operator[](size_t i)
const T * data() const
void give_data(array< T > &to)
T * resize(size_t width, size_t height=0, size_t depth=0)
bool is_modified() const
void steal_data(array< T > &from)
void free_if_need_realloc(bool force_free)
void copy_from_device(size_t y, size_t w, size_t h)
Definition: half.h:41
unsigned short half
Definition: cuda/compat.h:110
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
static constexpr size_t datatype_size(DataType datatype)
@ MEM_GLOBAL
@ MEM_TEXTURE
@ MEM_READ_WRITE
@ MEM_DEVICE_ONLY
@ MEM_READ_ONLY
@ TYPE_FLOAT
@ TYPE_INT
@ TYPE_UNKNOWN
@ TYPE_HALF
@ TYPE_UINT
@ TYPE_UINT16
@ TYPE_UINT64
@ TYPE_UCHAR
StackEntry * from
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
#define T
unsigned short uint16_t
Definition: stdint.h:79
unsigned __int64 uint64_t
Definition: stdint.h:90
static const size_t num_elements
static const DataType data_type
Definition: half.h:64
float max
ImageDataType
Definition: util/texture.h:29
InterpolationType
Definition: util/texture.h:19
ExtensionType
Definition: util/texture.h:61
uint64_t device_ptr
Definition: util/types.h:43