Blender  V3.3
mtl_memory.mm
/* SPDX-License-Identifier: GPL-2.0-or-later */

#include "BKE_global.h"

#include "DNA_userdef_types.h"

#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_memory.hh"

using namespace blender;
using namespace blender::gpu;

namespace blender::gpu {

/* -------------------------------------------------------------------- */
void MTLBufferPool::init(id<MTLDevice> mtl_device)
{
  if (!ensure_initialised_) {
    BLI_assert(mtl_device);
    ensure_initialised_ = true;
    device_ = mtl_device;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
    /* Debug statistics. */
    per_frame_allocation_count_ = 0;
    allocations_in_pool_ = 0;
    buffers_in_pool_ = 0;
#endif

    /* Free pools -- Create initial safe free pool */
    BLI_assert(current_free_list_ == nullptr);
    this->begin_new_safe_list();
  }
}

MTLBufferPool::~MTLBufferPool()
{
  this->free();
}

void MTLBufferPool::free()
{

  for (auto buffer : allocations_) {
    BLI_assert(buffer);
    delete buffer;
  }
  allocations_.clear();

  for (std::multiset<blender::gpu::MTLBufferHandle, blender::gpu::CompareMTLBuffer> *buffer_pool :
       buffer_pools_.values()) {
    delete buffer_pool;
  }
  buffer_pools_.clear();
}

gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible)
{
  /* Allocate buffer with default HW-compatible alignment of 256 bytes.
   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
  return this->allocate_aligned(size, 256, cpu_visible);
}

gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size,
                                                  bool cpu_visible,
                                                  const void *data)
{
  /* Allocate buffer with default HW-compatible alignment of 256 bytes.
   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
  return this->allocate_aligned_with_data(size, 256, cpu_visible, data);
}

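/* Illustrative sketch (not part of the upstream file): what the default 256-byte alignment
 * means in practice. Requested sizes are rounded up to the next multiple of the alignment
 * before a buffer is looked up or created, so a 1000-byte request is backed by a 1024-byte
 * allocation. The helper name below is hypothetical. */
static uint64_t example_aligned_size_for_request(uint64_t requested_size)
{
  /* Same rounding the pool performs internally: 1000 -> 1024, 256 -> 256, 257 -> 512. */
  return ceil_to_multiple_ul(requested_size, 256);
}
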
gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
{
  /* Check not required. Main GPU module usage considered thread-safe. */
  // BLI_assert(BLI_thread_is_main());

  /* Calculate aligned size */
  BLI_assert(alignment > 0);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment);

  /* Allocate new MTL Buffer */
  MTLResourceOptions options;
  if (cpu_visible) {
    options = ([device_ hasUnifiedMemory]) ? MTLResourceStorageModeShared :
                                             MTLResourceStorageModeManaged;
  }
  else {
    options = MTLResourceStorageModePrivate;
  }

  /* Check if we have a suitable buffer */
  gpu::MTLBuffer *new_buffer = nullptr;
  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
      (uint64_t)options);

  if (pool_search != nullptr) {
    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = *pool_search;
    MTLBufferHandle size_compare(aligned_alloc_size);
    auto result = pool->lower_bound(size_compare);
    if (result != pool->end()) {
      /* Potential buffer found, check if within size threshold requirements. */
      gpu::MTLBuffer *found_buffer = result->buffer;
      BLI_assert(found_buffer);
      BLI_assert(found_buffer->get_metal_buffer());

      uint64_t found_size = found_buffer->get_size();

      if (found_size >= aligned_alloc_size &&
          found_size <= (aligned_alloc_size * mtl_buffer_size_threshold_factor_)) {
        MTL_LOG_INFO(
            "[MemoryAllocator] Suitable Buffer of size %lld found, for requested size: %lld\n",
            found_size,
            aligned_alloc_size);

        new_buffer = found_buffer;
        BLI_assert(!new_buffer->get_in_use());

        /* Remove buffer from free set. */
        pool->erase(result);
      }
      else {
        MTL_LOG_INFO(
            "[MemoryAllocator] Buffer of size %lld found, but was incompatible with requested "
            "size: %lld\n",
            found_size,
            aligned_alloc_size);
        new_buffer = nullptr;
      }
    }
  }

  /* Allocate new buffer. */
  if (new_buffer == nullptr) {
    new_buffer = new gpu::MTLBuffer(device_, size, options, alignment);

    /* Track allocation in context. */
    allocations_.append(new_buffer);
    total_allocation_bytes_ += aligned_alloc_size;
  }
  else {
    /* Re-use suitable buffer. */
    new_buffer->set_usage_size(aligned_alloc_size);

#if MTL_DEBUG_MEMORY_STATISTICS == 1
    /* Debug. */
    allocations_in_pool_ -= new_buffer->get_size();
    buffers_in_pool_--;
    BLI_assert(allocations_in_pool_ >= 0);
#endif

    /* Ensure buffer memory is correctly backed. */
    BLI_assert(new_buffer->get_metal_buffer());
  }
  /* Flag buffer as actively in-use. */
  new_buffer->flag_in_use(true);

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  this->per_frame_allocation_count_++;
#endif

  return new_buffer;
}

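/* Illustrative sketch (not part of the upstream file): the reuse policy above in isolation.
 * Free buffers are kept in a multiset ordered by size; `lower_bound` returns the smallest free
 * entry that is >= the request, and it is only reused when it does not exceed the request by
 * more than a threshold factor (written as a literal 2x here purely for the example -- the
 * real factor is `mtl_buffer_size_threshold_factor_`). */
static bool example_would_reuse(const std::multiset<uint64_t> &free_sizes, uint64_t requested_size)
{
  auto it = free_sizes.lower_bound(requested_size);
  if (it == free_sizes.end()) {
    return false; /* No free buffer is large enough; a new one would be allocated. */
  }
  return *it <= requested_size * 2; /* Heavily oversized buffers are left in the pool. */
}
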
gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size,
                                                          uint alignment,
                                                          bool cpu_visible,
                                                          const void *data)
{
  gpu::MTLBuffer *buf = this->allocate_aligned(size, alignment, cpu_visible);

  /* Upload initial data. */
  BLI_assert(data != nullptr);
  BLI_assert(!(buf->get_resource_options() & MTLResourceStorageModePrivate));
  BLI_assert(size <= buf->get_size());
  BLI_assert(size <= [buf->get_metal_buffer() length]);
  memcpy(buf->get_host_ptr(), data, size);
  buf->flush_range(0, size);
  return buf;
}

bool MTLBufferPool::free_buffer(gpu::MTLBuffer *buffer)
{
  /* Ensure buffer is flagged as in-use. I.e. has not already been returned to memory pools. */
  bool buffer_in_use = buffer->get_in_use();
  BLI_assert(buffer_in_use);
  if (buffer_in_use) {

    /* Fetch active safe pool from atomic ptr. */
    MTLSafeFreeList *current_pool = this->get_current_safe_list();

    /* Place buffer in safe_free_pool before returning to MemoryManager buffer pools. */
    BLI_assert(current_pool);
    current_pool->insert_buffer(buffer);
    buffer->flag_in_use(false);

    return true;
  }
  return false;
}

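/* Illustrative sketch (not part of the upstream file): the intended allocate/use/free cycle
 * from a caller inside the Metal backend. It assumes the global pool is reached through
 * `MTLContext::get_global_memory_manager()`; the payload and sizes are made up. Note that
 * freeing does not return the buffer to the pools immediately -- it is parked in the current
 * MTLSafeFreeList until the GPU has finished with it. */
static void example_pool_buffer_lifecycle(const void *payload, uint64_t payload_size)
{
  MTLBufferPool &pool = MTLContext::get_global_memory_manager();

  /* CPU-visible allocation, rounded up internally to 256-byte alignment. */
  gpu::MTLBuffer *buf = pool.allocate(payload_size, true);

  /* Write through the host pointer, then flush so managed buffers reach the GPU. */
  memcpy(buf->get_host_ptr(), payload, payload_size);
  buf->flush_range(0, payload_size);

  /* ... encode GPU work that reads buf->get_metal_buffer() ... */

  /* Hand the buffer back; it lands in the active safe free list first. */
  pool.free_buffer(buf);
}
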
void MTLBufferPool::update_memory_pools()
{
  /* Ensure thread-safe access to `completed_safelist_queue_`, which contains
   * the list of MTLSafeFreeList's whose buffers are ready to be
   * re-inserted into the Memory Manager pools. */
  safelist_lock_.lock();

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  int num_buffers_added = 0;
#endif

  /* Always free oldest MTLSafeFreeList first. */
  for (int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size();
       safe_pool_free_index++) {
    MTLSafeFreeList *current_pool = completed_safelist_queue_[safe_pool_free_index];

    /* Iterate through all MTLSafeFreeList linked-chunks. */
    while (current_pool != nullptr) {
      current_pool->lock_.lock();
      BLI_assert(current_pool);
      BLI_assert(current_pool->in_free_queue_);
      int counter = 0;
      int size = min_ii(current_pool->current_list_index_, MTLSafeFreeList::MAX_NUM_BUFFERS_);

      /* Re-add all buffers within frame index to MemoryManager pools. */
      while (counter < size) {

        gpu::MTLBuffer *buf = current_pool->safe_free_pool_[counter];

        /* Insert buffer back into open pools. */
        BLI_assert(buf->get_in_use() == false);
        this->insert_buffer_into_pool(buf->get_resource_options(), buf);
        counter++;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
        num_buffers_added++;
#endif
      }

      /* Fetch next MTLSafeFreeList chunk, if any. */
      MTLSafeFreeList *next_list = nullptr;
      if (current_pool->has_next_pool_ > 0) {
        next_list = current_pool->next_.load();
      }

      /* Delete current MTLSafeFreeList */
      current_pool->lock_.unlock();
      delete current_pool;
      current_pool = nullptr;

      /* Move onto next chunk. */
      if (next_list != nullptr) {
        current_pool = next_list;
      }
    }
  }

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  printf("--- Allocation Stats ---\n");
  printf(" Num buffers processed in pool (this frame): %u\n", num_buffers_added);

  uint framealloc = (uint)this->per_frame_allocation_count_;
  printf(" Allocations in frame: %u\n", framealloc);
  printf(" Total Buffers allocated: %u\n", (uint)allocations_.size());
  printf(" Total Memory allocated: %u MB\n", (uint)total_allocation_bytes_ / (1024 * 1024));

  uint allocs = (uint)(allocations_in_pool_) / 1024 / 1024;
  printf(" Free memory in pools: %u MB\n", allocs);

  uint buffs = (uint)buffers_in_pool_;
  printf(" Buffers in pools: %u\n", buffs);

  printf(" Pools %u:\n", (uint)buffer_pools_.size());
  auto key_iterator = buffer_pools_.keys().begin();
  auto value_iterator = buffer_pools_.values().begin();
  while (key_iterator != buffer_pools_.keys().end()) {
    uint64_t mem_in_pool = 0;
    uint64_t iters = 0;
    for (auto it = (*value_iterator)->begin(); it != (*value_iterator)->end(); it++) {
      mem_in_pool += it->buffer_size;
      iters++;
    }

    printf(" Buffers in pool (%u)(%llu): %u (%u MB)\n",
           (uint)*key_iterator,
           iters,
           (uint)((*value_iterator)->size()),
           (uint)mem_in_pool / 1024 / 1024);
    ++key_iterator;
    ++value_iterator;
  }

  this->per_frame_allocation_count_ = 0;
#endif

  /* Clear safe pools list */
  completed_safelist_queue_.clear();
  safelist_lock_.unlock();
}

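/* Illustrative sketch (not part of the upstream file): how the safe-list machinery above is
 * intended to be driven over a frame. The per-frame hook shown here is an assumption for
 * illustration; only the MTLBufferPool entry points are real. */
static void example_end_of_frame(MTLBufferPool &pool)
{
  /* 1. The list that accumulated this frame's freed buffers travels with the submitted
   *    command buffers; a fresh list is opened for the next frame. */
  MTLSafeFreeList *submitted_list = pool.get_current_safe_list();
  (void)submitted_list; /* Referenced by each in-flight command buffer. */
  pool.begin_new_safe_list();

  /* 2. When every referencing command buffer completes, its decrement_reference() call pushes
   *    the list onto the completed queue (see decrement_reference further below). */

  /* 3. At a controlled point on the main thread, completed lists are drained and their
   *    buffers become reusable. */
  pool.update_memory_pools();
}
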
void MTLBufferPool::push_completed_safe_list(MTLSafeFreeList *safe_list)
{
  /* When an MTLSafeFreeList has been released by the GPU, and buffers are ready to
   * be re-inserted into the MemoryManager pools for future use, add the MTLSafeFreeList
   * to the `completed_safelist_queue_` for flushing at a controlled point in time. */
  safe_list->lock_.lock();
  BLI_assert(safe_list);
  BLI_assert(safe_list->reference_count_ == 0 &&
             "Pool must be fully dereferenced by all in-use cmd buffers before returning.\n");
  BLI_assert(safe_list->in_free_queue_ == false && "Pool must not already be in queue");

  /* Flag MTLSafeFreeList as having been added, and insert into SafeFreePool queue. */
  safe_list->flag_in_queue();
  safelist_lock_.lock();
  completed_safelist_queue_.append(safe_list);
  safelist_lock_.unlock();
  safe_list->lock_.unlock();
}

MTLSafeFreeList *MTLBufferPool::get_current_safe_list()
{
  /* Thread-safe access via atomic ptr. */
  return current_free_list_;
}

void MTLBufferPool::begin_new_safe_list()
{
  safelist_lock_.lock();
  current_free_list_ = new MTLSafeFreeList();
  safelist_lock_.unlock();
}

void MTLBufferPool::ensure_buffer_pool(MTLResourceOptions options)
{
  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
      (uint64_t)options);
  if (pool_search == nullptr) {
    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool =
        new std::multiset<MTLBufferHandle, CompareMTLBuffer>();
    buffer_pools_.add_new((uint64_t)options, pool);
  }
}

void MTLBufferPool::insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer)
{
  /* Ensure `safelist_lock_` is locked in calling code before modifying. */
  BLI_assert(buffer);

  /* Reset usage size to actual size of allocation. */
  buffer->set_usage_size(buffer->get_size());

  /* Ensure pool exists. */
  this->ensure_buffer_pool(options);

  /* TODO(Metal): Support purgeability - Allow buffer in pool to have its memory taken back by the
   * OS if needed. As we keep allocations around, they may not actually be in use, but we can
   * ensure they do not block other apps from using memory. Upon a buffer being needed again, we
   * can reset this state.
   * TODO(Metal): Purgeability state does not update instantly, so this requires a deferral. */
  BLI_assert(buffer->get_metal_buffer());
  /* buffer->metal_buffer); [buffer->metal_buffer setPurgeableState:MTLPurgeableStateVolatile]; */

  std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = buffer_pools_.lookup(options);
  pool->insert(MTLBufferHandle(buffer));

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  /* Debug statistics. */
  allocations_in_pool_ += buffer->get_size();
  buffers_in_pool_++;
#endif
}

MTLSafeFreeList::MTLSafeFreeList()
{
  reference_count_ = 1;
  in_free_queue_ = false;
  current_list_index_ = 0;
  next_ = nullptr;
  has_next_pool_ = 0;
}

void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
{
  BLI_assert(in_free_queue_ == false);

  /* Lockless list insert. */
  uint insert_index = current_list_index_++;

  /* If the current MTLSafeFreeList size is exceeded, we ripple down the linked-list chain and
   * insert the buffer into the next available chunk. */
  if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {

    /* Check if first caller to generate next pool. */
    int has_next = has_next_pool_++;
    if (has_next == 0) {
      next_ = new MTLSafeFreeList();
    }
    MTLSafeFreeList *next_list = next_.load();
    BLI_assert(next_list);
    next_list->insert_buffer(buffer);

    /* Clamp index to chunk limit if overflowing. */
    current_list_index_ = MTLSafeFreeList::MAX_NUM_BUFFERS_;
    return;
  }

  safe_free_pool_[insert_index] = buffer;
}

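/* Illustrative sketch (not part of the upstream file): the lock-free insert pattern used
 * above, reduced to its core. Each writer claims a unique slot with an atomic fetch-add on the
 * index; claims inside the fixed-size chunk write locally, and overflowing claims must be
 * forwarded to another chunk. The type below is hypothetical and only mirrors the shape of
 * MTLSafeFreeList. */
struct ExampleFixedChunk {
  static constexpr int capacity = 64;
  std::atomic<int> next_slot{0};
  void *slots[capacity] = {};

  /* Returns false when the chunk is full and the caller must forward to another chunk. */
  bool try_insert(void *item)
  {
    int slot = next_slot.fetch_add(1);
    if (slot >= capacity) {
      return false;
    }
    slots[slot] = item;
    return true;
  }
};
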
/* Increments from active GPUContext thread. */
void MTLSafeFreeList::increment_reference()
{
  lock_.lock();
  BLI_assert(in_free_queue_ == false);
  reference_count_++;
  lock_.unlock();
}

/* Reference decrements and addition to completed list queue can occur from MTLCommandBuffer
 * completion callback thread. */
void MTLSafeFreeList::decrement_reference()
{
  lock_.lock();
  BLI_assert(in_free_queue_ == false);
  int ref_count = --reference_count_;

  if (ref_count == 0) {
    MTLContext::get_global_memory_manager().push_completed_safe_list(this);
  }
  lock_.unlock();
}

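/* Illustrative sketch (not part of the upstream file): how the reference count above pairs
 * with command buffer lifetimes. The submission/completion hook shown here is hypothetical;
 * only increment_reference()/decrement_reference() are real entry points. */
static void example_track_safe_list_with_cmd_buffer(id<MTLCommandBuffer> cmd_buffer,
                                                    MTLSafeFreeList *safe_list)
{
  /* Submission thread: the command buffer may still read buffers freed this frame, so it holds
   * a reference on the list that contains them. */
  safe_list->increment_reference();

  /* Completion handler thread: once the GPU is done, drop the reference. When the last
   * reference goes away, the list pushes itself onto the completed queue. */
  [cmd_buffer addCompletedHandler:^(id<MTLCommandBuffer> cb) {
    (void)cb;
    safe_list->decrement_reference();
  }];
}
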
/* -------------------------------------------------------------------- */

/* Construct a gpu::MTLBuffer wrapper around a newly created metal::MTLBuffer. */
MTLBuffer::MTLBuffer(id<MTLDevice> mtl_device,
                     uint64_t size,
                     MTLResourceOptions options,
                     uint alignment)
{
  /* Calculate aligned allocation size. */
  BLI_assert(alignment > 0);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment);

  alignment_ = alignment;
  device_ = mtl_device;
  is_external_ = false;

  options_ = options;
  this->flag_in_use(false);

  metal_buffer_ = [device_ newBufferWithLength:aligned_alloc_size options:options];
  BLI_assert(metal_buffer_);
  [metal_buffer_ retain];

  size_ = aligned_alloc_size;
  this->set_usage_size(size_);
  if (!(options_ & MTLResourceStorageModePrivate)) {
    data_ = [metal_buffer_ contents];
  }
  else {
    data_ = nullptr;
  }
}

MTLBuffer::MTLBuffer(id<MTLBuffer> external_buffer)
{
  BLI_assert(external_buffer != nil);

  /* Ensure external_buffer remains referenced while in-use. */
  metal_buffer_ = external_buffer;
  [metal_buffer_ retain];

  /* Extract properties. */
  is_external_ = true;
  device_ = nil;
  alignment_ = 1;
  options_ = [metal_buffer_ resourceOptions];
  size_ = [metal_buffer_ allocatedSize];
  this->set_usage_size(size_);
  data_ = [metal_buffer_ contents];
  in_use_ = true;
}

gpu::MTLBuffer::~MTLBuffer()
{
  if (metal_buffer_ != nil) {
    [metal_buffer_ release];
    metal_buffer_ = nil;
  }
}

void gpu::MTLBuffer::free()
{
  if (!is_external_) {
    MTLContext::get_global_memory_manager().free_buffer(this);
  }
  else {
    if (metal_buffer_ != nil) {
      [metal_buffer_ release];
      metal_buffer_ = nil;
    }
  }
}

id<MTLBuffer> gpu::MTLBuffer::get_metal_buffer() const
{
  return metal_buffer_;
}

void *gpu::MTLBuffer::get_host_ptr() const
{
  BLI_assert(!(options_ & MTLResourceStorageModePrivate));
  BLI_assert(data_);
  return data_;
}

uint64_t gpu::MTLBuffer::get_size() const
{
  return size_;
}

uint64_t gpu::MTLBuffer::get_size_used() const
{
  return usage_size_;
}

bool gpu::MTLBuffer::requires_flush()
{
  /* We do not need to flush shared memory, as addressable buffer is shared. */
  return options_ & MTLResourceStorageModeManaged;
}

void gpu::MTLBuffer::set_label(NSString *str)
{
  metal_buffer_.label = str;
}

void gpu::MTLBuffer::debug_ensure_used()
{
  /* Debug: If buffer is not flagged as in-use, this is a problem. */
  BLI_assert(in_use_ &&
             "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer "
             "has likely already been freed.");
}

void gpu::MTLBuffer::flush()
{
  this->debug_ensure_used();
  if (this->requires_flush()) {
    [metal_buffer_ didModifyRange:NSMakeRange(0, size_)];
  }
}

void gpu::MTLBuffer::flush_range(uint64_t offset, uint64_t length)
{
  this->debug_ensure_used();
  if (this->requires_flush()) {
    BLI_assert((offset + length) <= size_);
    [metal_buffer_ didModifyRange:NSMakeRange(offset, length)];
  }
}

void gpu::MTLBuffer::flag_in_use(bool used)
{
  in_use_ = used;
}

bool gpu::MTLBuffer::get_in_use()
{
  return in_use_;
}

void gpu::MTLBuffer::set_usage_size(uint64_t size_used)
{
  BLI_assert(size_used > 0 && size_used <= size_);
  usage_size_ = size_used;
}

MTLResourceOptions gpu::MTLBuffer::get_resource_options()
{
  return options_;
}

uint gpu::MTLBuffer::get_alignment()
{
  return alignment_;
}

bool MTLTemporaryBuffer::requires_flush()
{
  /* We do not need to flush shared memory. */
  return this->options & MTLResourceStorageModeManaged;
}

void MTLTemporaryBuffer::flush()
{
  if (this->requires_flush()) {
    BLI_assert(this->metal_buffer);
    BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]);
    BLI_assert(this->buffer_offset >= 0);
    [this->metal_buffer
        didModifyRange:NSMakeRange(this->buffer_offset, this->size - this->buffer_offset)];
  }
}

/* -------------------------------------------------------------------- */

MTLScratchBufferManager::~MTLScratchBufferManager()
{
  this->free();
}

void MTLScratchBufferManager::init()
{

  if (!this->initialised_) {
    BLI_assert(context_.device);

    /* Initialize Scratch buffers. */
    for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
      scratch_buffers_[sb] = new MTLCircularBuffer(
          context_, mtl_scratch_buffer_initial_size_, true);
      BLI_assert(scratch_buffers_[sb]);
      BLI_assert(&(scratch_buffers_[sb]->own_context_) == &context_);
    }
    current_scratch_buffer_ = 0;
    initialised_ = true;
  }
}

void MTLScratchBufferManager::free()
{
  initialised_ = false;

  /* Release Scratch buffers */
  for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
    delete scratch_buffers_[sb];
    scratch_buffers_[sb] = nullptr;
  }
  current_scratch_buffer_ = 0;
}

MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range(uint64_t alloc_size)
{
  return this->scratch_buffer_allocate_range_aligned(alloc_size, 1);
}

MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range_aligned(
    uint64_t alloc_size, uint alignment)
{
  /* Ensure scratch buffer allocation alignment adheres to offset alignment requirements. */
  alignment = max_uu(alignment, 256);

  BLI_assert(current_scratch_buffer_ >= 0 && "Scratch Buffer index not set");
  MTLCircularBuffer *current_scratch_buff = this->scratch_buffers_[current_scratch_buffer_];
  BLI_assert(current_scratch_buff != nullptr && "Scratch Buffer does not exist");
  MTLTemporaryBuffer allocated_range = current_scratch_buff->allocate_range_aligned(alloc_size,
                                                                                    alignment);
  BLI_assert(allocated_range.size >= alloc_size && allocated_range.size <= alloc_size + alignment);
  BLI_assert(allocated_range.metal_buffer != nil);
  return allocated_range;
}

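/* Illustrative sketch (not part of the upstream file): consuming a scratch range. The caller
 * writes into `data` and binds the same `metal_buffer` at `buffer_offset` on an encoder; the
 * backend flushes the active scratch buffer before submission (see
 * flush_active_scratch_buffer below). The manager, encoder and slot index used here are
 * assumptions for illustration. */
static void example_use_scratch_range(MTLScratchBufferManager &scratch_mgr,
                                      id<MTLRenderCommandEncoder> encoder,
                                      const void *uniform_data,
                                      uint64_t uniform_size)
{
  MTLTemporaryBuffer range = scratch_mgr.scratch_buffer_allocate_range_aligned(uniform_size, 256);

  /* CPU write into the reserved window. */
  memcpy(range.data, uniform_data, uniform_size);

  /* Bind the backing buffer at the returned offset. */
  [encoder setVertexBuffer:range.metal_buffer offset:range.buffer_offset atIndex:0];
}
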
void MTLScratchBufferManager::ensure_increment_scratch_buffer()
{
  /* Fetch active scratch buffer. */
  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
  BLI_assert(&active_scratch_buf->own_context_ == &context_);

  /* Ensure existing scratch buffer is no longer in use. MTL_MAX_SCRATCH_BUFFERS specifies
   * the number of allocated scratch buffers. This value should be equal to the number of
   * simultaneous frames in-flight. I.e. the maximal number of scratch buffers which are
   * simultaneously in-use. */
  if (active_scratch_buf->used_frame_index_ < context_.get_current_frame_index()) {
    current_scratch_buffer_ = (current_scratch_buffer_ + 1) % mtl_max_scratch_buffers_;
    active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
    active_scratch_buf->reset();
    BLI_assert(&active_scratch_buf->own_context_ == &context_);
    MTL_LOG_INFO("Scratch buffer %d reset - (ctx %p)(Frame index: %d)\n",
                 current_scratch_buffer_,
                 &context_,
                 context_.get_current_frame_index());
  }
}

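/* Illustrative sketch (not part of the upstream file): the per-frame cadence the rotation
 * above expects. The frame hook shown is hypothetical; the three manager calls are the real
 * entry points. With mtl_max_scratch_buffers_ buffers and the same number of frames in flight,
 * a buffer is only reset after the GPU has finished the frame that used it. */
static void example_scratch_frame(MTLScratchBufferManager &scratch_mgr)
{
  /* Start of frame: rotate to (and reset) a scratch buffer that is no longer in flight. */
  scratch_mgr.ensure_increment_scratch_buffer();

  /* During the frame: transient allocations come from the active circular buffer. */
  MTLTemporaryBuffer tmp = scratch_mgr.scratch_buffer_allocate_range(512);
  (void)tmp;

  /* Before submitting the frame's command buffers: flush pending CPU writes. */
  scratch_mgr.flush_active_scratch_buffer();
}
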

void MTLScratchBufferManager::flush_active_scratch_buffer()
{
  /* Fetch active scratch buffer and verify context. */
  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
  BLI_assert(&active_scratch_buf->own_context_ == &context_);
  active_scratch_buf->flush();
}

/* MTLCircularBuffer implementation. */
MTLCircularBuffer::MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow)
    : own_context_(ctx)
{
  BLI_assert(this);
  MTLResourceOptions options = ([own_context_.device hasUnifiedMemory]) ?
                                   MTLResourceStorageModeShared :
                                   MTLResourceStorageModeManaged;
  cbuffer_ = new gpu::MTLBuffer(own_context_.device, initial_size, options, 256);
  current_offset_ = 0;
  can_resize_ = allow_grow;
  cbuffer_->flag_in_use(true);

  used_frame_index_ = ctx.get_current_frame_index();
  last_flush_base_offset_ = 0;

  /* Debug label. */
  if (G.debug & G_DEBUG_GPU) {
    cbuffer_->set_label(@"Circular Scratch Buffer");
  }
}

MTLCircularBuffer::~MTLCircularBuffer()
{
  delete cbuffer_;
}

MTLTemporaryBuffer MTLCircularBuffer::allocate_range(uint64_t alloc_size)
{
  return this->allocate_range_aligned(alloc_size, 1);
}

MTLTemporaryBuffer MTLCircularBuffer::allocate_range_aligned(uint64_t alloc_size, uint alignment)
{
  BLI_assert(this);

  /* Ensure alignment of an allocation is aligned to compatible offset boundaries. */
  BLI_assert(alignment > 0);
  alignment = max_ulul(alignment, 256);

  /* Align current offset and allocation size to desired alignment */
  uint64_t aligned_current_offset = ceil_to_multiple_ul(current_offset_, alignment);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(alloc_size, alignment);
  bool can_allocate = (aligned_current_offset + aligned_alloc_size) < cbuffer_->get_size();

  BLI_assert(aligned_current_offset >= current_offset_);
  BLI_assert(aligned_alloc_size >= alloc_size);

  BLI_assert(aligned_current_offset % alignment == 0);
  BLI_assert(aligned_alloc_size % alignment == 0);

  /* Recreate Buffer */
  if (!can_allocate) {
    uint64_t new_size = cbuffer_->get_size();
    if (can_resize_) {
      /* Resize to the maximum of basic resize heuristic OR the size of the current offset +
       * requested allocation -- we want the buffer to grow to a large enough size such that it
       * does not need to resize mid-frame. */
      new_size = max_ulul(
          min_ulul(MTLScratchBufferManager::mtl_scratch_buffer_max_size_, new_size * 1.2),
          aligned_current_offset + aligned_alloc_size);

#if MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION == 1
      /* IF a requested allocation EXCEEDS the maximum supported size, temporarily allocate up to
       * this, but shrink down ASAP. */
      if (new_size > MTLScratchBufferManager::mtl_scratch_buffer_max_size_) {

        /* If new requested allocation is bigger than maximum allowed size, temporarily resize to
         * maximum allocation size -- Otherwise, clamp the buffer size back down to the defined
         * maximum */
        if (aligned_alloc_size > MTLScratchBufferManager::mtl_scratch_buffer_max_size_) {
          new_size = aligned_alloc_size;
          MTL_LOG_INFO("Temporarily growing Scratch buffer to %d MB\n",
                       (int)new_size / 1024 / 1024);
        }
        else {
          new_size = MTLScratchBufferManager::mtl_scratch_buffer_max_size_;
          MTL_LOG_INFO("Shrinking Scratch buffer back to %d MB\n", (int)new_size / 1024 / 1024);
        }
      }
      BLI_assert(aligned_alloc_size <= new_size);
#else
      new_size = min_ulul(MTLScratchBufferManager::mtl_scratch_buffer_max_size_, new_size);

      if (aligned_alloc_size > new_size) {
        BLI_assert(false);

        /* Cannot allocate */
        MTLTemporaryBuffer alloc_range;
        alloc_range.metal_buffer = nil;
        alloc_range.data = nullptr;
        alloc_range.buffer_offset = 0;
        alloc_range.size = 0;
        alloc_range.options = cbuffer_->get_resource_options();
        return alloc_range;
      }
#endif
    }
    else {
      MTL_LOG_WARNING(
          "Performance Warning: Reached the end of circular buffer of size: %llu, but cannot "
          "resize. Starting new buffer\n",
          cbuffer_->get_size());
      BLI_assert(aligned_alloc_size <= new_size);

      /* Cannot allocate. */
      MTLTemporaryBuffer alloc_range;
      alloc_range.metal_buffer = nil;
      alloc_range.data = nullptr;
      alloc_range.buffer_offset = 0;
      alloc_range.size = 0;
      alloc_range.options = cbuffer_->get_resource_options();
    }

    /* Flush current buffer to ensure changes are visible on the GPU. */
    this->flush();

    /* Discard old buffer and create a new one - Relying on Metal reference counting to track
     * in-use buffers */
    MTLResourceOptions prev_options = cbuffer_->get_resource_options();
    uint prev_alignment = cbuffer_->get_alignment();
    delete cbuffer_;
    cbuffer_ = new gpu::MTLBuffer(own_context_.device, new_size, prev_options, prev_alignment);
    cbuffer_->flag_in_use(true);
    current_offset_ = 0;
    last_flush_base_offset_ = 0;

    /* Debug label. */
    if (G.debug & G_DEBUG_GPU) {
      cbuffer_->set_label(@"Circular Scratch Buffer");
    }
    MTL_LOG_INFO("Resized Metal circular buffer to %llu bytes\n", new_size);

    /* Reset allocation Status. */
    aligned_current_offset = 0;
    BLI_assert((aligned_current_offset + aligned_alloc_size) <= cbuffer_->get_size());
  }

  /* Allocate chunk. */
  MTLTemporaryBuffer alloc_range;
  alloc_range.metal_buffer = cbuffer_->get_metal_buffer();
  alloc_range.data = (void *)((uint8_t *)([alloc_range.metal_buffer contents]) +
                              aligned_current_offset);
  alloc_range.buffer_offset = aligned_current_offset;
  alloc_range.size = aligned_alloc_size;
  alloc_range.options = cbuffer_->get_resource_options();
  BLI_assert(alloc_range.data);

  /* Shift offset to match alignment. */
  current_offset_ = aligned_current_offset + aligned_alloc_size;
  BLI_assert(current_offset_ <= cbuffer_->get_size());
  return alloc_range;
}

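/* Illustrative sketch (not part of the upstream file): the core of allocate_range_aligned
 * with the resize and Metal specifics stripped away -- a bump allocator over a fixed-size
 * backing store. The struct below is hypothetical and only mirrors the offset bookkeeping. */
struct ExampleBumpAllocator {
  uint64_t capacity = 0;
  uint64_t offset = 0;

  /* Returns the aligned start offset of the reservation, or `capacity` on failure. */
  uint64_t reserve(uint64_t size, uint64_t alignment)
  {
    const uint64_t aligned_offset = ceil_to_multiple_ul(offset, alignment);
    const uint64_t aligned_size = ceil_to_multiple_ul(size, alignment);
    if (aligned_offset + aligned_size >= capacity) {
      return capacity; /* The real implementation grows the buffer or starts a new one. */
    }
    offset = aligned_offset + aligned_size;
    return aligned_offset;
  }
};
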
void MTLCircularBuffer::flush()
{
  BLI_assert(this);

  uint64_t len = current_offset_ - last_flush_base_offset_;
  if (len > 0) {
    cbuffer_->flush_range(last_flush_base_offset_, len);
    last_flush_base_offset_ = current_offset_;
  }
}

void MTLCircularBuffer::reset()
{
  BLI_assert(this);

  /* If circular buffer has data written to it, offset will be greater than zero. */
  if (current_offset_ > 0) {

    /* Ensure the circular buffer is no longer being used by an in-flight frame. */
    BLI_assert((own_context_.get_current_frame_index() >=
                (used_frame_index_ + MTL_NUM_SAFE_FRAMES - 1)) &&
               "Trying to reset Circular scratch buffer while its data is still being used by "
               "an in-flight frame");

    current_offset_ = 0;
    last_flush_base_offset_ = 0;
  }

  /* Update used frame index to current. */
  used_frame_index_ = own_context_.get_current_frame_index();
}

}  // namespace blender::gpu