16 HIPDeviceQueue::HIPDeviceQueue(HIPDevice *device)
17 :
DeviceQueue(device), hip_device_(device), hip_stream_(nullptr)
19 const HIPContextScope scope(hip_device_);
20 hip_device_assert(hip_device_, hipStreamCreateWithFlags(&hip_stream_, hipStreamNonBlocking));
23 HIPDeviceQueue::~HIPDeviceQueue()
25 const HIPContextScope scope(hip_device_);
26 hipStreamDestroy(hip_stream_);
/* Number of integrator states this queue schedules concurrently.
 * Baseline is 16 states per hardware thread, falling back to 65536 threads
 * when the device reports none; an optional scale factor is read from the
 * CYCLES_CONCURRENT_STATES_FACTOR environment variable.
 * NOTE(review): source lines 38-51 are missing from this view — the code
 * that validates/applies `factor` and returns `num_states` is not visible
 * here; do not infer its exact behavior. */
29 int HIPDeviceQueue::num_concurrent_states(
const size_t state_size)
const
31 const int max_num_threads = hip_device_->get_num_multiprocessors() *
32 hip_device_->get_max_num_threads_per_multiprocessor();
33 int num_states = ((max_num_threads == 0) ? 65536 : max_num_threads) * 16;
/* Optional user override of the concurrency heuristic. */
35 const char *factor_str = getenv(
"CYCLES_CONCURRENT_STATES_FACTOR");
37 const float factor = (
float)atof(factor_str);
/* Number of states allowed to be busy at once: 4 per hardware thread.
 * NOTE(review): source lines 56-60 are missing from this view — the fallback
 * value returned when the device reports zero threads is not visible. */
52 int HIPDeviceQueue::num_concurrent_busy_states()
const
54 const int max_num_threads = hip_device_->get_num_multiprocessors() *
55 hip_device_->get_max_num_threads_per_multiprocessor();
57 if (max_num_threads == 0) {
61 return 4 * max_num_threads;
/* Fragment of HIPDeviceQueue::init_execution — the signature line is not
 * visible in this view. Visible steps: activate the device's HIP context,
 * upload texture info to the device, wait for the device to go idle, then
 * initialize execution debugging. */
67 HIPContextScope scope(hip_device_);
68 hip_device_->load_texture_info();
/* Make sure the texture upload is complete before any kernel runs. */
69 hip_device_assert(hip_device_, hipDeviceSynchronize());
71 debug_init_execution();
/* Fragment of HIPDeviceQueue::enqueue — the signature and most interior
 * lines (86-87, 89-102, 104-118, 120-122) are missing from this view.
 * Visible steps: bail out early when the device is already in an error
 * state, activate the HIP context, look up the compiled kernel, size the
 * optional dynamic shared memory as (threads_per_block + 1) ints, launch
 * through hipModuleLaunchKernel, and report success as the absence of a
 * device error.
 * NOTE(review): the grid dimensions, the condition guarding the shared
 * memory sizing, and the argument marshalling are not visible — do not
 * infer them from this fragment. */
78 if (hip_device_->have_error()) {
84 const HIPContextScope scope(hip_device_);
85 const HIPDeviceKernel &hip_kernel = hip_device_->kernels.get(
kernel);
88 const int num_threads_per_block = hip_kernel.num_threads_per_block;
91 int shared_mem_bytes = 0;
/* Presumably only some kernels need dynamic shared memory — the guard is
 * outside this view; TODO confirm against the full source. */
103 shared_mem_bytes = (num_threads_per_block + 1) *
sizeof(
int);
110 assert_success(hipModuleLaunchKernel(hip_kernel.function,
114 num_threads_per_block,
119 const_cast<void **
>(args.
values),
123 return !(hip_device_->have_error());
/* Block until all work queued on this stream has finished; returns false if
 * the device is (or ends up) in an error state.
 * NOTE(review): source lines 127-131 are missing from this view — the body
 * of the early-error branch (presumably `return false;`) is not visible. */
126 bool HIPDeviceQueue::synchronize()
128 if (hip_device_->have_error()) {
132 const HIPContextScope scope(hip_device_);
133 assert_success(hipStreamSynchronize(hip_stream_),
"synchronize");
136 return !(hip_device_->have_error());
/* Scattered fragments of the queue's memory operations (the zero/copy
 * to/from device methods) — the enclosing signatures and transfer calls are
 * not visible in this view. Each visible line either allocates device
 * memory on demand or activates the device's HIP context ahead of the
 * actual (not shown) transfer. */
149 hip_device_->mem_alloc(mem);
155 const HIPContextScope scope(hip_device_);
171 hip_device_->mem_alloc(mem);
178 const HIPContextScope scope(hip_device_);
197 const HIPContextScope scope(hip_device_);
204 void HIPDeviceQueue::assert_success(hipError_t
result,
const char *operation)
206 if (
result != hipSuccess) {
207 const char *name = hipewErrorString(
result);
208 hip_device_->set_error(
209 string_printf(
"%s in HIP queue %s (%s)", name, operation, debug_active_kernels().c_str()));
213 unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
215 return make_unique<HIPDeviceGraphicsInterop>(
this);
typedef float(TangentPoint)[2]
device_ptr device_pointer
#define CCL_NAMESPACE_END
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
static struct ImBuf * init_execution(const SeqRenderData *context, ImBuf *ibuf1, ImBuf *ibuf2, ImBuf *ibuf3)
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
#define VLOG_DEVICE_STATS
string string_human_readable_size(size_t size)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
ccl_device_inline size_t divide_up(size_t x, size_t y)