16 CUDADeviceQueue::CUDADeviceQueue(CUDADevice *device)
17 :
DeviceQueue(device), cuda_device_(device), cuda_stream_(nullptr)
19 const CUDAContextScope scope(cuda_device_);
20 cuda_device_assert(cuda_device_, cuStreamCreate(&cuda_stream_, CU_STREAM_NON_BLOCKING));
23 CUDADeviceQueue::~CUDADeviceQueue()
25 const CUDAContextScope scope(cuda_device_);
26 cuStreamDestroy(cuda_stream_);
// Number of integrator states to allocate for this queue, given the size of
// one state.
// NOTE(review): this extraction is missing several interior lines (the
// num_states computation, the use of state_size, and the return statement);
// restore from the upstream source before editing the logic.
29 int CUDADeviceQueue::num_concurrent_states(
const size_t state_size)
const
// Device-wide resident-thread capacity: SM count times max threads per SM.
31 const int max_num_threads = cuda_device_->get_num_multiprocessors() *
32 cuda_device_->get_max_num_threads_per_multiprocessor();
// Optional environment override scaling the state count; presumably
// multiplies the default num_states -- TODO confirm against upstream.
35 const char *factor_str = getenv(
"CYCLES_CONCURRENT_STATES_FACTOR");
// NOTE(review): atof on a possibly-null pointer would be UB; the missing
// lines likely guard on factor_str != nullptr -- confirm.
37 const float factor = (
float)atof(factor_str);
// Number of states that may be busy concurrently: four times the device-wide
// resident-thread capacity.
52 int CUDADeviceQueue::num_concurrent_busy_states()
const
54 const int max_num_threads = cuda_device_->get_num_multiprocessors() *
55 cuda_device_->get_max_num_threads_per_multiprocessor();
// Fallback when the device reports zero capacity.
// NOTE(review): the branch body is missing from this extraction (likely a
// fixed default count) -- restore from upstream before editing.
57 if (max_num_threads == 0) {
61 return 4 * max_num_threads;
// Fragment of CUDADeviceQueue::init_execution() -- the signature and braces
// are missing from this extraction.
67 CUDAContextScope scope(cuda_device_);
// Upload texture info before any kernel runs, then wait for the device to
// finish the upload so kernels observe consistent texture state.
68 cuda_device_->load_texture_info();
69 cuda_device_assert(cuda_device_, cuCtxSynchronize());
71 debug_init_execution();
// Fragment of CUDADeviceQueue::enqueue(...) -- the signature, the early
// return body, the grid-size computation and most cuLaunchKernel arguments
// are missing from this extraction; restore from upstream before editing.
// Bail out early when the device is already in an error state.
78 if (cuda_device_->have_error()) {
84 const CUDAContextScope scope(cuda_device_);
// Look up the compiled kernel for the requested DeviceKernel enum.
85 const CUDADeviceKernel &cuda_kernel = cuda_device_->kernels.get(
kernel);
88 const int num_threads_per_block = cuda_kernel.num_threads_per_block;
// Dynamic shared memory is zero by default; some kernels below request one
// int per thread plus one (presumably for a scan/sort scratch area --
// TODO confirm which kernels, the selection logic is missing here).
91 int shared_mem_bytes = 0;
103 shared_mem_bytes = (num_threads_per_block + 1) *
sizeof(
int);
// Launch on this queue's stream; assert_success reports driver errors via
// the device's error mechanism.
111 assert_success(cuLaunchKernel(cuda_kernel.function,
115 num_threads_per_block,
120 const_cast<void **
>(args.
values),
// Report success as the absence of a device-side error flag; the launch
// itself is asynchronous.
124 return !(cuda_device_->have_error());
// Block until all prior work on this queue's stream has completed.
// Returns false when the device is (or ends up) in an error state.
127 bool CUDADeviceQueue::synchronize()
// NOTE(review): the early-return branch body is missing from this
// extraction (presumably `return false;`) -- confirm against upstream.
129 if (cuda_device_->have_error()) {
133 const CUDAContextScope scope(cuda_device_);
// Host-blocking wait on the stream; failures are routed through
// assert_success with the operation name for diagnostics.
134 assert_success(cuStreamSynchronize(cuda_stream_),
"synchronize");
138 return !(cuda_device_->have_error());
// NOTE(review): the lines below are isolated single statements from several
// distinct CUDADeviceQueue memory-transfer methods (their signatures and
// bodies are missing from this extraction). Each appears to allocate device
// memory on demand and/or enter the device context before a driver call.
// Restore the full methods from upstream before editing.
151 cuda_device_->mem_alloc(mem);
157 const CUDAContextScope scope(cuda_device_);
173 cuda_device_->mem_alloc(mem);
180 const CUDAContextScope scope(cuda_device_);
199 const CUDAContextScope scope(cuda_device_);
// Translate a failing CUresult into a device error message that names the
// operation and the currently active kernels.
206 void CUDADeviceQueue::assert_success(CUresult
result,
const char *operation)
208 if (
result != CUDA_SUCCESS) {
// cuew provides the human-readable name for the driver error code.
209 const char *name = cuewErrorString(
result);
// NOTE(review): the call wrapping this format string (presumably
// cuda_device_->set_error(string_printf(...))) is missing from this
// extraction -- restore from upstream before editing.
211 "%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str()));
215 unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create()
217 return make_unique<CUDADeviceGraphicsInterop>(
this);
typedef float(TangentPoint)[2]
device_ptr device_pointer
#define CCL_NAMESPACE_END
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
static struct ImBuf * init_execution(const SeqRenderData *context, ImBuf *ibuf1, ImBuf *ibuf2, ImBuf *ibuf3)
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
#define VLOG_DEVICE_STATS
string string_human_readable_size(size_t size)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
ccl_device_inline size_t divide_up(size_t x, size_t y)