20 #if !defined(WITH_CUDA)
22 #elif defined(WITH_CUDA_DYNLOAD)
24 static bool result =
false;
30 int cuew_result = cuewInit(CUEW_INIT_CUDA);
31 if (cuew_result == CUEW_SUCCESS) {
32 VLOG_INFO <<
"CUEW initialization succeeded";
33 if (CUDADevice::have_precompiled_kernels()) {
37 else if (cuewCompilerPath() !=
NULL) {
38 VLOG_INFO <<
"Found CUDA compiler " << cuewCompilerPath();
42 VLOG_INFO <<
"Neither precompiled kernels nor CUDA compiler was found,"
43 <<
" unable to use CUDA";
48 << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ?
49 "Error setting up atexit() handler" :
50 "Error opening the library");
62 return new CUDADevice(info, stats, profiler);
68 LOG(FATAL) <<
"Request to create CUDA device without compiled-in support. Should never happen.";
75 static CUresult device_cuda_safe_init()
81 __except (EXCEPTION_EXECUTE_HANDLER) {
84 fprintf(stderr,
"Cycles CUDA: driver crashed, continuing without CUDA.\n");
87 return CUDA_ERROR_NO_DEVICE;
97 CUresult
result = device_cuda_safe_init();
98 if (
result != CUDA_SUCCESS) {
99 if (
result != CUDA_ERROR_NO_DEVICE)
100 fprintf(stderr,
"CUDA cuInit: %s\n", cuewErrorString(
result));
106 if (
result != CUDA_SUCCESS) {
107 fprintf(stderr,
"CUDA cuDeviceGetCount: %s\n", cuewErrorString(
result));
113 for (
int num = 0; num <
count; num++) {
116 result = cuDeviceGetName(name, 256, num);
117 if (
result != CUDA_SUCCESS) {
118 fprintf(stderr,
"CUDA cuDeviceGetName: %s\n", cuewErrorString(
result));
123 cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
125 VLOG_INFO <<
"Ignoring device \"" << name
126 <<
"\", this graphics card is no longer supported.";
143 if (num != peer_num) {
145 cuDeviceCanAccessPeer(&can_access, num, peer_num);
150 int pci_location[3] = {0, 0, 0};
151 cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
152 cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
153 cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
156 (
unsigned int)pci_location[0],
157 (
unsigned int)pci_location[1],
158 (
unsigned int)pci_location[2]);
163 int timeout_attr = 0, preempt_attr = 0;
164 cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
165 cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
171 VLOG_INFO <<
"Assuming device has compute preemption on Windows 10.";
175 if (timeout_attr && !preempt_attr) {
176 VLOG_INFO <<
"Device is recognized as display.";
179 display_devices.push_back(info);
182 VLOG_INFO <<
"Device has compute preemption or is not used for display.";
185 VLOG_INFO <<
"Added device \"" << name <<
"\" with id \"" << info.
id <<
"\".";
188 if (!display_devices.empty())
189 devices.insert(
devices.end(), display_devices.begin(), display_devices.end());
198 CUresult
result = device_cuda_safe_init();
199 if (
result != CUDA_SUCCESS) {
200 if (
result != CUDA_ERROR_NO_DEVICE) {
201 return string(
"Error initializing CUDA: ") + cuewErrorString(
result);
203 return "No CUDA device found\n";
208 if (
result != CUDA_SUCCESS) {
209 return string(
"Error getting devices: ") + cuewErrorString(
result);
212 string capabilities =
"";
213 for (
int num = 0; num <
count; num++) {
215 if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) {
218 capabilities += string(
"\t") + name +
"\n";
220 # define GET_ATTR(attr) \
222 if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
223 capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
230 GET_ATTR(MAX_THREADS_PER_BLOCK);
231 GET_ATTR(MAX_BLOCK_DIM_X);
232 GET_ATTR(MAX_BLOCK_DIM_Y);
233 GET_ATTR(MAX_BLOCK_DIM_Z);
234 GET_ATTR(MAX_GRID_DIM_X);
235 GET_ATTR(MAX_GRID_DIM_Y);
236 GET_ATTR(MAX_GRID_DIM_Z);
237 GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK);
238 GET_ATTR(SHARED_MEMORY_PER_BLOCK);
239 GET_ATTR(TOTAL_CONSTANT_MEMORY);
242 GET_ATTR(MAX_REGISTERS_PER_BLOCK);
243 GET_ATTR(REGISTERS_PER_BLOCK);
244 GET_ATTR(CLOCK_RATE);
245 GET_ATTR(TEXTURE_ALIGNMENT);
246 GET_ATTR(GPU_OVERLAP);
247 GET_ATTR(MULTIPROCESSOR_COUNT);
248 GET_ATTR(KERNEL_EXEC_TIMEOUT);
249 GET_ATTR(INTEGRATED);
250 GET_ATTR(CAN_MAP_HOST_MEMORY);
251 GET_ATTR(COMPUTE_MODE);
252 GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH);
253 GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH);
254 GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT);
255 GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH);
256 GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT);
257 GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH);
258 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH);
259 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT);
260 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS);
261 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH);
262 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT);
263 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES);
264 GET_ATTR(SURFACE_ALIGNMENT);
265 GET_ATTR(CONCURRENT_KERNELS);
266 GET_ATTR(ECC_ENABLED);
267 GET_ATTR(TCC_DRIVER);
268 GET_ATTR(MEMORY_CLOCK_RATE);
269 GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH);
270 GET_ATTR(L2_CACHE_SIZE);
271 GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR);
272 GET_ATTR(ASYNC_ENGINE_COUNT);
273 GET_ATTR(UNIFIED_ADDRESSING);
274 GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH);
275 GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS);
276 GET_ATTR(CAN_TEX2D_GATHER);
277 GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH);
278 GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT);
279 GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE);
280 GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE);
281 GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE);
282 GET_ATTR(TEXTURE_PITCH_ALIGNMENT);
283 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH);
284 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH);
285 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS);
286 GET_ATTR(MAXIMUM_SURFACE1D_WIDTH);
287 GET_ATTR(MAXIMUM_SURFACE2D_WIDTH);
288 GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT);
289 GET_ATTR(MAXIMUM_SURFACE3D_WIDTH);
290 GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT);
291 GET_ATTR(MAXIMUM_SURFACE3D_DEPTH);
292 GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH);
293 GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS);
294 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH);
295 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT);
296 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS);
297 GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH);
298 GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH);
299 GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS);
300 GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH);
301 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH);
302 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT);
303 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH);
304 GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH);
305 GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT);
306 GET_ATTR(COMPUTE_CAPABILITY_MAJOR);
307 GET_ATTR(COMPUTE_CAPABILITY_MINOR);
308 GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH);
309 GET_ATTR(STREAM_PRIORITIES_SUPPORTED);
310 GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED);
311 GET_ATTR(LOCAL_L1_CACHE_SUPPORTED);
312 GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
313 GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR);
314 GET_ATTR(MANAGED_MEMORY);
315 GET_ATTR(MULTI_GPU_BOARD);
316 GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
318 capabilities +=
"\n";
DenoiserTypeMask denoisers
#define CCL_NAMESPACE_END
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
Device * device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
CCL_NAMESPACE_BEGIN bool device_cuda_init()
SyclQueue void void size_t num_bytes void
Vector< CPUDevice > devices
List of all CPUDevices. An instance of CPUDevice is created for every hardware thread.
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
CCL_NAMESPACE_BEGIN bool system_windows_version_at_least(int major, int build)