Blender 3.3: optix/device_impl.cpp
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2019, NVIDIA Corporation.
3  * Copyright 2019-2022 Blender Foundation. */
4 
5 #ifdef WITH_OPTIX
6 
7 # include "device/optix/device_impl.h"
8 
9 # include "bvh/bvh.h"
10 # include "bvh/optix.h"
11 
12 # include "integrator/pass_accessor_gpu.h"
13 
14 # include "scene/hair.h"
15 # include "scene/mesh.h"
16 # include "scene/object.h"
17 # include "scene/pass.h"
18 # include "scene/pointcloud.h"
19 # include "scene/scene.h"
20 
21 # include "util/debug.h"
22 # include "util/log.h"
23 # include "util/md5.h"
24 # include "util/path.h"
25 # include "util/progress.h"
26 # include "util/task.h"
27 # include "util/time.h"
28 
29 # define __KERNEL_OPTIX__
30 # include "kernel/device/optix/globals.h"
31 
32 # include <optix_denoiser_tiling.h>
33 
34 CCL_NAMESPACE_BEGIN
35 
36 // A minimal copy of the functionality from `optix_denoiser_tiling.h`, which makes it
37 // possible to fix integer overflow issues without bumping the SDK or driver requirement.
38 //
39 // The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
40 namespace {
41 
42 # if OPTIX_ABI_VERSION >= 60
43 using ::optixUtilDenoiserInvokeTiled;
44 # else
45 static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
46  const OptixImage2D &output,
47  unsigned int overlapWindowSizeInPixels,
48  unsigned int tileWidth,
49  unsigned int tileHeight,
50  std::vector<OptixUtilDenoiserImageTile> &tiles)
51 {
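 /* The image is split into a grid of tiles: border tiles only extend inward, while
  * interior tiles carry `overlapWindowSizeInPixels` of padding on each side. The
  * (size_t) casts in the data-offset math below are, presumably, the integer-overflow
  * fix this local copy exists for (byte offsets can exceed 32 bits on large images). */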
52  if (tileWidth == 0 || tileHeight == 0)
53  return OPTIX_ERROR_INVALID_VALUE;
54 
55  unsigned int inPixelStride = optixUtilGetPixelStride(input);
56  unsigned int outPixelStride = optixUtilGetPixelStride(output);
57 
58  int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
59  int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
60  int inp_y = 0, copied_y = 0;
61 
62  do {
63  int inputOffsetY = inp_y == 0 ? 0 :
64  std::max((int)overlapWindowSizeInPixels,
65  inp_h - ((int)input.height - inp_y));
66  int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
67  std::min(tileHeight, input.height - copied_y);
68 
69  int inp_x = 0, copied_x = 0;
70  do {
71  int inputOffsetX = inp_x == 0 ? 0 :
72  std::max((int)overlapWindowSizeInPixels,
73  inp_w - ((int)input.width - inp_x));
74  int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
75  std::min(tileWidth, input.width - copied_x);
76 
77  OptixUtilDenoiserImageTile tile;
78  tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
79  (size_t)(inp_x - inputOffsetX) * inPixelStride;
80  tile.input.width = inp_w;
81  tile.input.height = inp_h;
82  tile.input.rowStrideInBytes = input.rowStrideInBytes;
83  tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
84  tile.input.format = input.format;
85 
86  tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
87  (size_t)inp_x * outPixelStride;
88  tile.output.width = copy_x;
89  tile.output.height = copy_y;
90  tile.output.rowStrideInBytes = output.rowStrideInBytes;
91  tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
92  tile.output.format = output.format;
93 
94  tile.inputOffsetX = inputOffsetX;
95  tile.inputOffsetY = inputOffsetY;
96  tiles.push_back(tile);
97 
98  inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
99  copied_x += copy_x;
100  } while (inp_x < static_cast<int>(input.width));
101 
102  inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
103  copied_y += copy_y;
104  } while (inp_y < static_cast<int>(input.height));
105 
106  return OPTIX_SUCCESS;
107 }
108 
109 static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
110  CUstream stream,
111  const OptixDenoiserParams *params,
112  CUdeviceptr denoiserState,
113  size_t denoiserStateSizeInBytes,
114  const OptixDenoiserGuideLayer *guideLayer,
115  const OptixDenoiserLayer *layers,
116  unsigned int numLayers,
117  CUdeviceptr scratch,
118  size_t scratchSizeInBytes,
119  unsigned int overlapWindowSizeInPixels,
120  unsigned int tileWidth,
121  unsigned int tileHeight)
122 {
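 /* Split every layer, and each enabled guide image, into the same tile grid, then
  * invoke the denoiser once per tile with the matching input offsets. */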
123  if (!guideLayer || !layers)
124  return OPTIX_ERROR_INVALID_VALUE;
125 
126  std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
127  std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
128  for (unsigned int l = 0; l < numLayers; l++) {
129  if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
130  layers[l].output,
131  overlapWindowSizeInPixels,
132  tileWidth,
133  tileHeight,
134  tiles[l]))
135  return res;
136 
137  if (layers[l].previousOutput.data) {
138  OptixImage2D dummyOutput = layers[l].previousOutput;
139  if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
140  dummyOutput,
141  overlapWindowSizeInPixels,
142  tileWidth,
143  tileHeight,
144  prevTiles[l]))
145  return res;
146  }
147  }
148 
149  std::vector<OptixUtilDenoiserImageTile> albedoTiles;
150  if (guideLayer->albedo.data) {
151  OptixImage2D dummyOutput = guideLayer->albedo;
152  if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
153  dummyOutput,
154  overlapWindowSizeInPixels,
155  tileWidth,
156  tileHeight,
157  albedoTiles))
158  return res;
159  }
160 
161  std::vector<OptixUtilDenoiserImageTile> normalTiles;
162  if (guideLayer->normal.data) {
163  OptixImage2D dummyOutput = guideLayer->normal;
164  if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
165  dummyOutput,
166  overlapWindowSizeInPixels,
167  tileWidth,
168  tileHeight,
169  normalTiles))
170  return res;
171  }
172  std::vector<OptixUtilDenoiserImageTile> flowTiles;
173  if (guideLayer->flow.data) {
174  OptixImage2D dummyOutput = guideLayer->flow;
175  if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
176  dummyOutput,
177  overlapWindowSizeInPixels,
178  tileWidth,
179  tileHeight,
180  flowTiles))
181  return res;
182  }
183 
184  for (size_t t = 0; t < tiles[0].size(); t++) {
185  std::vector<OptixDenoiserLayer> tlayers;
186  for (unsigned int l = 0; l < numLayers; l++) {
187  OptixDenoiserLayer layer = {};
188  layer.input = (tiles[l])[t].input;
189  layer.output = (tiles[l])[t].output;
190  if (layers[l].previousOutput.data)
191  layer.previousOutput = (prevTiles[l])[t].input;
192  tlayers.push_back(layer);
193  }
194 
195  OptixDenoiserGuideLayer gl = {};
196  if (guideLayer->albedo.data)
197  gl.albedo = albedoTiles[t].input;
198 
199  if (guideLayer->normal.data)
200  gl.normal = normalTiles[t].input;
201 
202  if (guideLayer->flow.data)
203  gl.flow = flowTiles[t].input;
204 
205  if (const OptixResult res = optixDenoiserInvoke(denoiser,
206  stream,
207  params,
208  denoiserState,
209  denoiserStateSizeInBytes,
210  &gl,
211  &tlayers[0],
212  numLayers,
213  (tiles[0])[t].inputOffsetX,
214  (tiles[0])[t].inputOffsetY,
215  scratch,
216  scratchSizeInBytes))
217  return res;
218  }
219  return OPTIX_SUCCESS;
220 }
221 # endif
222 
223 # if OPTIX_ABI_VERSION >= 55
224 static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason)
225 {
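 /* optixTaskExecute can report follow-up compilation tasks; each one is pushed onto
  * the TaskPool so it executes (and may fan out further) on worker threads. */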
226  OptixTask additional_tasks[16];
227  unsigned int num_additional_tasks = 0;
228 
229  const OptixResult result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks);
230  if (result == OPTIX_SUCCESS) {
231  for (unsigned int i = 0; i < num_additional_tasks; ++i) {
232  pool.push(function_bind(
233  &execute_optix_task, std::ref(pool), additional_tasks[i], std::ref(failure_reason)));
234  }
235  }
236  else {
237  failure_reason = result;
238  }
239 }
240 # endif
241 
242 } // namespace
243 
244 OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
245  : device(device), queue(device), state(device, "__denoiser_state", true)
246 {
247 }
248 
249 OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
250  : CUDADevice(info, stats, profiler),
251  sbt_data(this, "__sbt", MEM_READ_ONLY),
252  launch_params(this, "kernel_params", false),
253  denoiser_(this)
254 {
255  /* Make the CUDA context current. */
256  if (!cuContext) {
257  /* Do not initialize if CUDA context creation failed already. */
258  return;
259  }
260  const CUDAContextScope scope(this);
261 
262  /* Create OptiX context for this device. */
263  OptixDeviceContextOptions options = {};
264 # ifdef WITH_CYCLES_LOGGING
265  options.logCallbackLevel = 4; /* Fatal = 1, Error = 2, Warning = 3, Print = 4. */
266  options.logCallbackFunction = [](unsigned int level, const char *, const char *message, void *) {
267  switch (level) {
268  case 1:
269  LOG_IF(FATAL, VLOG_IS_ON(1)) << message;
270  break;
271  case 2:
272  LOG_IF(ERROR, VLOG_IS_ON(1)) << message;
273  break;
274  case 3:
275  LOG_IF(WARNING, VLOG_IS_ON(1)) << message;
276  break;
277  case 4:
278  LOG_IF(INFO, VLOG_IS_ON(1)) << message;
279  break;
280  }
281  };
282 # endif
283  if (DebugFlags().optix.use_debug) {
284  VLOG_INFO << "Using OptiX debug mode.";
285  options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
286  }
287  optix_assert(optixDeviceContextCreate(cuContext, &options, &context));
288 # ifdef WITH_CYCLES_LOGGING
289  optix_assert(optixDeviceContextSetLogCallback(
290  context, options.logCallbackFunction, options.logCallbackData, options.logCallbackLevel));
291 # endif
292 
293  /* Fix weird compiler bug that assigns wrong size. */
294  launch_params.data_elements = sizeof(KernelParamsOptiX);
295 
296  /* Allocate launch parameter buffer memory on device. */
297  launch_params.alloc_to_device(1);
298 }
299 
300 OptiXDevice::~OptiXDevice()
301 {
302  /* Make CUDA context current. */
303  const CUDAContextScope scope(this);
304 
305  free_bvh_memory_delayed();
306 
307  sbt_data.free();
308  texture_info.free();
309  launch_params.free();
310 
311  /* Unload modules. */
312  if (optix_module != NULL) {
313  optixModuleDestroy(optix_module);
314  }
315  for (unsigned int i = 0; i < 2; ++i) {
316  if (builtin_modules[i] != NULL) {
317  optixModuleDestroy(builtin_modules[i]);
318  }
319  }
320  for (unsigned int i = 0; i < NUM_PIPELINES; ++i) {
321  if (pipelines[i] != NULL) {
322  optixPipelineDestroy(pipelines[i]);
323  }
324  }
325 
326  /* Make sure denoiser is destroyed before device context! */
327  if (denoiser_.optix_denoiser != nullptr) {
328  optixDenoiserDestroy(denoiser_.optix_denoiser);
329  }
330 
331  optixDeviceContextDestroy(context);
332 }
333 
334 unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
335 {
336  return make_unique<OptiXDeviceQueue>(this);
337 }
338 
339 BVHLayoutMask OptiXDevice::get_bvh_layout_mask() const
340 {
341  /* OptiX has its own internal acceleration structure format. */
342  return BVH_LAYOUT_OPTIX;
343 }
344 
345 string OptiXDevice::compile_kernel_get_common_cflags(const uint kernel_features)
346 {
347  string common_cflags = CUDADevice::compile_kernel_get_common_cflags(kernel_features);
348 
349  /* Add OptiX SDK include directory to include paths. */
350  const char *optix_sdk_path = getenv("OPTIX_ROOT_DIR");
351  if (optix_sdk_path) {
352  common_cflags += string_printf(" -I\"%s/include\"", optix_sdk_path);
353  }
354 
355  /* Specialization for shader raytracing. */
356  if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
357  common_cflags += " --keep-device-functions";
358  }
359 
360  return common_cflags;
361 }
362 
363 bool OptiXDevice::load_kernels(const uint kernel_features)
364 {
365  if (have_error()) {
366  /* Abort early if context creation failed already. */
367  return false;
368  }
369 
370  /* Load CUDA modules because we need some of the utility kernels. */
371  if (!CUDADevice::load_kernels(kernel_features)) {
372  return false;
373  }
374 
375  /* Skip creating OptiX module if only doing denoising. */
376  if (!(kernel_features & (KERNEL_FEATURE_PATH_TRACING | KERNEL_FEATURE_BAKING))) {
377  return true;
378  }
379 
380  const CUDAContextScope scope(this);
381 
382  /* Unload existing OptiX module and pipelines first. */
383  if (optix_module != NULL) {
384  optixModuleDestroy(optix_module);
385  optix_module = NULL;
386  }
387  for (unsigned int i = 0; i < 2; ++i) {
388  if (builtin_modules[i] != NULL) {
389  optixModuleDestroy(builtin_modules[i]);
390  builtin_modules[i] = NULL;
391  }
392  }
393  for (unsigned int i = 0; i < NUM_PIPELINES; ++i) {
394  if (pipelines[i] != NULL) {
395  optixPipelineDestroy(pipelines[i]);
396  pipelines[i] = NULL;
397  }
398  }
399 
400  OptixModuleCompileOptions module_options = {};
401  module_options.maxRegisterCount = 0; /* Do not set an explicit register limit. */
402 
403  if (DebugFlags().optix.use_debug) {
404  module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
405  module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
406  }
407  else {
408  module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
409  module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
410  }
411 
412  module_options.boundValues = nullptr;
413  module_options.numBoundValues = 0;
414 # if OPTIX_ABI_VERSION >= 55
415  module_options.payloadTypes = nullptr;
416  module_options.numPayloadTypes = 0;
417 # endif
418 
419  OptixPipelineCompileOptions pipeline_options = {};
420  /* Default to no motion blur and two-level graph, since it is the fastest option. */
421  pipeline_options.usesMotionBlur = false;
422  pipeline_options.traversableGraphFlags =
423  OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
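 /* Payload values are the 32-bit registers exchanged with optixTrace; eight matches
  * what the kernel's trace calls pack into them. */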
424  pipeline_options.numPayloadValues = 8;
425  pipeline_options.numAttributeValues = 2; /* u, v */
426  pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
427  pipeline_options.pipelineLaunchParamsVariableName = "kernel_params"; /* See globals.h */
428 
429  pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
430  if (kernel_features & KERNEL_FEATURE_HAIR) {
431  if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
432 # if OPTIX_ABI_VERSION >= 55
433  pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
434 # else
435  pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
436 # endif
437  }
438  else
439  pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
440  }
441  if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
442  pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
443  }
444 
445  /* Keep track of whether motion blur is enabled, so motion can be enabled/disabled in BVH
446  * builds. This is necessary since objects may be reported to have motion if the Vector pass
447  * is active, but may still need to be rendered without motion blur if that isn't active. */
448  motion_blur = (kernel_features & KERNEL_FEATURE_OBJECT_MOTION) != 0;
449 
450  if (motion_blur) {
451  pipeline_options.usesMotionBlur = true;
452  /* Motion blur can insert motion transforms into the traversal graph.
453  * It is then no longer a two-level graph, so the flags need to allow any configuration. */
454  pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
455  }
456 
457  { /* Load and compile PTX module with OptiX kernels. */
458  string ptx_data, ptx_filename = path_get(
459  (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
460  "lib/kernel_optix_shader_raytrace.ptx" :
461  "lib/kernel_optix.ptx");
462  if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
463  if (!getenv("OPTIX_ROOT_DIR")) {
464  set_error(
465  "Missing OPTIX_ROOT_DIR environment variable (which must be set with the path to "
466  "the OptiX SDK to be able to compile OptiX kernels on demand).");
467  return false;
468  }
469  ptx_filename = compile_kernel(
470  kernel_features,
471  (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
472  "kernel_shader_raytrace" :
473  "kernel",
474  "optix",
475  true);
476  }
477  if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
478  set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str()));
479  return false;
480  }
481 
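 /* Newer SDKs (ABI 84+) renamed the module-creation entry point, dropping the
  * "FromPTX" suffix; the arguments and tasked compilation flow are unchanged. */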
482 # if OPTIX_ABI_VERSION >= 84
483  OptixTask task = nullptr;
484  OptixResult result = optixModuleCreateWithTasks(context,
485  &module_options,
486  &pipeline_options,
487  ptx_data.data(),
488  ptx_data.size(),
489  nullptr,
490  nullptr,
491  &optix_module,
492  &task);
493  if (result == OPTIX_SUCCESS) {
494  TaskPool pool;
495  execute_optix_task(pool, task, result);
496  pool.wait_work();
497  }
498 # elif OPTIX_ABI_VERSION >= 55
499  OptixTask task = nullptr;
500  OptixResult result = optixModuleCreateFromPTXWithTasks(context,
501  &module_options,
502  &pipeline_options,
503  ptx_data.data(),
504  ptx_data.size(),
505  nullptr,
506  nullptr,
507  &optix_module,
508  &task);
509  if (result == OPTIX_SUCCESS) {
510  TaskPool pool;
511  execute_optix_task(pool, task, result);
512  pool.wait_work();
513  }
514 # else
515  const OptixResult result = optixModuleCreateFromPTX(context,
516  &module_options,
517  &pipeline_options,
518  ptx_data.data(),
519  ptx_data.size(),
520  nullptr,
521  0,
522  &optix_module);
523 # endif
524  if (result != OPTIX_SUCCESS) {
525  set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)",
526  ptx_filename.c_str(),
527  optixGetErrorName(result)));
528  return false;
529  }
530  }
531 
532  /* Create program groups. */
533  OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {};
534  OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {};
535  OptixProgramGroupOptions group_options = {}; /* There are no options currently. */
536  group_descs[PG_RGEN_INTERSECT_CLOSEST].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
537  group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.module = optix_module;
538  group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.entryFunctionName =
539  "__raygen__kernel_optix_integrator_intersect_closest";
540  group_descs[PG_RGEN_INTERSECT_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
541  group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.module = optix_module;
542  group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.entryFunctionName =
543  "__raygen__kernel_optix_integrator_intersect_shadow";
544  group_descs[PG_RGEN_INTERSECT_SUBSURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
545  group_descs[PG_RGEN_INTERSECT_SUBSURFACE].raygen.module = optix_module;
546  group_descs[PG_RGEN_INTERSECT_SUBSURFACE].raygen.entryFunctionName =
547  "__raygen__kernel_optix_integrator_intersect_subsurface";
548  group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
549  group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].raygen.module = optix_module;
550  group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].raygen.entryFunctionName =
551  "__raygen__kernel_optix_integrator_intersect_volume_stack";
552  group_descs[PG_MISS].kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
553  group_descs[PG_MISS].miss.module = optix_module;
554  group_descs[PG_MISS].miss.entryFunctionName = "__miss__kernel_optix_miss";
555  group_descs[PG_HITD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
556  group_descs[PG_HITD].hitgroup.moduleCH = optix_module;
557  group_descs[PG_HITD].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
558  group_descs[PG_HITD].hitgroup.moduleAH = optix_module;
559  group_descs[PG_HITD].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_visibility_test";
560  group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
561  group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
562  group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
563  group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
564  group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
565  group_descs[PG_HITV].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
566  group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
567  group_descs[PG_HITV].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_volume_test";
568 
569  if (kernel_features & KERNEL_FEATURE_HAIR) {
570  if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
571  /* Built-in thick curve intersection. */
572  OptixBuiltinISOptions builtin_options = {};
573 # if OPTIX_ABI_VERSION >= 55
574  builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
575  builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
576  OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
577  builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
578 # else
579  builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
580 # endif
581  builtin_options.usesMotionBlur = false;
582 
583  optix_assert(optixBuiltinISModuleGet(
584  context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
585 
586  group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
587  group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr;
588  group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
589  group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr;
590 
591  if (motion_blur) {
592  builtin_options.usesMotionBlur = true;
593 
594  optix_assert(optixBuiltinISModuleGet(
595  context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
596 
597  group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
598  group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
599  group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
600  group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
601  }
602  }
603  else {
604  /* Custom ribbon intersection. */
605  group_descs[PG_HITD].hitgroup.moduleIS = optix_module;
606  group_descs[PG_HITS].hitgroup.moduleIS = optix_module;
607  group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
608  group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
609  }
610  }
611 
612  /* Pointclouds */
613  if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
614  group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
615  group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
616  group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
617  group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
618  group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
619  group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
620  group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
621  group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
622  }
623 
624  if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) {
625  /* Add hit group for local intersections. */
626  group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
627  group_descs[PG_HITL].hitgroup.moduleAH = optix_module;
628  group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit";
629  }
630 
631  /* Shader raytracing replaces some functions with direct callables. */
632  if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
633  group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
634  group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module;
635  group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.entryFunctionName =
636  "__raygen__kernel_optix_integrator_shade_surface_raytrace";
637  group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
638  group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module;
639  group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC = "__direct_callable__svm_node_ao";
640  group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
641  group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
642  group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
643  "__direct_callable__svm_node_bevel";
644  }
645 
646  /* MNEE. */
647  if (kernel_features & KERNEL_FEATURE_MNEE) {
648  group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
649  group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
650  group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
651  "__raygen__kernel_optix_integrator_shade_surface_mnee";
652  }
653 
654  optix_assert(optixProgramGroupCreate(
655  context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
656 
657  /* Get program stack sizes. */
658  OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
659  /* Set up SBT, which in this case is used only to select between different programs. */
660  sbt_data.alloc(NUM_PROGRAM_GROUPS);
661  memset(sbt_data.host_pointer, 0, sizeof(SbtRecord) * NUM_PROGRAM_GROUPS);
662  for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
663  optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
664 # if OPTIX_ABI_VERSION >= 84
665  optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i], nullptr));
666 # else
667  optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
668 # endif
669  }
670  sbt_data.copy_to_device(); /* Upload SBT to device. */
671 
672  /* Calculate maximum trace continuation stack size. */
673  unsigned int trace_css = stack_size[PG_HITD].cssCH;
674  /* This is based on the maximum of closest-hit and any-hit/intersection programs. */
675  trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
676  trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
677  trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
678  trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
679  trace_css = std::max(trace_css,
680  stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
681  trace_css = std::max(trace_css,
682  stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
683  trace_css = std::max(
684  trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
685  trace_css = std::max(
686  trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
687 
688  OptixPipelineLinkOptions link_options = {};
689  link_options.maxTraceDepth = 1;
690 # if OPTIX_ABI_VERSION < 84
691  if (DebugFlags().optix.use_debug) {
692  link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
693  }
694  else {
695  link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
696  }
697 # endif
698 
699  if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
700  /* Create shader raytracing pipeline. */
701  vector<OptixProgramGroup> pipeline_groups;
702  pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
703  pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
704  pipeline_groups.push_back(groups[PG_MISS]);
705  pipeline_groups.push_back(groups[PG_HITD]);
706  pipeline_groups.push_back(groups[PG_HITS]);
707  pipeline_groups.push_back(groups[PG_HITL]);
708  pipeline_groups.push_back(groups[PG_HITV]);
709  if (motion_blur) {
710  pipeline_groups.push_back(groups[PG_HITD_MOTION]);
711  pipeline_groups.push_back(groups[PG_HITS_MOTION]);
712  }
713  if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
714  pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
715  pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
716  }
717  pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
718  pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
719 
720  optix_assert(optixPipelineCreate(context,
721  &pipeline_options,
722  &link_options,
723  pipeline_groups.data(),
724  pipeline_groups.size(),
725  nullptr,
726  0,
727  &pipelines[PIP_SHADE_RAYTRACE]));
728 
729  /* Combine ray generation and trace continuation stack size. */
730  const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG +
731  link_options.maxTraceDepth * trace_css;
732  const unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC,
733  stack_size[PG_CALL_SVM_BEVEL].dssDC);
734 
735  /* Set stack size depending on pipeline options. */
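 /* The last argument is the maximum traversable graph depth: two levels for IAS -> GAS,
  * three when motion transforms sit between them. */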
736  optix_assert(optixPipelineSetStackSize(
737  pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2));
738  }
739 
740  if (kernel_features & KERNEL_FEATURE_MNEE) {
741  /* Create MNEE pipeline. */
742  vector<OptixProgramGroup> pipeline_groups;
743  pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
744  pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
745  pipeline_groups.push_back(groups[PG_MISS]);
746  pipeline_groups.push_back(groups[PG_HITD]);
747  pipeline_groups.push_back(groups[PG_HITS]);
748  pipeline_groups.push_back(groups[PG_HITL]);
749  pipeline_groups.push_back(groups[PG_HITV]);
750  if (motion_blur) {
751  pipeline_groups.push_back(groups[PG_HITD_MOTION]);
752  pipeline_groups.push_back(groups[PG_HITS_MOTION]);
753  }
754  if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
755  pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
756  pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
757  }
758  pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
759  pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
760 
761  optix_assert(optixPipelineCreate(context,
762  &pipeline_options,
763  &link_options,
764  pipeline_groups.data(),
765  pipeline_groups.size(),
766  nullptr,
767  0,
768  &pipelines[PIP_SHADE_MNEE]));
769 
770  /* Combine ray generation and trace continuation stack size. */
771  const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG +
772  link_options.maxTraceDepth * trace_css;
773  const unsigned int dss = 0;
774 
775  /* Set stack size depending on pipeline options. */
776  optix_assert(
777  optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2));
778  }
779 
780  { /* Create intersection-only pipeline. */
781  vector<OptixProgramGroup> pipeline_groups;
782  pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
783  pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_CLOSEST]);
784  pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SHADOW]);
785  pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SUBSURFACE]);
786  pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_VOLUME_STACK]);
787  pipeline_groups.push_back(groups[PG_MISS]);
788  pipeline_groups.push_back(groups[PG_HITD]);
789  pipeline_groups.push_back(groups[PG_HITS]);
790  pipeline_groups.push_back(groups[PG_HITL]);
791  pipeline_groups.push_back(groups[PG_HITV]);
792  if (motion_blur) {
793  pipeline_groups.push_back(groups[PG_HITD_MOTION]);
794  pipeline_groups.push_back(groups[PG_HITS_MOTION]);
795  }
796  if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
797  pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
798  pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
799  }
800 
801  optix_assert(optixPipelineCreate(context,
802  &pipeline_options,
803  &link_options,
804  pipeline_groups.data(),
805  pipeline_groups.size(),
806  nullptr,
807  0,
808  &pipelines[PIP_INTERSECT]));
809 
810  /* Calculate continuation stack size based on the maximum of all ray generation stack sizes. */
811  const unsigned int css =
812  std::max(stack_size[PG_RGEN_INTERSECT_CLOSEST].cssRG,
813  std::max(stack_size[PG_RGEN_INTERSECT_SHADOW].cssRG,
814  std::max(stack_size[PG_RGEN_INTERSECT_SUBSURFACE].cssRG,
815  stack_size[PG_RGEN_INTERSECT_VOLUME_STACK].cssRG))) +
816  link_options.maxTraceDepth * trace_css;
817 
818  optix_assert(
819  optixPipelineSetStackSize(pipelines[PIP_INTERSECT], 0, 0, css, motion_blur ? 3 : 2));
820  }
821 
822  /* Clean up program group objects. */
823  for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
824  optixProgramGroupDestroy(groups[i]);
825  }
826 
827  return true;
828 }
829 
830 /* --------------------------------------------------------------------
831  * Buffer denoising.
832  */
833 
834 class OptiXDevice::DenoiseContext {
835  public:
836  explicit DenoiseContext(OptiXDevice *device, const DeviceDenoiseTask &task)
837  : denoise_params(task.params),
838  render_buffers(task.render_buffers),
839  buffer_params(task.buffer_params),
840  guiding_buffer(device, "denoiser guiding passes buffer", true),
841  num_samples(task.num_samples)
842  {
843  num_input_passes = 1;
844  if (denoise_params.use_pass_albedo) {
845  num_input_passes += 1;
846  use_pass_albedo = true;
847  pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
848  if (denoise_params.use_pass_normal) {
849  num_input_passes += 1;
850  use_pass_normal = true;
851  pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
852  }
853  }
854 
855  if (denoise_params.temporally_stable) {
856  prev_output.device_pointer = render_buffers->buffer.device_pointer;
857 
858  prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
859 
860  prev_output.stride = buffer_params.stride;
861  prev_output.pass_stride = buffer_params.pass_stride;
862 
863  num_input_passes += 1;
864  use_pass_flow = true;
865  pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
866  }
867 
868  use_guiding_passes = (num_input_passes - 1) > 0;
869 
870  if (use_guiding_passes) {
871  if (task.allow_inplace_modification) {
872  guiding_params.device_pointer = render_buffers->buffer.device_pointer;
873 
874  guiding_params.pass_albedo = pass_denoising_albedo;
875  guiding_params.pass_normal = pass_denoising_normal;
876  guiding_params.pass_flow = pass_motion;
877 
878  guiding_params.stride = buffer_params.stride;
879  guiding_params.pass_stride = buffer_params.pass_stride;
880  }
881  else {
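 /* Pack only the enabled guiding passes into a dedicated buffer, interleaving
  * albedo (3 floats), normal (3 floats) and flow (2 floats) per pixel. */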
882  guiding_params.pass_stride = 0;
883  if (use_pass_albedo) {
884  guiding_params.pass_albedo = guiding_params.pass_stride;
885  guiding_params.pass_stride += 3;
886  }
887  if (use_pass_normal) {
888  guiding_params.pass_normal = guiding_params.pass_stride;
889  guiding_params.pass_stride += 3;
890  }
891  if (use_pass_flow) {
892  guiding_params.pass_flow = guiding_params.pass_stride;
893  guiding_params.pass_stride += 2;
894  }
895 
896  guiding_params.stride = buffer_params.width;
897 
898  guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
899  guiding_params.pass_stride);
900  guiding_params.device_pointer = guiding_buffer.device_pointer;
901  }
902  }
903 
904  pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
905  }
906 
907  const DenoiseParams &denoise_params;
908 
909  RenderBuffers *render_buffers = nullptr;
910  const BufferParams &buffer_params;
911 
912  /* Previous output. */
913  struct {
914  device_ptr device_pointer = 0;
915 
916  int offset = PASS_UNUSED;
917 
918  int stride = -1;
919  int pass_stride = -1;
920  } prev_output;
921 
922  /* Device-side storage of the guiding passes. */
923  device_only_memory<float> guiding_buffer;
924 
925  struct {
926  device_ptr device_pointer = 0;
927 
928  /* NOTE: Only initialized when the corresponding guiding pass is enabled. */
929  int pass_albedo = PASS_UNUSED;
930  int pass_normal = PASS_UNUSED;
931  int pass_flow = PASS_UNUSED;
932 
933  int stride = -1;
934  int pass_stride = -1;
935  } guiding_params;
936 
937  /* Number of input passes, including the color and extra auxiliary passes. */
938  int num_input_passes = 0;
939  bool use_guiding_passes = false;
940  bool use_pass_albedo = false;
941  bool use_pass_normal = false;
942  bool use_pass_flow = false;
943 
944  int num_samples = 0;
945 
946  int pass_sample_count = PASS_UNUSED;
947 
948  /* NOTE: Only initialized when the corresponding guiding pass is enabled. */
949  int pass_denoising_albedo = PASS_UNUSED;
950  int pass_denoising_normal = PASS_UNUSED;
951  int pass_motion = PASS_UNUSED;
952 
953  /* For passes which don't need the albedo channel, the actual albedo is replaced with
954  * (0.5, 0.5, 0.5). This flag indicates the real albedo pass has been replaced with fake
955  * values, so denoising of passes which do need albedo can no longer happen. */
956  bool albedo_replaced_with_fake = false;
957 };
958 
959 class OptiXDevice::DenoisePass {
960  public:
961  DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
962  {
963  noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
964  denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
965 
966  const PassInfo pass_info = Pass::get_info(type);
967  num_components = pass_info.num_components;
968  use_compositing = pass_info.use_compositing;
969  use_denoising_albedo = pass_info.use_denoising_albedo;
970  }
971 
972  PassType type;
973 
974  int noisy_offset;
975  int denoised_offset;
976 
977  int num_components;
978  bool use_compositing;
979  bool use_denoising_albedo;
980 };
981 
982 bool OptiXDevice::denoise_buffer(const DeviceDenoiseTask &task)
983 {
984  const CUDAContextScope scope(this);
985 
986  DenoiseContext context(this, task);
987 
988  if (!denoise_ensure(context)) {
989  return false;
990  }
991 
992  if (!denoise_filter_guiding_preprocess(context)) {
993  LOG(ERROR) << "Error preprocessing guiding passes.";
994  return false;
995  }
996 
997  /* Passes which will use real albedo when it is available. */
998  denoise_pass(context, PASS_COMBINED);
999  denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
1000 
1001  /* Passes which do not need albedo; if the real albedo is present it has to become fake. */
1002  denoise_pass(context, PASS_SHADOW_CATCHER);
1003 
1004  return true;
1005 }
1006 
1007 DeviceQueue *OptiXDevice::get_denoise_queue()
1008 {
1009  return &denoiser_.queue;
1010 }
1011 
1012 bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
1013 {
1014  const BufferParams &buffer_params = context.buffer_params;
1015 
1016  const int work_size = buffer_params.width * buffer_params.height;
1017 
1018  DeviceKernelArguments args(&context.guiding_params.device_pointer,
1019  &context.guiding_params.pass_stride,
1020  &context.guiding_params.pass_albedo,
1021  &context.guiding_params.pass_normal,
1022  &context.guiding_params.pass_flow,
1023  &context.render_buffers->buffer.device_pointer,
1024  &buffer_params.offset,
1025  &buffer_params.stride,
1026  &buffer_params.pass_stride,
1027  &context.pass_sample_count,
1028  &context.pass_denoising_albedo,
1029  &context.pass_denoising_normal,
1030  &context.pass_motion,
1031  &buffer_params.full_x,
1032  &buffer_params.full_y,
1033  &buffer_params.width,
1034  &buffer_params.height,
1035  &context.num_samples);
1036 
1037  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
1038 }
1039 
1040 bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context)
1041 {
1042  const BufferParams &buffer_params = context.buffer_params;
1043 
1044  const int work_size = buffer_params.width * buffer_params.height;
1045 
1046  DeviceKernelArguments args(&context.guiding_params.device_pointer,
1047  &context.guiding_params.pass_stride,
1048  &context.guiding_params.pass_albedo,
1049  &buffer_params.width,
1050  &buffer_params.height);
1051 
1052  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
1053 }
1054 
1055 void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
1056 {
1057  const BufferParams &buffer_params = context.buffer_params;
1058 
1059  const DenoisePass pass(pass_type, buffer_params);
1060 
1061  if (pass.noisy_offset == PASS_UNUSED) {
1062  return;
1063  }
1064  if (pass.denoised_offset == PASS_UNUSED) {
1065  LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
1066  return;
1067  }
1068 
1069  if (pass.use_denoising_albedo) {
1070  if (context.albedo_replaced_with_fake) {
1071  LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
1072  return;
1073  }
1074  }
1075  else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
1076  context.albedo_replaced_with_fake = true;
1077  if (!denoise_filter_guiding_set_fake_albedo(context)) {
1078  LOG(ERROR) << "Error replacing real albedo with the fake one.";
1079  return;
1080  }
1081  }
1082 
1083  /* Read and preprocess noisy color input pass. */
1084  denoise_color_read(context, pass);
1085  if (!denoise_filter_color_preprocess(context, pass)) {
1086  LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
1087  return;
1088  }
1089 
1090  if (!denoise_run(context, pass)) {
1091  LOG(ERROR) << "Error running OptiX denoiser.";
1092  return;
1093  }
1094 
1095  /* Store result in the combined pass of the render buffer.
1096  *
1097  * This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
1098  if (!denoise_filter_color_postprocess(context, pass)) {
1099  LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
1100  return;
1101  }
1102 
1103  denoiser_.queue.synchronize();
1104 }
1105 
1106 void OptiXDevice::denoise_color_read(DenoiseContext &context, const DenoisePass &pass)
1107 {
1108  PassAccessor::PassAccessInfo pass_access_info;
1109  pass_access_info.type = pass.type;
1110  pass_access_info.mode = PassMode::NOISY;
1111  pass_access_info.offset = pass.noisy_offset;
1112 
1113  /* Denoiser operates on passes which are used to calculate the approximation, and is never used
1114  * on the approximation. The latter is not even possible because OptiX does not support
1115  * denoising of semi-transparent pixels. */
1116  pass_access_info.use_approximate_shadow_catcher = false;
1117  pass_access_info.use_approximate_shadow_catcher_background = false;
1118  pass_access_info.show_active_pixels = false;
1119 
1120  /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
1121  */
1122  const PassAccessorGPU pass_accessor(
1123  &denoiser_.queue, pass_access_info, 1.0f, context.num_samples);
1124 
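 /* The noisy color is written into the pass's denoised slot on purpose: denoise_run()
  * later uses that same memory as both denoiser input and output. */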
1125  PassAccessor::Destination destination(pass_access_info.type);
1126  destination.d_pixels = context.render_buffers->buffer.device_pointer +
1127  pass.denoised_offset * sizeof(float);
1128  destination.num_components = 3;
1129  destination.pixel_stride = context.buffer_params.pass_stride;
1130 
1131  BufferParams buffer_params = context.buffer_params;
1132  buffer_params.window_x = 0;
1133  buffer_params.window_y = 0;
1134  buffer_params.window_width = buffer_params.width;
1135  buffer_params.window_height = buffer_params.height;
1136 
1137  pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
1138 }
1139 
1140 bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass)
1141 {
1142  const BufferParams &buffer_params = context.buffer_params;
1143 
1144  const int work_size = buffer_params.width * buffer_params.height;
1145 
1146  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
1147  &buffer_params.full_x,
1148  &buffer_params.full_y,
1149  &buffer_params.width,
1150  &buffer_params.height,
1151  &buffer_params.offset,
1152  &buffer_params.stride,
1153  &buffer_params.pass_stride,
1154  &pass.denoised_offset);
1155 
1156  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
1157 }
1158 
1159 bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
1160  const DenoisePass &pass)
1161 {
1162  const BufferParams &buffer_params = context.buffer_params;
1163 
1164  const int work_size = buffer_params.width * buffer_params.height;
1165 
1166  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
1167  &buffer_params.full_x,
1168  &buffer_params.full_y,
1169  &buffer_params.width,
1170  &buffer_params.height,
1171  &buffer_params.offset,
1172  &buffer_params.stride,
1173  &buffer_params.pass_stride,
1174  &context.num_samples,
1175  &pass.noisy_offset,
1176  &pass.denoised_offset,
1177  &context.pass_sample_count,
1178  &pass.num_components,
1179  &pass.use_compositing);
1180 
1181  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
1182 }
1183 
1184 bool OptiXDevice::denoise_ensure(DenoiseContext &context)
1185 {
1186  if (!denoise_create_if_needed(context)) {
1187  LOG(ERROR) << "OptiX denoiser creation has failed.";
1188  return false;
1189  }
1190 
1191  if (!denoise_configure_if_needed(context)) {
1192  LOG(ERROR) << "OptiX denoiser configuration has failed.";
1193  return false;
1194  }
1195 
1196  return true;
1197 }
1198 
1199 bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
1200 {
1201  const bool recreate_denoiser = (denoiser_.optix_denoiser == nullptr) ||
1202  (denoiser_.use_pass_albedo != context.use_pass_albedo) ||
1203  (denoiser_.use_pass_normal != context.use_pass_normal) ||
1204  (denoiser_.use_pass_flow != context.use_pass_flow);
1205  if (!recreate_denoiser) {
1206  return true;
1207  }
1208 
1209  /* Destroy existing handle before creating new one. */
1210  if (denoiser_.optix_denoiser) {
1211  optixDenoiserDestroy(denoiser_.optix_denoiser);
1212  }
1213 
1214  /* Create OptiX denoiser handle on demand when it is first used. */
1215  OptixDenoiserOptions denoiser_options = {};
1216  denoiser_options.guideAlbedo = context.use_pass_albedo;
1217  denoiser_options.guideNormal = context.use_pass_normal;
1218 
1219  OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
1220  if (context.use_pass_flow) {
1221  model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
1222  }
1223 
1224  const OptixResult result = optixDenoiserCreate(
1225  this->context, model, &denoiser_options, &denoiser_.optix_denoiser);
1226 
1227  if (result != OPTIX_SUCCESS) {
1228  set_error("Failed to create OptiX denoiser");
1229  return false;
1230  }
1231 
1232  /* OptiX denoiser handle was created with the requested number of input passes. */
1233  denoiser_.use_pass_albedo = context.use_pass_albedo;
1234  denoiser_.use_pass_normal = context.use_pass_normal;
1235  denoiser_.use_pass_flow = context.use_pass_flow;
1236 
1237  /* OptiX denoiser has been created, but it needs configuration. */
1238  denoiser_.is_configured = false;
1239 
1240  return true;
1241 }
1242 
1243 bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
1244 {
1245  /* Limit the maximum tile size the denoiser can be invoked with. */
1246  const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
1247  min(context.buffer_params.height, 4096));
1248 
1249  if (denoiser_.is_configured &&
1250  (denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) {
1251  return true;
1252  }
1253 
1254  optix_assert(optixDenoiserComputeMemoryResources(
1255  denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));
1256 
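 /* Setup below uses the maximum tile extent including the overlap border on both
  * sides, which is what the tiled invocation feeds the denoiser per tile. */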
1257  /* Allocate denoiser state if tile size has changed since last setup. */
1258  denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
1259  denoiser_.sizes.withOverlapScratchSizeInBytes);
1260 
1261  /* Initialize denoiser state for the current tile size. */
1262  const OptixResult result = optixDenoiserSetup(
1263  denoiser_.optix_denoiser,
1264  0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
1265  * on a stream that is not the default stream. */
1266  tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
1267  tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
1268  denoiser_.state.device_pointer,
1269  denoiser_.sizes.stateSizeInBytes,
1270  denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
1271  denoiser_.sizes.withOverlapScratchSizeInBytes);
1272  if (result != OPTIX_SUCCESS) {
1273  set_error("Failed to set up OptiX denoiser");
1274  return false;
1275  }
1276 
1277  cuda_assert(cuCtxSynchronize());
1278 
1279  denoiser_.is_configured = true;
1280  denoiser_.configured_size = tile_size;
1281 
1282  return true;
1283 }
1284 
1285 bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
1286 {
1287  const BufferParams &buffer_params = context.buffer_params;
1288  const int width = buffer_params.width;
1289  const int height = buffer_params.height;
1290 
1291  /* Set up input and output layer information. */
1292  OptixImage2D color_layer = {0};
1293  OptixImage2D albedo_layer = {0};
1294  OptixImage2D normal_layer = {0};
1295  OptixImage2D flow_layer = {0};
1296 
1297  OptixImage2D output_layer = {0};
1298  OptixImage2D prev_output_layer = {0};
1299 
1300  /* Color pass. */
1301  {
1302  const int pass_denoised = pass.denoised_offset;
1303  const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
1304 
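 /* Point the denoiser directly at the render buffer: pixelStrideInBytes spans the
  * full pass stride, so only the first three floats of each pixel are read as RGB. */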
1305  color_layer.data = context.render_buffers->buffer.device_pointer +
1306  pass_denoised * sizeof(float);
1307  color_layer.width = width;
1308  color_layer.height = height;
1309  color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
1310  color_layer.pixelStrideInBytes = pass_stride_in_bytes;
1311  color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
1312  }
1313 
1314  /* Previous output. */
1315  if (context.prev_output.offset != PASS_UNUSED) {
1316  const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
1317 
1318  prev_output_layer.data = context.prev_output.device_pointer +
1319  context.prev_output.offset * sizeof(float);
1320  prev_output_layer.width = width;
1321  prev_output_layer.height = height;
1322  prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
1323  prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
1324  prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
1325  }
1326 
1327  /* Optional guiding passes: albedo, normal and motion flow. */
1328  if (context.num_input_passes > 1) {
1329  const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
1330  const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
1331  const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
1332 
1333  if (context.use_pass_albedo) {
1334  albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
1335  albedo_layer.width = width;
1336  albedo_layer.height = height;
1337  albedo_layer.rowStrideInBytes = row_stride_in_bytes;
1338  albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
1339  albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
1340  }
1341 
1342  if (context.use_pass_normal) {
1343  normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
1344  normal_layer.width = width;
1345  normal_layer.height = height;
1346  normal_layer.rowStrideInBytes = row_stride_in_bytes;
1347  normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
1348  normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
1349  }
1350 
1351  if (context.use_pass_flow) {
1352  flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
1353  flow_layer.width = width;
1354  flow_layer.height = height;
1355  flow_layer.rowStrideInBytes = row_stride_in_bytes;
1356  flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
1357  flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
1358  }
1359  }
1360 
1361  /* Denoise in place on the noisy input in the render buffers. */
1362  output_layer = color_layer;
1363 
1364  OptixDenoiserGuideLayer guide_layers = {};
1365  guide_layers.albedo = albedo_layer;
1366  guide_layers.normal = normal_layer;
1367  guide_layers.flow = flow_layer;
1368 
1369  OptixDenoiserLayer image_layers = {};
1370  image_layers.input = color_layer;
1371  image_layers.previousOutput = prev_output_layer;
1372  image_layers.output = output_layer;
1373 
1374  /* Finally run denoising. */
1375  OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
1376 
1377  optix_assert(ccl::optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
1378  denoiser_.queue.stream(),
1379  &params,
1380  denoiser_.state.device_pointer,
1381  denoiser_.sizes.stateSizeInBytes,
1382  &guide_layers,
1383  &image_layers,
1384  1,
1385  denoiser_.state.device_pointer +
1386  denoiser_.sizes.stateSizeInBytes,
1387  denoiser_.sizes.withOverlapScratchSizeInBytes,
1388  denoiser_.sizes.overlapWindowSizeInPixels,
1389  denoiser_.configured_size.x,
1390  denoiser_.configured_size.y));
1391 
1392  return true;
1393 }
1394 
1395 bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
1396  OptixBuildOperation operation,
1397  const OptixBuildInput &build_input,
1398  uint16_t num_motion_steps)
1399 {
1400  /* Allocate and build acceleration structures only one at a time, to prevent parallel builds
1401  * from running out of memory (since both original and compacted acceleration structure memory
1402  * may be allocated at the same time for the duration of this function). The builds would
1403  * otherwise happen on the same CUDA stream anyway. */
1404  static thread_mutex mutex;
1405  thread_scoped_lock lock(mutex);
1406 
1407  const CUDAContextScope scope(this);
1408 
1409  const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
1410 
1411  /* Compute memory usage. */
1412  OptixAccelBufferSizes sizes = {};
1413  OptixAccelBuildOptions options = {};
1414  options.operation = operation;
1415  if (use_fast_trace_bvh ||
1416  /* The build flags have to match the ones used to query the built-in curve intersection
1417  program (see optixBuiltinISModuleGet above). */
1418  build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES) {
1419  VLOG_INFO << "Using fast to trace OptiX BVH";
1420  options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
1421  }
1422  else {
1423  VLOG_INFO << "Using fast to update OptiX BVH";
1424  options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
1425  }
1426 
1427  options.motionOptions.numKeys = num_motion_steps;
1428  options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH;
1429  options.motionOptions.timeBegin = 0.0f;
1430  options.motionOptions.timeEnd = 1.0f;
1431 
1432  optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
1433 
1434  /* Allocate required output buffers. */
1435  device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
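 /* Allocate 8 extra bytes so the 8-byte aligned compacted-size property emitted by
  * the build has room at the end of the temporary buffer. */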
1436  temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
1437  if (!temp_mem.device_pointer) {
1438  /* Make sure temporary memory allocation succeeded. */
1439  return false;
1440  }
1441 
1442  /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
1443  device_only_memory<char> &out_data = *bvh->as_data;
1444  if (operation == OPTIX_BUILD_OPERATION_BUILD) {
1445  assert(out_data.device == this);
1446  out_data.alloc_to_device(sizes.outputSizeInBytes);
1447  if (!out_data.device_pointer) {
1448  return false;
1449  }
1450  }
1451  else {
1452  assert(out_data.device_pointer && out_data.device_size >= sizes.outputSizeInBytes);
1453  }
1454 
1455  /* Finally build the acceleration structure. */
1456  OptixAccelEmitDesc compacted_size_prop = {};
1457  compacted_size_prop.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
1458  /* A tiny space was allocated for this property at the end of the temporary buffer above.
1459  * Make sure this pointer is 8-byte aligned. */
1460  compacted_size_prop.result = align_up(temp_mem.device_pointer + sizes.tempSizeInBytes, 8);
1461 
1462  OptixTraversableHandle out_handle = 0;
1463  optix_assert(optixAccelBuild(context,
1464  NULL,
1465  &options,
1466  &build_input,
1467  1,
1468  temp_mem.device_pointer,
1469  sizes.tempSizeInBytes,
1470  out_data.device_pointer,
1471  sizes.outputSizeInBytes,
1472  &out_handle,
1473  use_fast_trace_bvh ? &compacted_size_prop : NULL,
1474  use_fast_trace_bvh ? 1 : 0));
1475  bvh->traversable_handle = static_cast<uint64_t>(out_handle);
1476 
1477  /* Wait for all operations to finish. */
1478  cuda_assert(cuStreamSynchronize(NULL));
1479 
1480  /* Compact acceleration structure to save memory (do not do this in viewport for faster builds).
1481  */
1482  if (use_fast_trace_bvh) {
1483  uint64_t compacted_size = sizes.outputSizeInBytes;
1484  cuda_assert(cuMemcpyDtoH(&compacted_size, compacted_size_prop.result, sizeof(compacted_size)));
1485 
1486  /* Temporary memory is no longer needed, so free it now to make space. */
1487  temp_mem.free();
1488 
1489  /* There is no point compacting if the size does not change. */
1490  if (compacted_size < sizes.outputSizeInBytes) {
1491  device_only_memory<char> compacted_data(this, "optix compacted as", false);
1492  compacted_data.alloc_to_device(compacted_size);
1493  if (!compacted_data.device_pointer) {
1494  /* Do not compact if memory allocation for compacted acceleration structure fails.
1495  * Can just use the uncompacted one then, so succeed here regardless. */
1496  return !have_error();
1497  }
1498 
1499  optix_assert(optixAccelCompact(
1500  context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
1501  bvh->traversable_handle = static_cast<uint64_t>(out_handle);
1502 
1503  /* Wait for compaction to finish. */
1504  cuda_assert(cuStreamSynchronize(NULL));
1505 
1506  std::swap(out_data.device_size, compacted_data.device_size);
1507  std::swap(out_data.device_pointer, compacted_data.device_pointer);
1508  /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
1509  */
1510  }
1511  }
1512 
1513  return !have_error();
1514 }
1515 
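The function above is one instance of the generic OptiX build-then-compact pattern. The following is a minimal sketch of that pattern in isolation, not Cycles code: raw CUDA driver-API allocations stand in for Cycles' memory wrappers, error checking is omitted, and `build_compacted` is a hypothetical helper name.

#include <cstdint>
#include <cuda.h>
#include <optix.h>
#include <optix_stubs.h>

static OptixTraversableHandle build_compacted(OptixDeviceContext ctx,
                                              const OptixBuildInput &input)
{
  OptixAccelBuildOptions options = {};
  options.operation = OPTIX_BUILD_OPERATION_BUILD;
  options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
                       OPTIX_BUILD_FLAG_ALLOW_COMPACTION;

  /* Query how much temporary and output memory the build needs. */
  OptixAccelBufferSizes sizes = {};
  optixAccelComputeMemoryUsage(ctx, &options, &input, 1, &sizes);

  CUdeviceptr temp = 0, out = 0, size_slot = 0;
  cuMemAlloc(&temp, sizes.tempSizeInBytes);
  cuMemAlloc(&out, sizes.outputSizeInBytes);
  cuMemAlloc(&size_slot, sizeof(uint64_t)); /* Holds the emitted size. */

  /* Ask the build to write the compacted size into device memory. */
  OptixAccelEmitDesc emitted = {};
  emitted.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
  emitted.result = size_slot;

  OptixTraversableHandle handle = 0;
  optixAccelBuild(ctx, NULL, &options, &input, 1,
                  temp, sizes.tempSizeInBytes,
                  out, sizes.outputSizeInBytes,
                  &handle, &emitted, 1);
  cuStreamSynchronize(NULL);

  uint64_t compacted_size = 0;
  cuMemcpyDtoH(&compacted_size, size_slot, sizeof(compacted_size));
  cuMemFree(temp);
  cuMemFree(size_slot);

  /* Only compact when it actually saves memory. */
  if (compacted_size < sizes.outputSizeInBytes) {
    CUdeviceptr compacted = 0;
    cuMemAlloc(&compacted, compacted_size);
    optixAccelCompact(ctx, NULL, handle, compacted, compacted_size, &handle);
    cuStreamSynchronize(NULL);
    cuMemFree(out); /* The uncompacted structure is no longer referenced. */
  }
  return handle;
}

As in the real code above, compaction only pays off for static (final-render) BVHs; viewport builds skip it to keep rebuild times low.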
1516 void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
1517 {
1518  const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
1519 
1520  free_bvh_memory_delayed();
1521 
1522  BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
1523 
1524  progress.set_substatus("Building OptiX acceleration structure");
1525 
1526  if (!bvh->params.top_level) {
1527  assert(bvh->objects.size() == 1 && bvh->geometry.size() == 1);
1528 
1529  /* Refit is only possible in viewport for now (because AS is built with
1530  * OPTIX_BUILD_FLAG_ALLOW_UPDATE only there, see above). */
1531  OptixBuildOperation operation = OPTIX_BUILD_OPERATION_BUILD;
1532  if (refit && !use_fast_trace_bvh) {
1533  assert(bvh_optix->traversable_handle != 0);
1534  operation = OPTIX_BUILD_OPERATION_UPDATE;
1535  }
1536  else {
1537  bvh_optix->as_data->free();
1538  bvh_optix->traversable_handle = 0;
1539  }
1540 
1541  /* Build bottom level acceleration structures (BLAS). */
1542  Geometry *const geom = bvh->geometry[0];
1543  if (geom->geometry_type == Geometry::HAIR) {
1544  /* Build BLAS for curve primitives. */
1545  Hair *const hair = static_cast<Hair *const>(geom);
1546  if (hair->num_segments() == 0) {
1547  return;
1548  }
1549 
1550  const size_t num_segments = hair->num_segments();
1551 
1552  size_t num_motion_steps = 1;
1553  Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
1554  if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
1555  num_motion_steps = hair->get_motion_steps();
1556  }
1557 
1558  device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
1559  device_vector<int> index_data(this, "optix temp index data", MEM_READ_ONLY);
1560  device_vector<float4> vertex_data(this, "optix temp vertex data", MEM_READ_ONLY);
1561  /* Four control points for each curve segment. */
1562  const size_t num_vertices = num_segments * 4;
1563  if (hair->curve_shape == CURVE_THICK) {
1564  index_data.alloc(num_segments);
1565  vertex_data.alloc(num_vertices * num_motion_steps);
1566  }
1567  else
1568  aabb_data.alloc(num_segments * num_motion_steps);
1569 
1570  /* Get AABBs for each motion step. */
1571  for (size_t step = 0; step < num_motion_steps; ++step) {
1572  /* The center step for motion vertices is not stored in the attribute. */
1573  const float3 *keys = hair->get_curve_keys().data();
1574  size_t center_step = (num_motion_steps - 1) / 2;
1575  if (step != center_step) {
1576  size_t attr_offset = (step > center_step) ? step - 1 : step;
1577  /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
1578  keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size();
1579  }
1580 
1581  for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
1582  const Hair::Curve curve = hair->get_curve(j);
1583  const array<float> &curve_radius = hair->get_curve_radius();
1584 
1585  for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
1586  if (hair->curve_shape == CURVE_THICK) {
1587  int k0 = curve.first_key + segment;
1588  int k1 = k0 + 1;
1589  int ka = max(k0 - 1, curve.first_key);
1590  int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
1591 
1592  index_data[i] = i * 4;
1593  float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
1594 
1595 # if OPTIX_ABI_VERSION >= 55
1596  v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]);
1597  v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]);
1598  v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]);
1599  v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]);
1600 # else
1601  const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
1602  const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
1603  const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
1604  const float4 pw = make_float4(
1605  curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
1606 
1607  /* Convert Catmull-Rom data to B-spline. */
1608  static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
1609  static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
1610  static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
1611  static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
1612 
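 /* Each cr2bsp row above, dotted with the four Catmull-Rom points, yields one
  * B-spline control point (a change of basis: inverse B-spline basis times the
  * Catmull-Rom basis); the same weights convert the radius stored in w. */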
1613  v[0] = make_float4(
1614  dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
1615  v[1] = make_float4(
1616  dot(cr2bsp1, px), dot(cr2bsp1, py), dot(cr2bsp1, pz), dot(cr2bsp1, pw));
1617  v[2] = make_float4(
1618  dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
1619  v[3] = make_float4(
1620  dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
1621 # endif
1622  }
1623  else {
1624  BoundBox bounds = BoundBox::empty;
1625  curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);
1626 
1627  const size_t index = step * num_segments + i;
1628  aabb_data[index].minX = bounds.min.x;
1629  aabb_data[index].minY = bounds.min.y;
1630  aabb_data[index].minZ = bounds.min.z;
1631  aabb_data[index].maxX = bounds.max.x;
1632  aabb_data[index].maxY = bounds.max.y;
1633  aabb_data[index].maxZ = bounds.max.z;
1634  }
1635  }
1636  }
1637  }
1638 
1639  /* Upload AABB data to GPU. */
1640  aabb_data.copy_to_device();
1641  index_data.copy_to_device();
1642  vertex_data.copy_to_device();
1643 
1644  vector<device_ptr> aabb_ptrs;
1645  aabb_ptrs.reserve(num_motion_steps);
1646  vector<device_ptr> width_ptrs;
1647  vector<device_ptr> vertex_ptrs;
1648  width_ptrs.reserve(num_motion_steps);
1649  vertex_ptrs.reserve(num_motion_steps);
1650  for (size_t step = 0; step < num_motion_steps; ++step) {
1651  aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb));
1652  const device_ptr base_ptr = vertex_data.device_pointer +
1653  step * num_vertices * sizeof(float4);
1654  width_ptrs.push_back(base_ptr + 3 * sizeof(float)); /* Offset by vertex size. */
1655  vertex_ptrs.push_back(base_ptr);
1656  }
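 /* The width (radius) buffer aliases the vertex array: each pointer targets
  * the w component of the first float4 vertex (offset 3 * sizeof(float)), and
  * widthStrideInBytes below advances it by sizeof(float4) per vertex. */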
1657 
1658  /* Force a single any-hit call, so shadow record-all behavior works correctly. */
1659  unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1660  OptixBuildInput build_input = {};
1661  if (hair->curve_shape == CURVE_THICK) {
1662  build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
1663 # if OPTIX_ABI_VERSION >= 55
1664  build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
1665 # else
1666  build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
1667 # endif
1668  build_input.curveArray.numPrimitives = num_segments;
1669  build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1670  build_input.curveArray.numVertices = num_vertices;
1671  build_input.curveArray.vertexStrideInBytes = sizeof(float4);
1672  build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
1673  build_input.curveArray.widthStrideInBytes = sizeof(float4);
1674  build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
1675  build_input.curveArray.indexStrideInBytes = sizeof(int);
1676  build_input.curveArray.flag = build_flags;
1677  build_input.curveArray.primitiveIndexOffset = hair->curve_segment_offset;
1678  }
1679  else {
1680  /* Disable visibility test any-hit program, since it is already checked during
1681  * intersection. Those trace calls that require any-hit can force it with a ray flag. */
1682  build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
1683 
1684  build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1685  build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1686  build_input.customPrimitiveArray.numPrimitives = num_segments;
1687  build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
1688  build_input.customPrimitiveArray.flags = &build_flags;
1689  build_input.customPrimitiveArray.numSbtRecords = 1;
1690  build_input.customPrimitiveArray.primitiveIndexOffset = hair->curve_segment_offset;
1691  }
1692 
1693  if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1694  progress.set_error("Failed to build OptiX acceleration structure");
1695  }
1696  }
1697  else if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
1698  /* Build BLAS for triangle primitives. */
1699  Mesh *const mesh = static_cast<Mesh *const>(geom);
1700  if (mesh->num_triangles() == 0) {
1701  return;
1702  }
1703 
1704  const size_t num_verts = mesh->get_verts().size();
1705 
1706  size_t num_motion_steps = 1;
1707  Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
1708  if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
1709  num_motion_steps = mesh->get_motion_steps();
1710  }
1711 
1712  device_vector<int> index_data(this, "optix temp index data", MEM_READ_ONLY);
1713  index_data.alloc(mesh->get_triangles().size());
1714  memcpy(index_data.data(),
1715  mesh->get_triangles().data(),
1716  mesh->get_triangles().size() * sizeof(int));
1717  device_vector<float4> vertex_data(this, "optix temp vertex data", MEM_READ_ONLY);
1718  vertex_data.alloc(num_verts * num_motion_steps);
1719 
1720  for (size_t step = 0; step < num_motion_steps; ++step) {
1721  const float3 *verts = mesh->get_verts().data();
1722 
1723  size_t center_step = (num_motion_steps - 1) / 2;
1724  /* The center step for motion vertices is not stored in the attribute. */
1725  if (step != center_step) {
1726  verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
1727  }
1728 
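 /* Cycles pads float3 to 16 bytes (sizeof(float3) == sizeof(float4), as noted
  * for the hair case above), so this copy fills one full float4 slot per vertex. */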
1729  memcpy(vertex_data.data() + num_verts * step, verts, num_verts * sizeof(float3));
1730  }
1731 
1732  /* Upload triangle data to GPU. */
1733  index_data.copy_to_device();
1734  vertex_data.copy_to_device();
1735 
1736  vector<device_ptr> vertex_ptrs;
1737  vertex_ptrs.reserve(num_motion_steps);
1738  for (size_t step = 0; step < num_motion_steps; ++step) {
1739  vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3));
1740  }
1741 
1742  /* Force a single any-hit call, so shadow record-all behavior works correctly. */
1743  unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1744  OptixBuildInput build_input = {};
1745  build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
1746  build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1747  build_input.triangleArray.numVertices = num_verts;
1748  build_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
1749  build_input.triangleArray.vertexStrideInBytes = sizeof(float4);
1750  build_input.triangleArray.indexBuffer = index_data.device_pointer;
1751  build_input.triangleArray.numIndexTriplets = mesh->num_triangles();
1752  build_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
1753  build_input.triangleArray.indexStrideInBytes = 3 * sizeof(int);
1754  build_input.triangleArray.flags = &build_flags;
1755  /* The SBT does not store per primitive data since Cycles already allocates separate
1756  * buffers for that purpose. OptiX does not allow this to be zero though, so just pass in
1757  * one and rely on that having the same meaning in this case. */
1758  build_input.triangleArray.numSbtRecords = 1;
1759  build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
1760 
1761  if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1762  progress.set_error("Failed to build OptiX acceleration structure");
1763  }
1764  }
1765  else if (geom->geometry_type == Geometry::POINTCLOUD) {
1766  /* Build BLAS for points primitives. */
1767  PointCloud *const pointcloud = static_cast<PointCloud *const>(geom);
1768  const size_t num_points = pointcloud->num_points();
1769  if (num_points == 0) {
1770  return;
1771  }
1772 
1773  size_t num_motion_steps = 1;
1774  Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
1775  if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) {
1776  num_motion_steps = pointcloud->get_motion_steps();
1777  }
1778 
1779  device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
1780  aabb_data.alloc(num_points * num_motion_steps);
1781 
1782  /* Get AABBs for each motion step. */
1783  for (size_t step = 0; step < num_motion_steps; ++step) {
1784  /* The center step for motion vertices is not stored in the attribute. */
1785  const float3 *points = pointcloud->get_points().data();
1786  const float *radius = pointcloud->get_radius().data();
1787  size_t center_step = (num_motion_steps - 1) / 2;
1788  if (step != center_step) {
1789  size_t attr_offset = (step > center_step) ? step - 1 : step;
1790  /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
1791  points = motion_points->data_float3() + attr_offset * num_points;
1792  }
1793 
1794  for (size_t i = 0; i < num_points; ++i) {
1795  const PointCloud::Point point = pointcloud->get_point(i);
1796  BoundBox bounds = BoundBox::empty;
1797  point.bounds_grow(points, radius, bounds);
1798 
1799  const size_t index = step * num_points + i;
1800  aabb_data[index].minX = bounds.min.x;
1801  aabb_data[index].minY = bounds.min.y;
1802  aabb_data[index].minZ = bounds.min.z;
1803  aabb_data[index].maxX = bounds.max.x;
1804  aabb_data[index].maxY = bounds.max.y;
1805  aabb_data[index].maxZ = bounds.max.z;
1806  }
1807  }
1808 
1809  /* Upload AABB data to GPU. */
1810  aabb_data.copy_to_device();
1811 
1812  vector<device_ptr> aabb_ptrs;
1813  aabb_ptrs.reserve(num_motion_steps);
1814  for (size_t step = 0; step < num_motion_steps; ++step) {
1815  aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points * sizeof(OptixAabb));
1816  }
1817 
1818  /* Disable visibility test any-hit program, since it is already checked during
1819  * intersection. Those trace calls that require any-hit can force it with a ray flag.
1820  * For those, force a single any-hit call, so shadow record-all behavior works correctly. */
1821  unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
1822  OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1823  OptixBuildInput build_input = {};
1824  build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1825 # if OPTIX_ABI_VERSION < 23
1826  build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1827  build_input.aabbArray.numPrimitives = num_points;
1828  build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
1829  build_input.aabbArray.flags = &build_flags;
1830  build_input.aabbArray.numSbtRecords = 1;
1831  build_input.aabbArray.primitiveIndexOffset = pointcloud->prim_offset;
1832 # else
1833  build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1834  build_input.customPrimitiveArray.numPrimitives = num_points;
1835  build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
1836  build_input.customPrimitiveArray.flags = &build_flags;
1837  build_input.customPrimitiveArray.numSbtRecords = 1;
1838  build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->prim_offset;
1839 # endif
1840 
1841  if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1842  progress.set_error("Failed to build OptiX acceleration structure");
1843  }
1844  }
1845  }
1846  else {
1847  unsigned int num_instances = 0;
1848  unsigned int max_num_instances = 0xFFFFFFFF;
1849 
1850  bvh_optix->as_data->free();
1851  bvh_optix->traversable_handle = 0;
1852  bvh_optix->motion_transform_data->free();
1853 
1854  optixDeviceContextGetProperty(context,
1855  OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
1856  &max_num_instances,
1857  sizeof(max_num_instances));
1858  /* Do not count first bit, which is used to distinguish instanced and non-instanced objects. */
1859  max_num_instances >>= 1;
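 /* E.g. starting from the 0xFFFFFFFF fallback above, this leaves 0x7FFFFFFF
  * (about 2.1 billion) usable instance IDs. */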
1860  if (bvh->objects.size() > max_num_instances) {
1861  progress.set_error(
1862  "Failed to build OptiX acceleration structure because there are too many instances");
1863  return;
1864  }
1865 
1866  /* Fill instance descriptions. */
1867  device_vector<OptixInstance> instances(this, "optix tlas instances", MEM_READ_ONLY);
1868  instances.alloc(bvh->objects.size());
1869 
1870  /* Calculate total motion transform size and allocate memory for them. */
1871  size_t motion_transform_offset = 0;
1872  if (motion_blur) {
1873  size_t total_motion_transform_size = 0;
1874  for (Object *const ob : bvh->objects) {
1875  if (ob->is_traceable() && ob->use_motion()) {
1876  total_motion_transform_size = align_up(total_motion_transform_size,
1877  OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1878  const size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2;
1879  total_motion_transform_size = total_motion_transform_size +
1880  sizeof(OptixSRTMotionTransform) +
1881  motion_keys * sizeof(OptixSRTData);
1882  }
1883  }
1884 
1885  assert(bvh_optix->motion_transform_data->device == this);
1886  bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
1887  }
1888 
1889  for (Object *ob : bvh->objects) {
1890  /* Skip non-traceable objects. */
1891  if (!ob->is_traceable()) {
1892  continue;
1893  }
1894 
1895  BVHOptiX *const blas = static_cast<BVHOptiX *>(ob->get_geometry()->bvh);
1896  OptixTraversableHandle handle = blas->traversable_handle;
1897 
1898  OptixInstance &instance = instances[num_instances++];
1899  memset(&instance, 0, sizeof(instance));
1900 
1901  /* Clear transform to identity matrix. */
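 /* (instance.transform is a row-major 3x4 object-to-world matrix, so elements
  * 0, 5 and 10 form its diagonal.) */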
1902  instance.transform[0] = 1.0f;
1903  instance.transform[5] = 1.0f;
1904  instance.transform[10] = 1.0f;
1905 
1906  /* Set user instance ID to object index. */
1907  instance.instanceId = ob->get_device_index();
1908 
1909  /* Add some of the object visibility bits to the mask.
1910  * __prim_visibility contains the combined visibility bits of all instances, so is not
1911  * reliable if they differ between instances. But the OptiX visibility mask can only contain
1912  * 8 bits, so have to trade-off here and select just a few important ones.
1913  */
1914  instance.visibilityMask = ob->visibility_for_tracing() & 0xFF;
1915 
1916  /* Have to have at least one bit in the mask, or else the instance would always be culled. */
1917  if (0 == instance.visibilityMask) {
1918  instance.visibilityMask = 0xFF;
1919  }
1920 
1921  if (ob->get_geometry()->geometry_type == Geometry::HAIR &&
1922  static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
1923  if (motion_blur && ob->get_geometry()->has_motion_blur()) {
1924  /* Select between motion blur and non-motion blur built-in intersection module. */
1925  instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
1926  }
1927  }
1928  else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) {
1929  /* Use the hit group that has an intersection program for point clouds. */
1930  instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
1931 
1932  /* Also skip point clouds in local trace calls. */
1933  instance.visibilityMask |= 4;
1934  }
1935 
1936 # if OPTIX_ABI_VERSION < 55
1937  /* Cannot disable any-hit program for thick curves, since it needs to filter out end-caps. */
1938  else
1939 # endif
1940  {
1941  /* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
1942  * since it needs to filter out end-caps there).
1943  *
1944  * It is enabled where necessary (when the visibility mask exceeds 8 bits, or when another
1945  * any-hit program like __anyhit__kernel_optix_shadow_all_hit is needed) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
1946  */
1947  instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
1948  }
1949 
1950  /* Insert motion traversable if object has motion. */
1951  if (motion_blur && ob->use_motion()) {
1952  size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2;
1953  size_t motion_transform_size = sizeof(OptixSRTMotionTransform) +
1954  motion_keys * sizeof(OptixSRTData);
1955 
1956  const CUDAContextScope scope(this);
1957 
1958  motion_transform_offset = align_up(motion_transform_offset,
1959  OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1960  CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
1961  motion_transform_offset;
1962  motion_transform_offset += motion_transform_size;
1963 
1964  /* Allocate host side memory for motion transform and fill it with transform data. */
1965  OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>(
1966  new uint8_t[motion_transform_size]);
1967  motion_transform.child = handle;
1968  motion_transform.motionOptions.numKeys = ob->get_motion().size();
1969  motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
1970  motion_transform.motionOptions.timeBegin = 0.0f;
1971  motion_transform.motionOptions.timeEnd = 1.0f;
1972 
1973  OptixSRTData *const srt_data = motion_transform.srtData;
1974  array<DecomposedTransform> decomp(ob->get_motion().size());
1975  transform_motion_decompose(
1976  decomp.data(), ob->get_motion().data(), ob->get_motion().size());
1977 
1978  for (size_t i = 0; i < ob->get_motion().size(); ++i) {
1979  /* Scale. */
1980  srt_data[i].sx = decomp[i].y.w; /* scale.x.x */
1981  srt_data[i].sy = decomp[i].z.w; /* scale.y.y */
1982  srt_data[i].sz = decomp[i].w.w; /* scale.z.z */
1983 
1984  /* Shear. */
1985  srt_data[i].a = decomp[i].z.x; /* scale.x.y */
1986  srt_data[i].b = decomp[i].z.y; /* scale.x.z */
1987  srt_data[i].c = decomp[i].w.x; /* scale.y.z */
1988  assert(decomp[i].z.z == 0.0f); /* scale.y.x */
1989  assert(decomp[i].w.y == 0.0f); /* scale.z.x */
1990  assert(decomp[i].w.z == 0.0f); /* scale.z.y */
1991 
1992  /* Pivot point. */
1993  srt_data[i].pvx = 0.0f;
1994  srt_data[i].pvy = 0.0f;
1995  srt_data[i].pvz = 0.0f;
1996 
1997  /* Rotation. */
1998  srt_data[i].qx = decomp[i].x.x;
1999  srt_data[i].qy = decomp[i].x.y;
2000  srt_data[i].qz = decomp[i].x.z;
2001  srt_data[i].qw = decomp[i].x.w;
2002 
2003  /* Translation. */
2004  srt_data[i].tx = decomp[i].y.x;
2005  srt_data[i].ty = decomp[i].y.y;
2006  srt_data[i].tz = decomp[i].y.z;
2007  }
2008 
2009  /* Upload motion transform to GPU. */
2010  cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
2011  delete[] reinterpret_cast<uint8_t *>(&motion_transform);
2012 
2013  /* Get traversable handle to motion transform. */
2014  optixConvertPointerToTraversableHandle(context,
2015  motion_transform_gpu,
2016  OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM,
2017  &instance.traversableHandle);
2018  }
2019  else {
2020  instance.traversableHandle = handle;
2021 
2022  if (ob->get_geometry()->is_instanced()) {
2023  /* Set transform matrix. */
2024  memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
2025  }
2026  }
2027  }
2028 
2029  /* Upload instance descriptions. */
2030  instances.resize(num_instances);
2031  instances.copy_to_device();
2032 
2033  /* Build top-level acceleration structure (TLAS) */
2034  OptixBuildInput build_input = {};
2035  build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
2036  build_input.instanceArray.instances = instances.device_pointer;
2037  build_input.instanceArray.numInstances = num_instances;
2038 
2039  if (!build_optix_bvh(bvh_optix, OPTIX_BUILD_OPERATION_BUILD, build_input, 0)) {
2040  progress.set_error("Failed to build OptiX acceleration structure");
2041  }
2042  tlas_handle = bvh_optix->traversable_handle;
2043  }
2044 }
2045 
2046 void OptiXDevice::release_optix_bvh(BVH *bvh)
2047 {
2048  thread_scoped_lock lock(delayed_free_bvh_mutex);
2049  /* Do delayed free of BVH memory, since geometry holding BVH might be deleted
2050  * while GPU is still rendering. */
2051  BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
2052 
2053  delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data));
2054  delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data));
2055  bvh_optix->traversable_handle = 0;
2056 }
2057 
2058 void OptiXDevice::free_bvh_memory_delayed()
2059 {
2060  thread_scoped_lock lock(delayed_free_bvh_mutex);
2061  delayed_free_bvh_memory.free_memory();
2062 }
2063 
2064 void OptiXDevice::const_copy_to(const char *name, void *host, size_t size)
2065 {
2066  /* Set constant memory for CUDA module. */
2067  CUDADevice::const_copy_to(name, host, size);
2068 
2069  if (strcmp(name, "data") == 0) {
2070  assert(size <= sizeof(KernelData));
2071 
2072  /* Update traversable handle (it differs per device in multi-device rendering). */
2073  KernelData *const data = (KernelData *)host;
2074  *(OptixTraversableHandle *)&data->device_bvh = tlas_handle;
2075 
2076  update_launch_params(offsetof(KernelParamsOptiX, data), host, size);
2077  return;
2078  }
2079 
2080  /* Update data storage pointers in launch parameters. */
2081 # define KERNEL_DATA_ARRAY(data_type, data_name) \
2082  if (strcmp(name, #data_name) == 0) { \
2083  update_launch_params(offsetof(KernelParamsOptiX, data_name), host, size); \
2084  return; \
2085  }
2086  KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
2087 # include "kernel/data_arrays.h"
2088 # undef KERNEL_DATA_ARRAY
2089 }
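For illustration, assuming a hypothetical entry KERNEL_DATA_ARRAY(float, example_array) in kernel/data_arrays.h, the macro above expands to one comparison of this shape per declared array:

/* Hypothetical expansion sketch; `example_array` is not a real array name. */
if (strcmp(name, "example_array") == 0) {
  update_launch_params(offsetof(KernelParamsOptiX, example_array), host, size);
  return;
}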
2090 
2091 void OptiXDevice::update_launch_params(size_t offset, void *data, size_t data_size)
2092 {
2093  const CUDAContextScope scope(this);
2094 
2095  cuda_assert(cuMemcpyHtoD(launch_params.device_pointer + offset, data, data_size));
2096 }
2097 
2098 CCL_NAMESPACE_END
2099 
2100 #endif /* WITH_OPTIX */