Blender  V3.3
svm.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #include "device/device.h"
5 
6 #include "scene/background.h"
7 #include "scene/light.h"
8 #include "scene/mesh.h"
9 #include "scene/scene.h"
10 #include "scene/shader.h"
11 #include "scene/shader_graph.h"
12 #include "scene/shader_nodes.h"
13 #include "scene/stats.h"
14 #include "scene/svm.h"
15 
16 #include "util/foreach.h"
17 #include "util/log.h"
18 #include "util/progress.h"
19 #include "util/task.h"
20 
22 
23 /* Shader Manager */
24 
26 {
27 }
28 
30 {
31 }
32 
/* Reset hook of the SVM shader manager. The body is empty, so calling it does
 * nothing and the scene argument is unused. */
33 void SVMShaderManager::reset(Scene * /*scene*/)
34 {
35 }
36 
38  Shader *shader,
39  Progress *progress,
40  array<int4> *svm_nodes)
41 {
42  if (progress->get_cancel()) {
43  return;
44  }
45  assert(shader->graph);
46 
47  SVMCompiler::Summary summary;
48  SVMCompiler compiler(scene);
49  compiler.background = (shader == scene->background->get_shader(scene));
50  compiler.compile(shader, *svm_nodes, 0, &summary);
51 
52  VLOG_WORK << "Compilation summary:\n"
53  << "Shader name: " << shader->name << "\n"
54  << summary.full_report();
55 }
56 
58  DeviceScene *dscene,
59  Scene *scene,
60  Progress &progress)
61 {
62  if (!need_update())
63  return;
64 
65  scoped_callback_timer timer([scene](double time) {
66  if (scene->update_stats) {
67  scene->update_stats->svm.times.add_entry({"device_update", time});
68  }
69  });
70 
71  const int num_shaders = scene->shaders.size();
72 
73  VLOG_INFO << "Total " << num_shaders << " shaders.";
74 
75  double start_time = time_dt();
76 
77  /* free the data of the previous update before rebuilding */
78  device_free(device, dscene, scene);
79 
80  /* Build all shaders. */
82  vector<array<int4>> shader_svm_nodes(num_shaders);
83  for (int i = 0; i < num_shaders; i++) {
85  this,
86  scene,
87  scene->shaders[i],
88  &progress,
89  &shader_svm_nodes[i]));
90  }
92 
93  if (progress.get_cancel()) {
94  return;
95  }
96 
97  /* The global node list contains a jump table (one node per shader)
98  * followed by the nodes of all shaders. */
99  int svm_nodes_size = num_shaders;
100  for (int i = 0; i < num_shaders; i++) {
101  /* Since we're not copying the local jump node, the size ends up being one node lower. */
102  svm_nodes_size += shader_svm_nodes[i].size() - 1;
103  }
104 
105  int4 *svm_nodes = dscene->svm_nodes.alloc(svm_nodes_size);
106 
107  int node_offset = num_shaders;
108  for (int i = 0; i < num_shaders; i++) {
109  Shader *shader = scene->shaders[i];
110 
111  shader->clear_modified();
112  if (shader->get_use_mis() && shader->has_surface_emission) {
114  }
115 
116  /* Update the global jump table.
117  * Each compiled shader starts with a jump node that has offsets local
118  * to the shader, so copy those and add the offset into the global node list. */
119  int4 &global_jump_node = svm_nodes[shader->id];
120  int4 &local_jump_node = shader_svm_nodes[i][0];
121 
122  global_jump_node.x = NODE_SHADER_JUMP;
123  global_jump_node.y = local_jump_node.y - 1 + node_offset;
124  global_jump_node.z = local_jump_node.z - 1 + node_offset;
125  global_jump_node.w = local_jump_node.w - 1 + node_offset;
126 
127  node_offset += shader_svm_nodes[i].size() - 1;
128  }
129 
130  /* Copy the nodes of each shader into the correct location. */
131  svm_nodes += num_shaders;
132  for (int i = 0; i < num_shaders; i++) {
133  int shader_size = shader_svm_nodes[i].size() - 1;
134 
135  memcpy(svm_nodes, &shader_svm_nodes[i][1], sizeof(int4) * shader_size);
136  svm_nodes += shader_size;
137  }
138 
139  if (progress.get_cancel()) {
140  return;
141  }
142 
143  dscene->svm_nodes.copy_to_device();
144 
145  device_update_common(device, dscene, scene, progress);
146 
147  update_flags = UPDATE_NONE;
148 
149  VLOG_INFO << "Shader manager updated " << num_shaders << " shaders in " << time_dt() - start_time
150  << " seconds.";
151 }
152 
154 {
155  device_free_common(device, dscene, scene);
156 
157  dscene->svm_nodes.free();
158 }
159 
160 /* Graph Compiler */
161 
163 {
164  max_stack_use = 0;
168  background = false;
170  compile_failed = false;
171 
172  /* This struct has one entry for every node, in order of ShaderNodeType definition. */
173  svm_node_types_used = (std::atomic_int *)&scene->dscene.data.svm_usage;
174 }
175 
177 {
178  int size = 0;
179 
180  switch (type) {
181  case SocketType::FLOAT:
182  case SocketType::INT:
183  size = 1;
184  break;
185  case SocketType::COLOR:
186  case SocketType::VECTOR:
187  case SocketType::NORMAL:
188  case SocketType::POINT:
189  size = 3;
190  break;
191  case SocketType::CLOSURE:
192  size = 0;
193  break;
194  default:
195  assert(0);
196  break;
197  }
198 
199  return size;
200 }
201 
203 {
204  int offset = -1;
205 
206  /* find free space in stack & mark as used */
207  for (int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
208  if (active_stack.users[i])
209  num_unused = 0;
210  else
211  num_unused++;
212 
213  if (num_unused == size) {
214  offset = i + 1 - size;
215  max_stack_use = max(i + 1, max_stack_use);
216 
217  while (i >= offset)
218  active_stack.users[i--] = 1;
219 
220  return offset;
221  }
222  }
223 
224  if (!compile_failed) {
225  compile_failed = true;
226  fprintf(stderr,
227  "Cycles: out of SVM stack space, shader \"%s\" too big.\n",
228  current_shader->name.c_str());
229  }
230 
231  return 0;
232 }
233 
235 {
237 }
238 
240 {
241  int size = stack_size(type);
242 
243  for (int i = 0; i < size; i++)
244  active_stack.users[offset + i]--;
245 }
246 
248 {
249  /* stack offset assign? */
250  if (input->stack_offset == SVM_STACK_INVALID) {
251  if (input->link) {
252  /* linked to output -> use output offset */
253  assert(input->link->stack_offset != SVM_STACK_INVALID);
254  input->stack_offset = input->link->stack_offset;
255  }
256  else {
257  Node *node = input->parent;
258 
259  /* not linked to output -> add nodes to load default value */
260  input->stack_offset = stack_find_offset(input->type());
261 
262  if (input->type() == SocketType::FLOAT) {
263  add_node(NODE_VALUE_F,
264  __float_as_int(node->get_float(input->socket_type)),
265  input->stack_offset);
266  }
267  else if (input->type() == SocketType::INT) {
268  add_node(NODE_VALUE_F, node->get_int(input->socket_type), input->stack_offset);
269  }
270  else if (input->type() == SocketType::VECTOR || input->type() == SocketType::NORMAL ||
271  input->type() == SocketType::POINT || input->type() == SocketType::COLOR) {
272 
273  add_node(NODE_VALUE_V, input->stack_offset);
274  add_node(NODE_VALUE_V, node->get_float3(input->socket_type));
275  }
276  else /* should not get called for closure */
277  assert(0);
278  }
279  }
280 
281  return input->stack_offset;
282 }
283 
285 {
286  /* if no stack offset assigned yet, find one */
287  if (output->stack_offset == SVM_STACK_INVALID)
288  output->stack_offset = stack_find_offset(output->type());
289 
290  return output->stack_offset;
291 }
292 
294 {
295  if (input->link || input->constant_folded_in)
296  return stack_assign(input);
297 
298  return SVM_STACK_INVALID;
299 }
300 
302 {
303  if (!output->links.empty())
304  return stack_assign(output);
305 
306  return SVM_STACK_INVALID;
307 }
308 
310 {
311  if (output->stack_offset == SVM_STACK_INVALID) {
312  assert(input->link);
313  assert(stack_size(output->type()) == stack_size(input->link->type()));
314 
315  output->stack_offset = input->link->stack_offset;
316 
317  int size = stack_size(output->type());
318 
319  for (int i = 0; i < size; i++)
320  active_stack.users[output->stack_offset + i]++;
321  }
322 }
323 
325 {
326  /* optimization we should add:
327  * find and lower user counts for outputs for which all inputs are done.
328  * this is done before the node is compiled, under the assumption that the
329  * node will first load all inputs from the stack and then write its
330  * outputs. this used to work, but was disabled because it gave trouble
331  * with inputs getting stack positions assigned */
332 
333  foreach (ShaderInput *input, node->inputs) {
334  ShaderOutput *output = input->link;
335 
336  if (output && output->stack_offset != SVM_STACK_INVALID) {
337  bool all_done = true;
338 
339  /* optimization we should add: verify if in->parent is actually used */
340  foreach (ShaderInput *in, output->links)
341  if (in->parent != node && done.find(in->parent) == done.end())
342  all_done = false;
343 
344  if (all_done) {
345  stack_clear_offset(output->type(), output->stack_offset);
346  output->stack_offset = SVM_STACK_INVALID;
347 
348  foreach (ShaderInput *in, output->links)
349  in->stack_offset = SVM_STACK_INVALID;
350  }
351  }
352  }
353 }
354 
356 {
357  foreach (ShaderInput *input, node->inputs) {
358  if (!input->link && input->stack_offset != SVM_STACK_INVALID) {
359  stack_clear_offset(input->type(), input->stack_offset);
360  input->stack_offset = SVM_STACK_INVALID;
361  }
362  }
363 }
364 
366 {
367  assert(x <= 255);
368  assert(y <= 255);
369  assert(z <= 255);
370  assert(w <= 255);
371 
372  return (x) | (y << 8) | (z << 16) | (w << 24);
373 }
374 
375 void SVMCompiler::add_node(int a, int b, int c, int d)
376 {
378 }
379 
381 {
382  svm_node_types_used[type] = true;
384 }
385 
387 {
388  svm_node_types_used[type] = true;
391 }
392 
394 {
397 }
398 
400 {
401  return scene->shader_manager->get_attribute_id(name);
402 }
403 
405 {
407 }
408 
410 {
412  return (std) ? attribute(std) : attribute(name);
413 }
414 
416  const ShaderNodeSet &done,
418  ShaderNode *skip_node)
419 {
420  ShaderNode *node = (input->link) ? input->link->parent : NULL;
421  if (node != NULL && done.find(node) == done.end() && node != skip_node &&
422  dependencies.find(node) == dependencies.end()) {
423  foreach (ShaderInput *in, node->inputs) {
424  find_dependencies(dependencies, done, in, skip_node);
425  }
426  dependencies.insert(node);
427  }
428 }
429 
431 {
432  node->compile(*this);
433  stack_clear_users(node, done);
435 
437  if (node->has_spatial_varying())
439  if (node->get_feature() & KERNEL_FEATURE_NODE_RAYTRACE)
441  }
442  else if (current_type == SHADER_TYPE_VOLUME) {
443  if (node->has_spatial_varying())
445  if (node->has_attribute_dependency())
447  }
448 
449  if (node->has_integrator_dependency()) {
451  }
452 }
453 
455 {
456  ShaderNodeSet &done = state->nodes_done;
457  vector<bool> &done_flag = state->nodes_done_flag;
458 
459  bool nodes_done;
460  do {
461  nodes_done = true;
462 
463  foreach (ShaderNode *node, nodes) {
464  if (!done_flag[node->id]) {
465  bool inputs_done = true;
466 
467  foreach (ShaderInput *input, node->inputs) {
468  if (input->link && !done_flag[input->link->parent->id]) {
469  inputs_done = false;
470  }
471  }
472  if (inputs_done) {
473  generate_node(node, done);
474  done.insert(node);
475  done_flag[node->id] = true;
476  }
477  else {
478  nodes_done = false;
479  }
480  }
481  }
482  } while (!nodes_done);
483 }
484 
486 {
487  /* Skip generating closures that are not supported or needed for a particular
488  * type of shader. For example a BSDF in a volume shader. */
489  const int node_feature = node->get_feature();
490  if ((state->node_feature_mask & node_feature) != node_feature) {
491  return;
492  }
493 
494  /* execute dependencies for closure */
495  foreach (ShaderInput *in, node->inputs) {
496  if (in->link != NULL) {
497  ShaderNodeSet dependencies;
498  find_dependencies(dependencies, state->nodes_done, in);
499  generate_svm_nodes(dependencies, state);
500  }
501  }
502 
503  /* closure mix weight */
504  const char *weight_name = (current_type == SHADER_TYPE_VOLUME) ? "VolumeMixWeight" :
505  "SurfaceMixWeight";
506  ShaderInput *weight_in = node->input(weight_name);
507 
508  if (weight_in && (weight_in->link || node->get_float(weight_in->socket_type) != 1.0f))
509  mix_weight_offset = stack_assign(weight_in);
510  else
512 
513  /* compile closure itself */
514  generate_node(node, state->nodes_done);
515 
517 
519  if (node->has_surface_emission())
521  if (node->has_surface_transparent())
523  if (node->has_surface_bssrdf()) {
525  if (node->has_bssrdf_bump())
527  }
528  if (node->has_bump()) {
529  current_shader->has_bump = true;
530  }
531  }
532 }
533 
535  ShaderNode *node,
537  const ShaderNodeSet &shared)
538 {
539  if (shared.find(node) != shared.end()) {
540  generate_multi_closure(root_node, node, state);
541  }
542  else {
543  foreach (ShaderInput *in, node->inputs) {
544  if (in->type() == SocketType::CLOSURE && in->link)
545  generated_shared_closure_nodes(root_node, in->link->parent, state, shared);
546  }
547  }
548 }
549 
553 {
554  foreach (ShaderNode *node, graph->nodes) {
555  if (node->special_type == SHADER_SPECIAL_TYPE_OUTPUT_AOV) {
556  OutputAOVNode *aov_node = static_cast<OutputAOVNode *>(node);
557  if (aov_node->offset >= 0) {
558  aov_nodes.insert(aov_node);
559  foreach (ShaderInput *in, node->inputs) {
560  if (in->link != NULL) {
561  find_dependencies(aov_nodes, state->nodes_done, in);
562  }
563  }
564  }
565  }
566  }
567 }
568 
570  ShaderNode *node,
572 {
573  /* only generate once */
574  if (state->closure_done.find(node) != state->closure_done.end())
575  return;
576 
577  state->closure_done.insert(node);
578 
579  if (node->special_type == SHADER_SPECIAL_TYPE_COMBINE_CLOSURE) {
580  /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
581  ShaderInput *cl1in = node->input("Closure1");
582  ShaderInput *cl2in = node->input("Closure2");
583  ShaderInput *facin = node->input("Fac");
584 
585  /* skip empty mix/add closure nodes */
586  if (!cl1in->link && !cl2in->link)
587  return;
588 
589  if (facin && facin->link) {
590  /* mix closure: generate instructions to compute mix weight */
591  ShaderNodeSet dependencies;
592  find_dependencies(dependencies, state->nodes_done, facin);
593  generate_svm_nodes(dependencies, state);
594 
595  /* execute shared dependencies. this is needed to allow skipping
596  * of zero weight closures and their dependencies later, so we
597  * ensure that they only skip dependencies that are unique to them */
598  ShaderNodeSet cl1deps, cl2deps, shareddeps;
599 
600  find_dependencies(cl1deps, state->nodes_done, cl1in);
601  find_dependencies(cl2deps, state->nodes_done, cl2in);
602 
603  ShaderNodeIDComparator node_id_comp;
604  set_intersection(cl1deps.begin(),
605  cl1deps.end(),
606  cl2deps.begin(),
607  cl2deps.end(),
608  std::inserter(shareddeps, shareddeps.begin()),
609  node_id_comp);
610 
611  /* it's possible that some nodes are not shared between this mix
612  * node's inputs, but still need to be always executed; this mainly
613  * happens when a node of the current sub-branch is used by a parent
614  * node or so */
615  if (root_node != node) {
616  foreach (ShaderInput *in, root_node->inputs) {
617  ShaderNodeSet rootdeps;
618  find_dependencies(rootdeps, state->nodes_done, in, node);
619  set_intersection(rootdeps.begin(),
620  rootdeps.end(),
621  cl1deps.begin(),
622  cl1deps.end(),
623  std::inserter(shareddeps, shareddeps.begin()),
624  node_id_comp);
625  set_intersection(rootdeps.begin(),
626  rootdeps.end(),
627  cl2deps.begin(),
628  cl2deps.end(),
629  std::inserter(shareddeps, shareddeps.begin()),
630  node_id_comp);
631  }
632  }
633 
634  /* For dependencies of AOV nodes, prevent them from being categorized
635  * as exclusive deps of one or the other closure, since the need to
636  * execute them for AOV writing is not dependent on the closure
637  * weights. */
638  if (state->aov_nodes.size()) {
639  set_intersection(state->aov_nodes.begin(),
640  state->aov_nodes.end(),
641  cl1deps.begin(),
642  cl1deps.end(),
643  std::inserter(shareddeps, shareddeps.begin()),
644  node_id_comp);
645  set_intersection(state->aov_nodes.begin(),
646  state->aov_nodes.end(),
647  cl2deps.begin(),
648  cl2deps.end(),
649  std::inserter(shareddeps, shareddeps.begin()),
650  node_id_comp);
651  }
652 
653  if (!shareddeps.empty()) {
654  if (cl1in->link) {
655  generated_shared_closure_nodes(root_node, cl1in->link->parent, state, shareddeps);
656  }
657  if (cl2in->link) {
658  generated_shared_closure_nodes(root_node, cl2in->link->parent, state, shareddeps);
659  }
660 
661  generate_svm_nodes(shareddeps, state);
662  }
663 
664  /* generate instructions for input closure 1 */
665  if (cl1in->link) {
666  /* Add instruction to skip closure and its dependencies if mix
667  * weight is zero.
668  */
669  svm_node_types_used[NODE_JUMP_IF_ONE] = true;
670  current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ONE, 0, stack_assign(facin), 0));
671  int node_jump_skip_index = current_svm_nodes.size() - 1;
672 
673  generate_multi_closure(root_node, cl1in->link->parent, state);
674 
675  /* Fill in jump instruction location to be after closure. */
676  current_svm_nodes[node_jump_skip_index].y = current_svm_nodes.size() -
677  node_jump_skip_index - 1;
678  }
679 
680  /* generate instructions for input closure 2 */
681  if (cl2in->link) {
682  /* Add instruction to skip closure and its dependencies if mix
683  * weight is zero.
684  */
685  svm_node_types_used[NODE_JUMP_IF_ZERO] = true;
686  current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ZERO, 0, stack_assign(facin), 0));
687  int node_jump_skip_index = current_svm_nodes.size() - 1;
688 
689  generate_multi_closure(root_node, cl2in->link->parent, state);
690 
691  /* Fill in jump instruction location to be after closure. */
692  current_svm_nodes[node_jump_skip_index].y = current_svm_nodes.size() -
693  node_jump_skip_index - 1;
694  }
695 
696  /* unassign */
698  }
699  else {
700  /* execute closures and their dependencies, no runtime checks
701  * to skip closures here because this was already optimized due to
702  * a fixed weight, or it is an add closure that always needs both */
703  if (cl1in->link)
704  generate_multi_closure(root_node, cl1in->link->parent, state);
705  if (cl2in->link)
706  generate_multi_closure(root_node, cl2in->link->parent, state);
707  }
708  }
709  else {
711  }
712 
713  state->nodes_done.insert(node);
714  state->nodes_done_flag[node->id] = true;
715 }
716 
718 {
719  /* Converting a shader graph into svm_nodes that can be executed
720  * sequentially on the virtual machine is fairly simple. We can keep
721  * looping over nodes and each time all the inputs of a node are
722  * ready, we add svm_nodes for it that read the inputs from the
723  * stack and write outputs back to the stack.
724  *
725  * With the SVM, we always sample only a single closure. We can think
726  * of all closure nodes as a binary tree with mix closures as inner
727  * nodes and other closures as leaves. The SVM will traverse that tree,
728  * each time deciding to go left or right depending on the mix weights,
729  * until a closure is found.
730  *
731  * We only execute nodes that are needed for the mix weights and chosen
732  * closure.
733  */
734 
735  current_type = type;
737 
738  /* get input in output node */
739  ShaderNode *output = graph->output();
740  ShaderInput *clin = NULL;
741 
742  switch (type) {
743  case SHADER_TYPE_SURFACE:
744  clin = output->input("Surface");
745  break;
746  case SHADER_TYPE_VOLUME:
747  clin = output->input("Volume");
748  break;
750  clin = output->input("Displacement");
751  break;
752  case SHADER_TYPE_BUMP:
753  clin = output->input("Normal");
754  break;
755  default:
756  assert(0);
757  break;
758  }
759 
760  /* clear all compiler state */
761  memset((void *)&active_stack, 0, sizeof(active_stack));
763 
764  foreach (ShaderNode *node, graph->nodes) {
765  foreach (ShaderInput *input, node->inputs)
766  input->stack_offset = SVM_STACK_INVALID;
767  foreach (ShaderOutput *output, node->outputs)
768  output->stack_offset = SVM_STACK_INVALID;
769  }
770 
771  /* for the bump shader we need to add a node to store the shader state */
772  bool need_bump_state = (type == SHADER_TYPE_BUMP) &&
773  (shader->get_displacement_method() == DISPLACE_BOTH);
774  int bump_state_offset = SVM_STACK_INVALID;
775  if (need_bump_state) {
776  bump_state_offset = stack_find_offset(SVM_BUMP_EVAL_STATE_SIZE);
777  add_node(NODE_ENTER_BUMP_EVAL, bump_state_offset);
778  }
779 
780  if (shader->reference_count()) {
782  if (clin->link) {
783  bool generate = false;
784 
785  switch (type) {
786  case SHADER_TYPE_SURFACE: /* generate surface shader */
787  generate = true;
788  shader->has_surface = true;
789  state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE;
790  break;
791  case SHADER_TYPE_VOLUME: /* generate volume shader */
792  generate = true;
793  shader->has_volume = true;
794  state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_VOLUME;
795  break;
796  case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */
797  generate = true;
798  shader->has_displacement = true;
799  state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_DISPLACEMENT;
800  break;
801  case SHADER_TYPE_BUMP: /* generate bump shader */
802  generate = true;
803  state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_BUMP;
804  break;
805  default:
806  break;
807  }
808 
809  if (generate) {
810  if (type == SHADER_TYPE_SURFACE) {
812  }
814  }
815  }
816 
817  /* compile output node */
818  output->compile(*this);
819 
820  if (!state.aov_nodes.empty()) {
821  /* AOV passes are only written if the object is directly visible, so
822  * there is no point in evaluating all the nodes generated only for the
823  * AOV outputs if that's not the case. Therefore, we insert
824  * NODE_AOV_START into the shader before the AOV-only nodes are
825  * generated which tells the kernel that it can stop evaluation
826  * early if AOVs will not be written. */
827  add_node(NODE_AOV_START, 0, 0, 0);
828  generate_svm_nodes(state.aov_nodes, &state);
829  }
830  }
831 
832  /* add node to restore state after bump shader has finished */
833  if (need_bump_state) {
834  add_node(NODE_LEAVE_BUMP_EVAL, bump_state_offset);
835  }
836 
837  /* if compile failed, generate empty shader */
838  if (compile_failed) {
840  compile_failed = false;
841  }
842 
843  /* for bump shaders we fall thru to the surface shader, but if this is any other kind of shader
844  * it ends here */
845  if (type != SHADER_TYPE_BUMP) {
846  add_node(NODE_END, 0, 0, 0);
847  }
848 }
849 
850 void SVMCompiler::compile(Shader *shader, array<int4> &svm_nodes, int index, Summary *summary)
851 {
852  svm_node_types_used[NODE_SHADER_JUMP] = true;
853  svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
854 
855  /* copy graph for shader with bump mapping */
856  ShaderNode *output = shader->graph->output();
857  int start_num_svm_nodes = svm_nodes.size();
858 
859  const double time_start = time_dt();
860 
861  bool has_bump = (shader->get_displacement_method() != DISPLACE_TRUE) &&
862  output->input("Surface")->link && output->input("Displacement")->link;
863 
864  /* finalize */
865  {
866  scoped_timer timer((summary != NULL) ? &summary->time_finalize : NULL);
867  shader->graph->finalize(scene,
868  has_bump,
870  shader->get_displacement_method() == DISPLACE_BOTH);
871  }
872 
873  current_shader = shader;
874 
875  shader->has_surface = false;
876  shader->has_surface_emission = false;
877  shader->has_surface_transparent = false;
878  shader->has_surface_raytrace = false;
879  shader->has_surface_bssrdf = false;
880  shader->has_bump = has_bump;
881  shader->has_bssrdf_bump = has_bump;
882  shader->has_volume = false;
883  shader->has_displacement = false;
884  shader->has_surface_spatial_varying = false;
885  shader->has_volume_spatial_varying = false;
886  shader->has_volume_attribute_dependency = false;
887  shader->has_integrator_dependency = false;
888 
889  /* generate bump shader */
890  if (has_bump) {
891  scoped_timer timer((summary != NULL) ? &summary->time_generate_bump : NULL);
892  compile_type(shader, shader->graph, SHADER_TYPE_BUMP);
893  svm_nodes[index].y = svm_nodes.size();
894  svm_nodes.append(current_svm_nodes);
895  }
896 
897  /* generate surface shader */
898  {
899  scoped_timer timer((summary != NULL) ? &summary->time_generate_surface : NULL);
900  compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
901  /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this
902  * one if it exists */
903  if (!has_bump) {
904  svm_nodes[index].y = svm_nodes.size();
905  }
906  svm_nodes.append(current_svm_nodes);
907  }
908 
909  /* generate volume shader */
910  {
911  scoped_timer timer((summary != NULL) ? &summary->time_generate_volume : NULL);
912  compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
913  svm_nodes[index].z = svm_nodes.size();
914  svm_nodes.append(current_svm_nodes);
915  }
916 
917  /* generate displacement shader */
918  {
919  scoped_timer timer((summary != NULL) ? &summary->time_generate_displacement : NULL);
920  compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
921  svm_nodes[index].w = svm_nodes.size();
922  svm_nodes.append(current_svm_nodes);
923  }
924 
925  /* Fill in summary information. */
926  if (summary != NULL) {
927  summary->time_total = time_dt() - time_start;
928  summary->peak_stack_usage = max_stack_use;
929  summary->num_svm_nodes = svm_nodes.size() - start_num_svm_nodes;
930  }
931 }
932 
933 /* Compiler summary implementation. */
934 
936  : num_svm_nodes(0),
937  peak_stack_usage(0),
938  time_finalize(0.0),
939  time_generate_surface(0.0),
940  time_generate_bump(0.0),
941  time_generate_volume(0.0),
942  time_generate_displacement(0.0),
943  time_total(0.0)
944 {
945 }
946 
948 {
949  string report = "";
950  report += string_printf("Number of SVM nodes: %d\n", num_svm_nodes);
951  report += string_printf("Peak stack usage: %d\n", peak_stack_usage);
952 
953  report += string_printf("Time (in seconds):\n");
954  report += string_printf("Finalize: %f\n", time_finalize);
955  report += string_printf(" Surface: %f\n", time_generate_surface);
956  report += string_printf(" Bump: %f\n", time_generate_bump);
957  report += string_printf(" Volume: %f\n", time_generate_volume);
958  report += string_printf(" Displacement: %f\n", time_generate_displacement);
959  report += string_printf("Generate: %f\n",
960  time_generate_surface + time_generate_bump + time_generate_volume +
961  time_generate_displacement);
962  report += string_printf("Total: %f\n", time_total);
963 
964  return report;
965 }
966 
967 /* Global state of the compiler. */
968 
970 {
971  int max_id = 0;
972  foreach (ShaderNode *node, graph->nodes) {
973  max_id = max(node->id, max_id);
974  }
975  nodes_done_flag.resize(max_id + 1, false);
976  node_feature_mask = 0;
977 }
978 
unsigned int uint
Definition: BLI_sys_types.h:67
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble z
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum type
float float4[4]
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
static AttributeStandard name_standard(const char *name)
Shader * get_shader(const Scene *scene)
device_vector< int4 > svm_nodes
Definition: scene.h:118
KernelData data
Definition: scene.h:130
void tag_update(Scene *scene, uint32_t flag)
bool get_cancel() const
Definition: progress.h:90
ShaderGraph * current_graph
Definition: scene/svm.h:115
void compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
Definition: svm.cpp:717
int max_stack_use
Definition: scene/svm.h:219
void find_aov_nodes_and_dependencies(ShaderNodeSet &aov_nodes, ShaderGraph *graph, CompilerState *state)
Definition: svm.cpp:550
void add_node(ShaderNodeType type, int a=0, int b=0, int c=0)
Definition: svm.cpp:380
bool background
Definition: scene/svm.h:116
SVMCompiler(Scene *scene)
Definition: svm.cpp:162
array< int4 > current_svm_nodes
Definition: scene/svm.h:215
void generate_closure_node(ShaderNode *node, CompilerState *state)
Definition: svm.cpp:485
void stack_clear_offset(SocketType::Type type, int offset)
Definition: svm.cpp:239
ShaderType current_type
Definition: scene/svm.h:216
void generate_node(ShaderNode *node, ShaderNodeSet &done)
Definition: svm.cpp:430
int stack_assign_if_linked(ShaderInput *input)
Definition: svm.cpp:293
Shader * current_shader
Definition: scene/svm.h:217
void stack_clear_users(ShaderNode *node, ShaderNodeSet &done)
Definition: svm.cpp:324
int stack_size(SocketType::Type type)
Definition: svm.cpp:176
void stack_clear_temporary(ShaderNode *node)
Definition: svm.cpp:355
uint encode_uchar4(uint x, uint y=0, uint z=0, uint w=0)
Definition: svm.cpp:365
uint attribute_standard(ustring name)
Definition: svm.cpp:409
Stack active_stack
Definition: scene/svm.h:218
void stack_link(ShaderInput *input, ShaderOutput *output)
Definition: svm.cpp:309
void find_dependencies(ShaderNodeSet &dependencies, const ShaderNodeSet &done, ShaderInput *input, ShaderNode *skip_node=NULL)
Definition: svm.cpp:415
void generate_svm_nodes(const ShaderNodeSet &nodes, CompilerState *state)
Definition: svm.cpp:454
void generate_multi_closure(ShaderNode *root_node, ShaderNode *node, CompilerState *state)
Definition: svm.cpp:569
uint attribute(ustring name)
Definition: svm.cpp:399
std::atomic_int * svm_node_types_used
Definition: scene/svm.h:214
Scene * scene
Definition: scene/svm.h:114
uint mix_weight_offset
Definition: scene/svm.h:220
bool compile_failed
Definition: scene/svm.h:221
int stack_find_offset(int size)
Definition: svm.cpp:202
void compile(Shader *shader, array< int4 > &svm_nodes, int index, Summary *summary=NULL)
Definition: svm.cpp:850
void generated_shared_closure_nodes(ShaderNode *root_node, ShaderNode *node, CompilerState *state, const ShaderNodeSet &shared)
Definition: svm.cpp:534
int stack_assign(ShaderOutput *output)
Definition: svm.cpp:284
void reset(Scene *scene) override
Definition: svm.cpp:33
void device_free(Device *device, DeviceScene *dscene, Scene *scene) override
Definition: svm.cpp:153
SVMShaderManager()
Definition: svm.cpp:25
~SVMShaderManager()
Definition: svm.cpp:29
void device_update_specific(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress) override
Definition: svm.cpp:57
void device_update_shader(Scene *scene, Shader *shader, Progress *progress, array< int4 > *svm_nodes)
Definition: svm.cpp:37
OutputNode * output()
void finalize(Scene *scene, bool do_bump=false, bool do_simplify=false, bool bump_in_object_space=false)
ShaderOutput * link
Definition: shader_graph.h:103
const SocketType & socket_type
Definition: shader_graph.h:101
uint get_attribute_id(ustring name)
bool need_update() const
void device_free_common(Device *device, DeviceScene *dscene, Scene *scene)
vector< ShaderInput * > inputs
Definition: shader_graph.h:214
ShaderNode * parent
Definition: shader_graph.h:134
bool has_surface_spatial_varying
Definition: scene/shader.h:112
bool has_surface_bssrdf
Definition: scene/shader.h:109
bool has_volume_attribute_dependency
Definition: scene/shader.h:114
uint id
Definition: scene/shader.h:121
bool has_volume
Definition: scene/shader.h:107
bool has_surface
Definition: scene/shader.h:103
bool has_bssrdf_bump
Definition: scene/shader.h:111
NODE_DECLARE ShaderGraph * graph
Definition: scene/shader.h:71
bool has_integrator_dependency
Definition: scene/shader.h:115
bool has_surface_emission
Definition: scene/shader.h:104
bool has_surface_raytrace
Definition: scene/shader.h:106
bool has_displacement
Definition: scene/shader.h:108
bool has_bump
Definition: scene/shader.h:110
bool has_surface_transparent
Definition: scene/shader.h:105
bool has_volume_spatial_varying
Definition: scene/shader.h:113
void append(const array< T > &from)
size_t size() const
void push_back_slow(const T &t)
void clear()
int x
Definition: btConvexHull.h:149
int w
Definition: btConvexHull.h:149
int y
Definition: btConvexHull.h:149
int z
Definition: btConvexHull.h:149
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
OperationNode * node
Depsgraph * graph
double time
Scene scene
#define function_bind
TaskPool * task_pool
ccl_global KernelShaderEvalInput ccl_global float * output
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
ccl_global KernelShaderEvalInput * input
const int state
#define SVM_STACK_SIZE
ShaderNodeType
ShaderType
@ SHADER_TYPE_BUMP
@ SHADER_TYPE_SURFACE
@ SHADER_TYPE_VOLUME
@ SHADER_TYPE_DISPLACEMENT
#define SVM_BUMP_EVAL_STATE_SIZE
#define SVM_STACK_INVALID
#define KERNEL_FEATURE_NODE_MASK_DISPLACEMENT
AttributeStandard
Definition: kernel/types.h:612
@ KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_NODE_MASK_BUMP
#define KERNEL_FEATURE_NODE_MASK_VOLUME
#define KERNEL_FEATURE_NODE_MASK_SURFACE
#define VLOG_INFO
Definition: log.h:77
#define VLOG_WORK
Definition: log.h:80
#define make_int4(x, y, z, w)
Definition: metal/compat.h:208
static char * generate(GHash *messages, size_t *r_output_size)
Definition: msgfmt.c:170
static unsigned c
Definition: RandGen.cpp:83
static unsigned a[3]
Definition: RandGen.cpp:78
static const pxr::TfToken b("b", pxr::TfToken::Immortal)
@ DISPLACE_TRUE
Definition: scene/shader.h:54
@ DISPLACE_BOTH
Definition: scene/shader.h:55
@ SHADER_SPECIAL_TYPE_OUTPUT_AOV
Definition: shader_graph.h:57
@ SHADER_SPECIAL_TYPE_COMBINE_CLOSURE
Definition: shader_graph.h:54
set< ShaderNode *, ShaderNodeIDComparator > ShaderNodeSet
Definition: shader_graph.h:289
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition: string.cpp:22
ustring name
Definition: graph/node.h:174
int reference_count() const
Definition: graph/node.h:180
void clear_modified()
Definition: graph/node.cpp:814
CompilerState(ShaderGraph *graph)
Definition: svm.cpp:969
int users[SVM_STACK_SIZE]
Definition: scene/svm.h:154
double time_generate_volume
Definition: scene/svm.h:72
double time_generate_surface
Definition: scene/svm.h:66
double time_generate_bump
Definition: scene/svm.h:69
string full_report() const
Definition: svm.cpp:947
double time_generate_displacement
Definition: scene/svm.h:75
vector< Shader * > shaders
Definition: scene.h:215
Background * background
Definition: scene.h:209
ShaderManager * shader_manager
Definition: scene.h:224
LightManager * light_manager
Definition: scene.h:223
SceneUpdateStats * update_stats
Definition: scene.h:249
DeviceScene dscene
Definition: scene.h:240
void push(TaskRunFunction &&task)
Definition: task.cpp:23
void wait_work(Summary *stats=NULL)
Definition: task.cpp:29
float z
float y
float x
CCL_NAMESPACE_BEGIN double time_dt()
Definition: time.cpp:35
float max
ccl_device_inline int __float_as_int(float f)
Definition: util/math.h:243