Blender  V3.3
render_scheduler.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
5 
6 #include "session/session.h"
7 #include "session/tile.h"
8 #include "util/log.h"
9 #include "util/math.h"
10 #include "util/time.h"
11 
13 
14 /* --------------------------------------------------------------------
15  * Render scheduler.
16  */
17 
19  : headless_(params.headless),
20  background_(params.background),
21  pixel_size_(params.pixel_size),
22  tile_manager_(tile_manager),
23  default_start_resolution_divider_(params.use_resolution_divider ? pixel_size_ * 8 : 0)
24 {
26 }
27 
28 void RenderScheduler::set_need_schedule_cryptomatte(bool need_schedule_cryptomatte)
29 {
30  need_schedule_cryptomatte_ = need_schedule_cryptomatte;
31 }
32 
33 void RenderScheduler::set_need_schedule_rebalance(bool need_schedule_rebalance)
34 {
35  need_schedule_rebalance_works_ = need_schedule_rebalance;
36 }
37 
39 {
40  return background_;
41 }
42 
44 {
46 }
47 
49 {
50  adaptive_sampling_ = adaptive_sampling;
51 }
52 
54 {
55  return adaptive_sampling_.use;
56 }
57 
58 void RenderScheduler::set_start_sample(int start_sample)
59 {
60  start_sample_ = start_sample;
61 }
62 
64 {
65  return start_sample_;
66 }
67 
69 {
71 }
72 
74 {
75  return num_samples_;
76 }
77 
78 void RenderScheduler::set_sample_offset(int sample_offset)
79 {
80  sample_offset_ = sample_offset;
81 }
82 
84 {
85  return sample_offset_;
86 }
87 
88 void RenderScheduler::set_time_limit(double time_limit)
89 {
90  time_limit_ = time_limit;
91 }
92 
94 {
95  return time_limit_;
96 }
97 
99 {
101 
103 }
104 
106 {
107  return state_.num_rendered_samples;
108 }
109 
110 void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples, int sample_offset)
111 {
112  buffer_params_ = buffer_params;
113 
115 
117  set_start_sample(sample_offset);
118  set_sample_offset(sample_offset);
119 
120  /* In background mode never do lower resolution render preview, as it is not really supported
121  * by the software. */
123  state_.resolution_divider = 1;
124  }
125  else {
126 /* NOTE: Multiply by 2 because of the way scheduling works: it first advances (halves) the
127 * resolution divider and only then initializes the render work. */
128  state_.resolution_divider = start_resolution_divider_ * 2;
129  }
130 
131  state_.num_rendered_samples = 0;
132  state_.last_display_update_time = 0.0;
133  state_.last_display_update_sample = -1;
134 
135  state_.last_rebalance_time = 0.0;
136  state_.num_rebalance_requested = 0;
137  state_.num_rebalance_changes = 0;
138  state_.last_rebalance_changed = false;
139  state_.need_rebalance_at_next_work = false;
140 
141  /* TODO(sergey): Choose better initial value. */
142  /* NOTE: The adaptive sampling settings might not be available here yet. */
143  state_.adaptive_sampling_threshold = 0.4f;
144 
145  state_.last_work_tile_was_denoised = false;
146  state_.tile_result_was_written = false;
147  state_.postprocess_work_scheduled = false;
148  state_.full_frame_work_scheduled = false;
149  state_.full_frame_was_written = false;
150 
151  state_.path_trace_finished = false;
152 
153  state_.start_render_time = 0.0;
154  state_.end_render_time = 0.0;
155  state_.time_limit_reached = false;
156 
157  state_.occupancy_num_samples = 0;
158  state_.occupancy = 1.0f;
159 
160  first_render_time_.path_trace_per_sample = 0.0;
161  first_render_time_.denoise_time = 0.0;
162  first_render_time_.display_update_time = 0.0;
163 
169 }
170 
172 {
174 }
175 
177 {
178  /* Move to the next resolution divider. Assume adaptive filtering is not needed during
179  * navigation. */
180  if (state_.resolution_divider != pixel_size_) {
181  return false;
182  }
183 
184  if (render_work_reschedule_on_idle(render_work)) {
185  return true;
186  }
187 
188  state_.path_trace_finished = true;
189 
190  bool denoiser_delayed, denoiser_ready_to_display;
191  render_work.tile.denoise = work_need_denoise(denoiser_delayed, denoiser_ready_to_display);
192 
193  render_work.display.update = work_need_update_display(denoiser_delayed);
194  render_work.display.use_denoised_result = denoiser_ready_to_display;
195 
196  return false;
197 }
198 
200 {
202  return false;
203  }
204 
205  /* Move to the next resolution divider. Assume adaptive filtering is not needed during
206  * navigation. */
207  if (state_.resolution_divider != pixel_size_) {
208  return false;
209  }
210 
211  if (adaptive_sampling_.use) {
212  if (state_.adaptive_sampling_threshold > adaptive_sampling_.threshold) {
213  state_.adaptive_sampling_threshold = max(state_.adaptive_sampling_threshold / 2,
215 
216  render_work.adaptive_sampling.threshold = state_.adaptive_sampling_threshold;
217  render_work.adaptive_sampling.reset = true;
218 
219  return true;
220  }
221  }
222 
223  return false;
224 }
225 
227 {
228  VLOG_WORK << "Schedule work for cancel.";
229 
230  /* Un-schedule samples: they will not be rendered and should not be counted. */
231  state_.num_rendered_samples -= render_work.path_trace.num_samples;
232 
233  const bool has_rendered_samples = get_num_rendered_samples() != 0;
234 
235  /* Reset all fields of the previous work, canceling things like adaptive sampling filtering and
236  * denoising.
237  * However, need to preserve write requests, since those will not be possible to recover and
238  * writes are only to happen once. */
239  const bool tile_write = render_work.tile.write;
240  const bool full_write = render_work.full.write;
241 
242  render_work = RenderWork();
243 
244  render_work.tile.write = tile_write;
245  render_work.full.write = full_write;
246 
247  /* Do not write tile if it has zero samples in it, treat it similarly to all other tiles which
248  * got canceled. */
249  if (!state_.tile_result_was_written && has_rendered_samples) {
250  render_work.tile.write = true;
251  }
252 
253  if (!state_.full_frame_was_written) {
254  render_work.full.write = true;
255  }
256 
257  /* Update current tile, but only if any sample was rendered.
258  * Allows to have latest state of tile visible while full buffer is being processed.
259  *
260  * Note that if there are no samples in the current tile its render buffer might have pixels
261  * remained from previous state.
262  *
263  * If the full result was written, then there is no way any updates were made to the render
264  * buffers. And the buffers might have been freed from the device, so display update is not
265  * possible. */
266  if (has_rendered_samples && !state_.full_frame_was_written) {
267  render_work.display.update = true;
268  }
269 }
270 
272 {
273  if (state_.resolution_divider != pixel_size_) {
274  return false;
275  }
276 
277  if (state_.path_trace_finished || state_.time_limit_reached) {
278  return true;
279  }
280 
282 }
283 
285 {
287 
288  const double time_now = time_dt();
289 
290  if (done()) {
291  RenderWork render_work;
292  render_work.resolution_divider = state_.resolution_divider;
293 
294  if (!set_postprocess_render_work(&render_work)) {
295  set_full_frame_render_work(&render_work);
296  }
297 
298  if (!render_work) {
299  state_.end_render_time = time_now;
300  }
301 
302  update_state_for_render_work(render_work);
303 
304  return render_work;
305  }
306 
307  RenderWork render_work;
308 
309  if (state_.resolution_divider != pixel_size_) {
310  state_.resolution_divider = max(state_.resolution_divider / 2, pixel_size_);
311  state_.num_rendered_samples = 0;
312  state_.last_display_update_sample = -1;
313  }
314 
315  render_work.resolution_divider = state_.resolution_divider;
316 
320 
321  render_work.init_render_buffers = (render_work.path_trace.start_sample == get_start_sample());
322 
323  /* NOTE: Rebalance scheduler requires current number of samples to not be advanced forward. */
324  render_work.rebalance = work_need_rebalance();
325 
326  /* NOTE: Advance number of samples now, so that filter and denoising check can see that all the
327  * samples are rendered. */
328  state_.num_rendered_samples += render_work.path_trace.num_samples;
329 
332  render_work.adaptive_sampling.reset = false;
333 
334  bool denoiser_delayed, denoiser_ready_to_display;
335  render_work.tile.denoise = work_need_denoise(denoiser_delayed, denoiser_ready_to_display);
336 
337  render_work.tile.write = done();
338 
339  render_work.display.update = work_need_update_display(denoiser_delayed);
340  render_work.display.use_denoised_result = denoiser_ready_to_display;
341 
342  if (done()) {
343  set_postprocess_render_work(&render_work);
344  }
345 
346  update_state_for_render_work(render_work);
347 
348  return render_work;
349 }
350 
352 {
353  const double time_now = time_dt();
354 
355  if (render_work.rebalance) {
356  state_.last_rebalance_time = time_now;
357  ++state_.num_rebalance_requested;
358  }
359 
360  /* A fallback display update time, for the case there is an error of display update, or when
361  * there is no display at all. */
362  if (render_work.display.update) {
363  state_.last_display_update_time = time_now;
364  state_.last_display_update_sample = state_.num_rendered_samples;
365  }
366 
367  state_.last_work_tile_was_denoised = render_work.tile.denoise;
368  state_.tile_result_was_written |= render_work.tile.write;
369  state_.full_frame_was_written |= render_work.full.write;
370 }
371 
373 {
374  if (state_.postprocess_work_scheduled) {
375  return false;
376  }
377  state_.postprocess_work_scheduled = true;
378 
379  bool any_scheduled = false;
380 
382  render_work->cryptomatte.postprocess = true;
383  any_scheduled = true;
384  }
385 
386  if (denoiser_params_.use && !state_.last_work_tile_was_denoised) {
387  render_work->tile.denoise = !tile_manager_.has_multiple_tiles();
388  any_scheduled = true;
389  }
390 
391  if (!state_.tile_result_was_written) {
392  render_work->tile.write = true;
393  any_scheduled = true;
394  }
395 
396  if (any_scheduled) {
397  render_work->display.update = true;
398  }
399 
400  return any_scheduled;
401 }
402 
404 {
405  if (state_.full_frame_work_scheduled) {
406  return;
407  }
408 
410  /* There is only single tile, so all work has been performed already. */
411  return;
412  }
413 
414  if (!tile_manager_.done()) {
415  /* There are still tiles to be rendered. */
416  return;
417  }
418 
419  if (state_.full_frame_was_written) {
420  return;
421  }
422 
423  state_.full_frame_work_scheduled = true;
424 
425  render_work->full.write = true;
426 }
427 
428 /* Knowing time which it took to complete a task at the current resolution divider approximate how
429  * long it would have taken to complete it at a final resolution. */
430 static double approximate_final_time(const RenderWork &render_work, double time)
431 {
432  if (render_work.resolution_divider == 1) {
433  return time;
434  }
435 
436  const double resolution_divider_sq = render_work.resolution_divider *
437  render_work.resolution_divider;
438  return time * resolution_divider_sq;
439 }
440 
442 {
443  /* Start counting render time when rendering samples at their final resolution.
444  *
445  * NOTE: The work might have the path trace part be all zero: this happens when a post-processing
446  * work is scheduled after the path tracing. Checking for just a start sample doesn't work here
447  * because it might be wrongly 0. Check for whether path tracing is actually happening as it is
448  * expected to happen in the first work. */
449  if (render_work.resolution_divider == pixel_size_ && render_work.path_trace.num_samples != 0 &&
450  render_work.path_trace.start_sample == get_start_sample()) {
451  state_.start_render_time = time_dt();
452  }
453 }
454 
456  double time,
457  bool is_cancelled)
458 {
460 
461  if (is_cancelled) {
462  return;
463  }
464 
465  const double final_time_approx = approximate_final_time(render_work, time);
466 
468  first_render_time_.path_trace_per_sample = final_time_approx /
469  render_work.path_trace.num_samples;
470  }
471 
472  if (work_report_reset_average(render_work)) {
474  }
475 
476  path_trace_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
477 
478  VLOG_WORK << "Average path tracing time: " << path_trace_time_.get_average() << " seconds.";
479 }
480 
481 void RenderScheduler::report_path_trace_occupancy(const RenderWork &render_work, float occupancy)
482 {
483  state_.occupancy_num_samples = render_work.path_trace.num_samples;
484  state_.occupancy = occupancy;
485  VLOG_WORK << "Measured path tracing occupancy: " << occupancy;
486 }
487 
489  double time,
490  bool is_cancelled)
491 {
493 
494  if (is_cancelled) {
495  return;
496  }
497 
498  const double final_time_approx = approximate_final_time(render_work, time);
499 
500  if (work_report_reset_average(render_work)) {
502  }
503 
504  adaptive_filter_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
505 
506  VLOG_WORK << "Average adaptive sampling filter time: " << adaptive_filter_time_.get_average()
507  << " seconds.";
508 }
509 
510 void RenderScheduler::report_denoise_time(const RenderWork &render_work, double time)
511 {
513 
514  const double final_time_approx = approximate_final_time(render_work, time);
515 
517  first_render_time_.denoise_time = final_time_approx;
518  }
519 
520  if (work_report_reset_average(render_work)) {
522  }
523 
524  denoise_time_.add_average(final_time_approx);
525 
526  VLOG_WORK << "Average denoising time: " << denoise_time_.get_average() << " seconds.";
527 }
528 
530 {
532 
533  const double final_time_approx = approximate_final_time(render_work, time);
534 
536  first_render_time_.display_update_time = final_time_approx;
537  }
538 
539  if (work_report_reset_average(render_work)) {
541  }
542 
543  display_update_time_.add_average(final_time_approx);
544 
545  VLOG_WORK << "Average display update time: " << display_update_time_.get_average()
546  << " seconds.";
547 
548  /* Move the display update moment further in time, so that logic which checks when last update
549  * did happen have more reliable point in time (without path tracing and denoising parts of the
550  * render work). */
551  state_.last_display_update_time = time_dt();
552 }
553 
555  double time,
556  bool balance_changed)
557 {
559 
560  if (work_report_reset_average(render_work)) {
562  }
563 
565 
566  if (balance_changed) {
567  ++state_.num_rebalance_changes;
568  }
569 
570  state_.last_rebalance_changed = balance_changed;
571 
572  VLOG_WORK << "Average rebalance time: " << rebalance_time_.get_average() << " seconds.";
573 }
574 
576 {
577  const double render_wall_time = state_.end_render_time - state_.start_render_time;
579 
580  string result = "\nRender Scheduler Summary\n\n";
581 
582  {
583  string mode;
584  if (headless_) {
585  mode = "Headless";
586  }
587  else if (background_) {
588  mode = "Background";
589  }
590  else {
591  mode = "Interactive";
592  }
593  result += "Mode: " + mode + "\n";
594  }
595 
596  result += "Resolution: " + to_string(buffer_params_.width) + "x" +
598 
599  result += "\nAdaptive sampling:\n";
600  result += " Use: " + string_from_bool(adaptive_sampling_.use) + "\n";
601  if (adaptive_sampling_.use) {
602  result += " Step: " + to_string(adaptive_sampling_.adaptive_step) + "\n";
603  result += " Min Samples: " + to_string(adaptive_sampling_.min_samples) + "\n";
604  result += " Threshold: " + to_string(adaptive_sampling_.threshold) + "\n";
605  }
606 
607  result += "\nDenoiser:\n";
608  result += " Use: " + string_from_bool(denoiser_params_.use) + "\n";
609  if (denoiser_params_.use) {
610  result += " Type: " + string(denoiserTypeToHumanReadable(denoiser_params_.type)) + "\n";
611  result += " Start Sample: " + to_string(denoiser_params_.start_sample) + "\n";
612 
613  string passes = "Color";
615  passes += ", Albedo";
616  }
618  passes += ", Normal";
619  }
620 
621  result += " Passes: " + passes + "\n";
622  }
623 
624  if (state_.num_rebalance_requested) {
625  result += "\nRebalancer:\n";
626  result += " Number of requested rebalances: " + to_string(state_.num_rebalance_requested) +
627  "\n";
628  result += " Number of performed rebalances: " + to_string(state_.num_rebalance_changes) +
629  "\n";
630  }
631 
632  result += "\nTime (in seconds):\n";
633  result += string_printf(" %20s %20s %20s\n", "", "Wall", "Average");
634  result += string_printf(" %20s %20f %20f\n",
635  "Path Tracing",
638 
639  if (adaptive_sampling_.use) {
640  result += string_printf(" %20s %20f %20f\n",
641  "Adaptive Filter",
644  }
645 
646  if (denoiser_params_.use) {
648  " %20s %20f %20f\n", "Denoiser", denoise_time_.get_wall(), denoise_time_.get_average());
649  }
650 
651  result += string_printf(" %20s %20f %20f\n",
652  "Display Update",
655 
656  if (state_.num_rebalance_requested) {
657  result += string_printf(" %20s %20f %20f\n",
658  "Rebalance",
661  }
662 
663  const double total_time = path_trace_time_.get_wall() + adaptive_filter_time_.get_wall() +
665  result += "\n Total: " + to_string(total_time) + "\n";
666 
668  "\nRendered %d samples in %f seconds\n", num_rendered_samples, render_wall_time);
669 
670  /* When adaptive sampling is used the average time becomes meaningless, because different samples
671  * will likely render different number of pixels. */
672  if (!adaptive_sampling_.use) {
673  result += string_printf("Average time per sample: %f seconds\n",
674  render_wall_time / num_rendered_samples);
675  }
676 
677  return result;
678 }
679 
681 {
683 }
684 
686  int num_rendered_samples) const
687 {
690 
691  if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
692  const double remaining_render_time = max(0.0,
693  time_limit_ - (time_dt() - state_.start_render_time));
694 
695  update_interval = min(update_interval, remaining_render_time);
696  }
697 
698  return update_interval;
699 }
700 
701 /* TODO(sergey): This is just a quick implementation, exact values might need to be tweaked based
702 * on more careful experiments with viewport rendering. */
704  int num_rendered_samples) const
705 {
706  /* TODO(sergey): Need a decision on whether this should be using number of samples rendered
707  * within the current render session, or use absolute number of samples with the start sample
708  * taken into account. It will depend on whether the start sample offset clears the render
709  * buffer. */
710 
711  if (state_.need_rebalance_at_next_work) {
712  return 0.1;
713  }
714  if (state_.last_rebalance_changed) {
715  return 0.2;
716  }
717 
718  if (headless_) {
719  /* In headless mode do rare updates, so that the device occupancy is high, but there are still
720  * progress messages printed to the logs. */
721  return 30.0;
722  }
723 
724  if (background_) {
725  if (num_rendered_samples < 32) {
726  return 1.0;
727  }
728  return 2.0;
729  }
730 
731  /* Render time and number of samples rendered are used to figure out the display update interval.
732  * Render time is used to allow for fast display updates in the first few seconds of rendering
733  * on fast devices. Number of samples rendered is used to allow for potentially quicker display
734  * updates on slow devices during the first few samples. */
735  const double render_time = path_trace_time_.get_wall();
736  if (render_time < 1) {
737  return 0.1;
738  }
739  if (render_time < 2) {
740  return 0.25;
741  }
742  if (render_time < 4) {
743  return 0.5;
744  }
745  if (render_time < 8 || num_rendered_samples < 32) {
746  return 1.0;
747  }
748  return 2.0;
749 }
750 
752 {
753  const double time_per_sample_average = path_trace_time_.get_average();
754  /* Fall back to 1 sample if we have not recorded a time yet. */
755  if (time_per_sample_average == 0.0) {
756  return 1;
757  }
758 
759  const double num_samples_in_second = pixel_size_ * pixel_size_ / time_per_sample_average;
760 
761  const double update_interval_in_seconds = guess_display_update_interval_in_seconds();
762 
763  return max(int(num_samples_in_second * update_interval_in_seconds), 1);
764 }
765 
767 {
768  return start_sample_ + state_.num_rendered_samples;
769 }
770 
771 /* Round number of samples to the closest power of two.
772  * Rounding might happen to higher or lower value depending on which one is closer. Such behavior
773  * allows to have number of samples to be power of two without diverging from the planned number of
774  * samples too much. */
776 {
777  if (num_samples == 1) {
778  return 1;
779  }
780 
782  return num_samples;
783  }
784 
785  const uint num_samples_up = next_power_of_two(num_samples);
786  const uint num_samples_down = num_samples_up - (num_samples_up >> 1);
787 
788  const uint delta_up = num_samples_up - num_samples;
789  const uint delta_down = num_samples - num_samples_down;
790 
791  if (delta_up <= delta_down) {
792  return num_samples_up;
793  }
794 
795  return num_samples_down;
796 }
797 
799 {
800  if (state_.resolution_divider != pixel_size_) {
801  return get_num_samples_during_navigation(state_.resolution_divider);
802  }
803 
804  /* Always start full resolution render with a single sample. Gives more instant feedback to
805  * artists, and allows to gather information for a subsequent path tracing works. Do it in the
806  * headless mode as well, to give some estimate of how long samples are taking. */
807  if (state_.num_rendered_samples == 0) {
808  return 1;
809  }
810 
811  const int num_samples_per_update = calculate_num_samples_per_update();
812  const int path_trace_start_sample = get_start_sample_to_path_trace();
813 
814  /* Round number of samples to a power of two, so that division of path states into tiles goes in
815  * a more integer manner.
816  * This might make it so updates happens more rarely due to rounding up. In the test scenes this
817  * is not huge deal because it is not seen that more than 8 samples can be rendered between
818  * updates. If that becomes a problem we can add some extra rules like never allow to round up
819  * more than N samples. */
820  const int num_samples_pot = round_num_samples_to_power_of_2(num_samples_per_update);
821 
822  const int max_num_samples_to_render = start_sample_ + num_samples_ - path_trace_start_sample;
823 
824  int num_samples_to_render = min(num_samples_pot, max_num_samples_to_render);
825 
826  /* When enough statistics is available and doing an offline rendering prefer to keep device
827  * occupied. */
828  if (state_.occupancy_num_samples && (background_ || headless_)) {
829  /* Keep occupancy at about 0.5 (this is more of an empirical figure which seems to match scenes
830  * with good performance without forcing occupancy to be higher). */
831  int num_samples_to_occupy = state_.occupancy_num_samples;
832  if (state_.occupancy < 0.5f) {
833  num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
834  }
835 
836  /* When time limit is used clamp the calculated number of samples to keep occupancy.
837  * This is because time limit causes the last render iteration to happen with less number of
838  * samples, which conflicts with the occupancy (lower number of samples causes lower
839  * occupancy, also the calculation is based on number of previously rendered samples).
840  *
841  * When time limit is not used the number of samples per render iteration is either increasing
842  * or stays the same, so there is no need to clamp number of samples calculated for occupancy.
843  */
844  if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
845  const double remaining_render_time = max(
846  0.0, time_limit_ - (time_dt() - state_.start_render_time));
847  const double time_per_sample_average = path_trace_time_.get_average();
848  const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
849 
850  if (predicted_render_time > remaining_render_time) {
851  num_samples_to_occupy = lround(num_samples_to_occupy *
852  (remaining_render_time / predicted_render_time));
853  }
854  }
855 
856  num_samples_to_render = max(num_samples_to_render,
857  min(num_samples_to_occupy, max_num_samples_to_render));
858  }
859 
860 /* If adaptive sampling is not used, render as many samples per update as possible, keeping the
861 * device fully occupied, without much overhead of display updates. */
862  if (!adaptive_sampling_.use) {
863  return num_samples_to_render;
864  }
865 
866  /* TODO(sergey): Add extra "clamping" here so that none of the filtering points is missing. This
867  * is to ensure that the final render is pixel-matched regardless of how many samples per second
868  * compute device can do. */
869 
870  return adaptive_sampling_.align_samples(path_trace_start_sample - sample_offset_,
871  num_samples_to_render);
872 }
873 
874 int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) const
875 {
876  /* Special trick for fast navigation: schedule multiple samples during fast navigation
877  * (which will prefer to use lower resolution to keep up with refresh rate). This gives more
878  * usable visual feedback for artists. There are a couple of tricks though. */
879 
881  /* When denoising is used during navigation prefer using a higher resolution with less samples
882  * (scheduling less samples here will make it so the resolution_divider calculation will use a
883  * lower value for the divider). This is because both OpenImageDenoiser and OptiX denoiser
884  * give visually better results on a higher resolution image with less samples. */
885  return 1;
886  }
887 
889 /* When resolution divider is at or below pixel size, schedule one sample. This doesn't affect
890  * the sample count at this resolution division, but instead assists in the calculation of
891  * the resolution divider. */
892  return 1;
893  }
894 
895  if (resolution_divider == pixel_size_ * 2) {
896  /* When resolution divider is the previous step to the final resolution, schedule two samples.
897  * This is so that rendering on lower resolution does not exceed time that it takes to render
898  * first sample at the full resolution. */
899  return 2;
900  }
901 
902  /* Always render 4 samples, even if scene is configured for less.
903  * The idea here is to have enough information on the screen. Resolution divider of 2 allows us
904  * to have 4 time extra samples, so overall worst case timing is the same as the final resolution
905  * at one sample. */
906  return 4;
907 }
908 
910 {
912 }
913 
915 {
918  }
919 
920  return max(state_.adaptive_sampling_threshold, adaptive_sampling_.threshold);
921 }
922 
923 bool RenderScheduler::work_need_denoise(bool &delayed, bool &ready_to_display)
924 {
925  delayed = false;
926  ready_to_display = true;
927 
928  if (!denoiser_params_.use) {
929 /* Denoising is disabled, no need to schedule work for it. */
930  return false;
931  }
932 
933  /* When multiple tiles are used the full frame will be denoised.
934  * Avoid per-tile denoising to save up render time. */
936  return false;
937  }
938 
939  if (done()) {
940  /* Always denoise at the last sample. */
941  return true;
942  }
943 
944  if (background_) {
945  /* Background render, only denoise when rendering the last sample. */
946  /* TODO(sergey): Follow similar logic to viewport, giving an overview of how final denoised
947  * image looks like even for the background rendering. */
948  return false;
949  }
950 
951  /* Viewport render. */
952 
953  /* Navigation might render multiple samples at a lower resolution. Those are not to be counted as
954  * final samples. */
955  const int num_samples_finished = state_.resolution_divider == pixel_size_ ?
956  state_.num_rendered_samples :
957  1;
958 
959  /* Immediately denoise when we reach the start sample or last sample. */
960  if (num_samples_finished == denoiser_params_.start_sample ||
961  num_samples_finished == num_samples_) {
962  return true;
963  }
964 
965  /* Do not denoise until the sample at which denoising should start is reached. */
966  if (num_samples_finished < denoiser_params_.start_sample) {
967  ready_to_display = false;
968  return false;
969  }
970 
971  /* Avoid excessive denoising in viewport after reaching a certain sample count and render time.
972  */
973  /* TODO(sergey): Consider making time interval and sample configurable. */
974  delayed = (path_trace_time_.get_wall() > 4 && num_samples_finished >= 20 &&
975  (time_dt() - state_.last_display_update_time) < 1.0);
976 
977  return !delayed;
978 }
979 
980 bool RenderScheduler::work_need_update_display(const bool denoiser_delayed)
981 {
982  if (headless_) {
983  /* Force disable display update in headless mode. There will be nothing to display the
984  * in-progress result. */
985  return false;
986  }
987 
988  if (denoiser_delayed) {
989  /* If denoiser has been delayed the display can not be updated as it will not contain
990  * up-to-date state of the render result. */
991  return false;
992  }
993 
994  if (!adaptive_sampling_.use) {
995  /* When adaptive sampling is not used the work is scheduled in a way that they keep render
996  * device busy for long enough, so that the display update can happen right after the
997  * rendering. */
998  return true;
999  }
1000 
1001  if (done() || state_.last_display_update_sample == -1) {
1002  /* Make sure an initial and final results of adaptive sampling is communicated ot the display.
1003  */
1004  return true;
1005  }
1006 
1007  /* For the development purposes of adaptive sampling it might be very useful to see all updates
1008  * of active pixels after convergence check. However, it would cause a slowdown for regular usage
1009  * users. Possibly, make it a debug panel option to allow rapid update to ease development
1010  * without need to re-compiled. */
1011  // if (work_need_adaptive_filter()) {
1012  // return true;
1013  // }
1014 
1015  /* When adaptive sampling is used, its possible that only handful of samples of a very simple
1016  * scene will be scheduled to a powerful device (in order to not "miss" any of filtering points).
1017  * We take care of skipping updates here based on when previous display update did happen. */
1018  const double update_interval = guess_display_update_interval_in_seconds_for_num_samples(
1019  state_.last_display_update_sample);
1020  return (time_dt() - state_.last_display_update_time) > update_interval;
1021 }
1022 
1024 {
1025  /* This is the minimum time, as the rebalancing can not happen more often than the path trace
1026  * work. */
1027  static const double kRebalanceIntervalInSeconds = 1;
1028 
1030  return false;
1031  }
1032 
1033  if (state_.resolution_divider != pixel_size_) {
1034  /* Don't rebalance at a non-final resolution divider. Some reasons for this:
1035 * - It will introduce unnecessary overhead during navigation.
1036  * - Per-render device timing information is not very reliable yet. */
1037  return false;
1038  }
1039 
1040  if (state_.num_rendered_samples == 0) {
1041  state_.need_rebalance_at_next_work = true;
1042  return false;
1043  }
1044 
1045  if (state_.need_rebalance_at_next_work) {
1046  state_.need_rebalance_at_next_work = false;
1047  return true;
1048  }
1049 
1050  if (state_.last_rebalance_changed) {
1051  return true;
1052  }
1053 
1054  return (time_dt() - state_.last_rebalance_time) > kRebalanceIntervalInSeconds;
1055 }
1056 
1058 {
1060  return;
1061  }
1062 
1063  if (start_resolution_divider_ == 0) {
1064  /* Resolution divider has never been calculated before: use default resolution, so that we have
1065  * somewhat good initial behavior, giving a chance to collect real numbers. */
1067  VLOG_WORK << "Initial resolution divider is " << start_resolution_divider_;
1068  return;
1069  }
1070 
1071  if (first_render_time_.path_trace_per_sample == 0.0) {
1072  /* Not enough information to calculate better resolution, keep the existing one. */
1073  return;
1074  }
1075 
1076  const double desired_update_interval_in_seconds =
1078 
1079  const double actual_time_per_update = first_render_time_.path_trace_per_sample +
1080  first_render_time_.denoise_time +
1081  first_render_time_.display_update_time;
1082 
1083  /* Allow some percent of tolerance, so that if the render time is close enough to the higher
1084  * resolution we prefer to use it instead of going way lower resolution and time way below the
1085  * desired one. */
1086  const int resolution_divider_for_update = calculate_resolution_divider_for_time(
1087  desired_update_interval_in_seconds * 1.4, actual_time_per_update);
1088 
1089  /* TODO(sergey): Need to add hysteresis to avoid resolution divider bouncing around when actual
1090  * render time is somewhere on a boundary between two resolutions. */
1091 
1092  /* Never increase resolution to higher than the pixel size (which is possible if the scene is
1093  * simple and compute device is fast). */
1094  start_resolution_divider_ = max(resolution_divider_for_update, pixel_size_);
1095 
1096  VLOG_WORK << "Calculated resolution divider is " << start_resolution_divider_;
1097 }
1098 
1100 {
1102  /* Use lower value than the non-denoised case to allow having more pixels to reconstruct the
1103  * image from. With the faster updates and extra compute required the resolution becomes too
1104  * low to give usable feedback. */
1105  /* NOTE: Based on performance of OpenImageDenoiser on CPU. For OptiX denoiser or other denoiser
1106  * on GPU the value might need to become lower for faster navigation. */
1107  return 1.0 / 12.0;
1108  }
1109 
1110  /* For the best match with the Blender's viewport the refresh ratio should be 60fps. This will
1111  * avoid "jelly" effects. However, on non-trivial scenes this can only be achieved with high
1112  * values of the resolution divider which does not give very pleasant updates during navigation.
1113  * Choose less frequent updates to allow more noise-free and higher resolution updates. */
1114 
1115  /* TODO(sergey): Can look into heuristic which will allow to have 60fps if the resolution divider
1116  * is not too high. Alternatively, synchronize Blender's overlays updates to Cycles updates. */
1117 
1118  return 1.0 / 30.0;
1119 }
1120 
1122 {
1123  if (!denoiser_params_.use) {
1124  return false;
1125  }
1126 
1127  if (denoiser_params_.start_sample > 1) {
1128  return false;
1129  }
1130 
1131  return true;
1132 }
1133 
1135 {
1136  return render_work.resolution_divider == pixel_size_ &&
1137  render_work.path_trace.start_sample == start_sample_;
1138 }
1139 
1141 {
1142  /* When rendering at a non-final resolution divider time average is not very useful because it
1143  * will either bias average down (due to lower render times on the smaller images) or will give
1144  * incorrect result when trying to estimate time which would have spent on the final resolution.
1145  *
1146  * So we only accumulate average for the latest resolution divider which was rendered. */
1147  return render_work.resolution_divider != pixel_size_;
1148 }
1149 
1151 {
1152  if (time_limit_ == 0.0) {
1153  /* No limit is enforced. */
1154  return;
1155  }
1156 
1157  if (state_.start_render_time == 0.0) {
1158  /* Rendering did not start yet. */
1159  return;
1160  }
1161 
1162  const double current_time = time_dt();
1163 
1164  if (current_time - state_.start_render_time < time_limit_) {
1165  /* Time limit is not reached yet. */
1166  return;
1167  }
1168 
1169  state_.time_limit_reached = true;
1170  state_.end_render_time = current_time;
1171 }
1172 
1173 /* --------------------------------------------------------------------
1174  * Utility functions.
1175  */
1176 
1177 int RenderScheduler::calculate_resolution_divider_for_time(double desired_time, double actual_time)
1178 {
1179  /* TODO(sergey): There should a non-iterative analytical formula here. */
1180 
1181  int resolution_divider = 1;
1182 
1183  /* This algorithm iterates through resolution dividers until a divider is found that achieves
1184  * the desired render time. A limit of default_start_resolution_divider_ is put in place as the
1185  * maximum resolution divider to avoid an unreadable viewport due to a low resolution.
1186  * pre_resolution_division_samples and post_resolution_division_samples are used in this
1187  * calculation to better predict the performance impact of changing resolution divisions as
1188  * the sample count can also change between resolution divisions. */
1189  while (actual_time > desired_time && resolution_divider < default_start_resolution_divider_) {
1190  int pre_resolution_division_samples = get_num_samples_during_navigation(resolution_divider);
1192  int post_resolution_division_samples = get_num_samples_during_navigation(resolution_divider);
1193  actual_time /= 4.0 * pre_resolution_division_samples / post_resolution_division_samples;
1194  }
1195 
1196  return resolution_divider;
1197 }
1198 
/* Calculate the smallest power-of-two resolution divider for which the divided image has no more
 * pixels than a `resolution` x `resolution` square.
 *
 * `width` and `height` are halved (but never go below 1) for every doubling of the divider.
 * A `resolution` of INT_MAX means "no limit" and always gives a divider of 1. */
int calculate_resolution_divider_for_resolution(int width, int height, int resolution)
{
  if (resolution == INT_MAX) {
    return 1;
  }

  /* Use 64-bit arithmetic for the areas: both products can exceed the range of `int`. */
  const long long max_pixel_area = static_cast<long long>(resolution) * resolution;

  int resolution_divider = 1;
  while (static_cast<long long>(width) * height > max_pixel_area) {
    if (width == 1 && height == 1) {
      /* The image can not become any smaller (only possible when `resolution` is 0). Stop here
       * instead of looping forever and overflowing the divider. */
      break;
    }

    width = (width > 1) ? width / 2 : 1;
    height = (height > 1) ? height / 2 : 1;

    resolution_divider <<= 1;
  }

  return resolution_divider;
}
1215 
/* Calculate the resolution which corresponds to the given resolution divider.
 *
 * The resolution of the full image is the rounded square root of its pixel area
 * (`width` * `height`); the result is that value divided (integer division) by
 * `resolution_divider`. */
int calculate_resolution_for_divider(int width, int height, int resolution_divider)
{
  /* Calculate the area in floating point: the product of two ints can overflow an int for large
   * images, which would give a bogus (zero or negative) area. */
  const double pixel_area = static_cast<double>(width) * static_cast<double>(height);
  const int resolution = static_cast<int>(lround(sqrt(pixel_area)));

  return resolution / resolution_divider;
}
1223 
sqrt(x)+1/max(0
unsigned int uint
Definition: BLI_sys_types.h:67
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei height
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei width
bool need_filter(int sample) const
int align_samples(int start_sample, int num_samples) const
int height
Definition: buffers.h:72
NODE_DECLARE int width
Definition: buffers.h:71
DenoiserType type
Definition: denoise.h:53
int start_sample
Definition: denoise.h:56
NODE_DECLARE bool use
Definition: denoise.h:50
bool use_pass_normal
Definition: denoise.h:60
bool use_pass_albedo
Definition: denoise.h:59
void add_average(double time, int num_measurements=1)
int calculate_resolution_divider_for_time(double desired_time, double actual_time)
AdaptiveSampling adaptive_sampling_
DenoiseParams denoiser_params_
void update_state_for_render_work(const RenderWork &render_work)
void report_display_update_time(const RenderWork &render_work, double time)
float work_adaptive_threshold() const
int calculate_num_samples_per_update() const
int get_rendered_sample() const
void set_time_limit(double time_limit)
bool work_need_update_display(const bool denoiser_delayed)
BufferParams buffer_params_
void set_sample_offset(int sample_offset)
struct RenderScheduler::@1247 first_render_time_
void report_adaptive_filter_time(const RenderWork &render_work, double time, bool is_cancelled)
void set_need_schedule_rebalance(bool need_schedule_rebalance)
double guess_display_update_interval_in_seconds() const
bool work_need_denoise(bool &delayed, bool &ready_to_display)
bool work_need_adaptive_filter() const
struct RenderScheduler::@1246 state_
int get_num_rendered_samples() const
string full_report() const
bool need_schedule_rebalance_works_
void reset(const BufferParams &buffer_params, int num_samples, int sample_offset)
bool is_denoise_active_during_update() const
int get_num_samples_during_navigation(int resolution_divier) const
bool set_postprocess_render_work(RenderWork *render_work)
void report_rebalance_time(const RenderWork &render_work, double time, bool balance_changed)
double get_time_limit() const
double guess_display_update_interval_in_seconds_for_num_samples_no_limit(int num_rendered_samples) const
bool work_is_usable_for_first_render_estimation(const RenderWork &render_work)
void report_denoise_time(const RenderWork &render_work, double time)
bool is_adaptive_sampling_used() const
void set_denoiser_params(const DenoiseParams &params)
bool use_progressive_noise_floor_
double guess_display_update_interval_in_seconds_for_num_samples(int num_rendered_samples) const
bool render_work_reschedule_on_idle(RenderWork &render_work)
int get_num_samples() const
TimeWithAverage adaptive_filter_time_
TimeWithAverage rebalance_time_
int get_sample_offset() const
int default_start_resolution_divider_
void report_path_trace_time(const RenderWork &render_work, double time, bool is_cancelled)
TimeWithAverage display_update_time_
void set_full_frame_render_work(RenderWork *render_work)
double guess_viewport_navigation_update_interval_in_seconds() const
void update_start_resolution_divider()
void set_num_samples(int num_samples)
void report_path_trace_occupancy(const RenderWork &render_work, float occupancy)
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
bool render_work_reschedule_on_converge(RenderWork &render_work)
TileManager & tile_manager_
bool work_report_reset_average(const RenderWork &render_work)
TimeWithAverage path_trace_time_
void set_start_sample(int start_sample)
int get_start_sample_to_path_trace() const
int get_num_samples_to_path_trace() const
void render_work_reschedule_on_cancel(RenderWork &render_work)
void report_work_begin(const RenderWork &render_work)
TimeWithAverage denoise_time_
void set_need_schedule_cryptomatte(bool need_schedule_cryptomatte)
int get_start_sample() const
RenderWork get_render_work()
RenderScheduler(TileManager &tile_manager, const SessionParams &params)
bool is_background() const
int resolution_divider
struct RenderWork::@1244 full
bool use_denoised_result
struct RenderWork::@1245 display
bool init_render_buffers
struct RenderWork::@1242 cryptomatte
struct RenderWork::@1241 adaptive_sampling
struct RenderWork::@1240 path_trace
struct RenderWork::@1243 tile
bool has_multiple_tiles() const
Definition: session/tile.h:68
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
double time
CCL_NAMESPACE_BEGIN const char * denoiserTypeToHumanReadable(DenoiserType type)
Definition: denoise.cpp:8
static const char * to_string(const Interpolation &interp)
Definition: gl_shader.cc:63
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
ccl_gpu_kernel_postfix ccl_global float int int int int ccl_global const float int int int int int int int int int int int int num_samples
#define VLOG_WORK
Definition: log.h:80
#define DCHECK_GT(a, b)
Definition: log.h:65
int calculate_resolution_divider_for_resolution(int width, int height, int resolution)
static double approximate_final_time(const RenderWork &render_work, double time)
static uint round_num_samples_to_power_of_2(const uint num_samples)
int calculate_resolution_for_divider(int width, int height, int resolution_divider)
#define min(a, b)
Definition: sort.c:35
string string_from_bool(bool var)
Definition: string.cpp:167
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition: string.cpp:22
CCL_NAMESPACE_BEGIN double time_dt()
Definition: time.cpp:35
float max
ccl_device_inline uint next_power_of_two(uint x)
Definition: util/math.h:933
ccl_device_inline bool is_power_of_two(size_t x)
Definition: util/types.h:66