Blender  V3.3
integrator/tile.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #include "integrator/tile.h"
5 
6 #include "util/log.h"
7 #include "util/math.h"
8 
10 
11 std::ostream &operator<<(std::ostream &os, const TileSize &tile_size)
12 {
13  os << "size: (" << tile_size.width << ", " << tile_size.height << ")";
14  os << ", num_samples: " << tile_size.num_samples;
15  return os;
16 }
17 
19 {
20  if (is_power_of_two(x)) {
21  return x;
22  }
23 
24  return prev_power_of_two(x);
25 }
26 
28 {
29  if (is_power_of_two(x)) {
30  return x;
31  }
32 
33  return next_power_of_two(x);
34 }
35 
36 TileSize tile_calculate_best_size(const bool accel_rt,
37  const int2 &image_size,
38  const int num_samples,
39  const int max_num_path_states,
40  const float scrambling_distance)
41 {
42  if (max_num_path_states == 1) {
43  /* Simple case: avoid any calculation, which could cause rounding issues. */
44  return TileSize(1, 1, 1);
45  }
46 
47  const int64_t num_pixels = image_size.x * image_size.y;
48  const int64_t num_pixel_samples = num_pixels * num_samples;
49 
50  if (max_num_path_states >= num_pixel_samples) {
51  /* Image fully fits into the state (could be border render, for example). */
52  return TileSize(image_size.x, image_size.y, num_samples);
53  }
54 
55  /* The idea here is to keep number of samples per tile as much as possible to improve coherency
56  * across threads.
57  *
58  * Some general ideas:
59  * - Prefer smaller tiles with more samples, which improves spatial coherency of paths.
60  * - Keep values a power of two, for more integer fit into the maximum number of paths. */
61 
62  TileSize tile_size;
63  const int num_path_states_per_sample = max_num_path_states / num_samples;
64  if (scrambling_distance < 0.9f && accel_rt) {
65  /* Prefer large tiles for scrambling distance, bounded by max num path states. */
66  tile_size.width = min(image_size.x, max_num_path_states);
67  tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
68  }
69  else {
70  /* Calculate tile size as if it is the most possible one to fit an entire range of samples.
71  * The idea here is to keep tiles as small as possible, and keep device occupied by scheduling
72  * multiple tiles with the same coordinates rendering different samples. */
73 
74  if (num_path_states_per_sample != 0) {
75  tile_size.width = round_down_to_power_of_two(lround(sqrt(num_path_states_per_sample)));
76  tile_size.height = tile_size.width;
77  }
78  else {
79  tile_size.width = tile_size.height = 1;
80  }
81  }
82 
83  if (num_samples == 1) {
84  tile_size.num_samples = 1;
85  }
86  else {
87  /* Heuristic here is to have more uniform division of the sample range: for example prefer
88  * [32 <38 times>, 8] over [1024, 200]. This allows to greedily add more tiles early on. */
89  tile_size.num_samples = min(round_up_to_power_of_two(lround(sqrt(num_samples / 2))),
90  static_cast<uint>(num_samples));
91 
92  const int tile_area = tile_size.width * tile_size.height;
93  tile_size.num_samples = min(tile_size.num_samples, max_num_path_states / tile_area);
94  }
95 
96  DCHECK_GE(tile_size.width, 1);
97  DCHECK_GE(tile_size.height, 1);
98  DCHECK_GE(tile_size.num_samples, 1);
99  DCHECK_LE(tile_size.width * tile_size.height * tile_size.num_samples, max_num_path_states);
100 
101  return tile_size;
102 }
103 
sqrt(x)+1/max(0
unsigned int uint
Definition: BLI_sys_types.h:67
#define ccl_device_inline
Definition: cuda/compat.h:34
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
CCL_NAMESPACE_BEGIN std::ostream & operator<<(std::ostream &os, const TileSize &tile_size)
ccl_device_inline uint round_down_to_power_of_two(uint x)
TileSize tile_calculate_best_size(const bool accel_rt, const int2 &image_size, const int num_samples, const int max_num_path_states, const float scrambling_distance)
ccl_device_inline uint round_up_to_power_of_two(uint x)
ccl_gpu_kernel_postfix ccl_global float int num_pixels
ccl_gpu_kernel_postfix ccl_global float int int int int ccl_global const float int int int int int int int int int int int int num_samples
#define DCHECK_GE(a, b)
Definition: log.h:62
#define DCHECK_LE(a, b)
Definition: log.h:67
#define min(a, b)
Definition: sort.c:35
__int64 int64_t
Definition: stdint.h:89
int x
Definition: types_int2.h:15
int y
Definition: types_int2.h:15
float max
ccl_device_inline uint next_power_of_two(uint x)
Definition: util/math.h:933
ccl_device_inline uint prev_power_of_two(uint x)
Definition: util/math.h:939
ccl_device_inline bool is_power_of_two(size_t x)
Definition: util/types.h:66