Blender  V3.3
device/metal/kernel.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2021-2022 Blender Foundation */
3 
4 #pragma once
5 
6 #ifdef WITH_METAL
7 
8 # include "device/kernel.h"
9 # include <Metal/Metal.h>
10 
12 
13 class MetalDevice;
14 
/* Slot indices for the MetalRT intersection functions. `METALRT_FUNC_NUM` is the
 * number of slots and is used to size per-pipeline function arrays. */
enum {
  /* Default-table functions. */
  METALRT_FUNC_DEFAULT_TRI = 0,
  METALRT_FUNC_DEFAULT_BOX,

  /* Shadow-table functions. */
  METALRT_FUNC_SHADOW_TRI,
  METALRT_FUNC_SHADOW_BOX,

  /* Local-table functions. */
  METALRT_FUNC_LOCAL_TRI,
  METALRT_FUNC_LOCAL_BOX,

  /* Curve functions (ribbon-only and all-curve variants, each with a shadow twin). */
  METALRT_FUNC_CURVE_RIBBON,
  METALRT_FUNC_CURVE_RIBBON_SHADOW,
  METALRT_FUNC_CURVE_ALL,
  METALRT_FUNC_CURVE_ALL_SHADOW,

  /* Point functions. */
  METALRT_FUNC_POINT,
  METALRT_FUNC_POINT_SHADOW,

  /* Count sentinel; must stay last. */
  METALRT_FUNC_NUM
};
30 
/* Slot indices for the MetalRT intersection function tables. `METALRT_TABLE_NUM`
 * is the number of tables and is used to size per-pipeline table arrays. */
enum {
  METALRT_TABLE_DEFAULT = 0,
  METALRT_TABLE_SHADOW,
  METALRT_TABLE_LOCAL,

  /* Count sentinel; must stay last. */
  METALRT_TABLE_NUM
};
32 
/* Pipeline State Object (PSO) types. */
enum MetalPipelineType {
  /* Generic kernel: works with every scene and supports all features.
   * Compilation is slow, but it only has to happen once; the result is cached
   * for later render sessions, so a render can start on the GPU quickly. */
  PSO_GENERIC = 0,

  /* Specialized intersection kernel: very quick to specialize, and gives faster
   * intersection kernel performance. Metal function constants replace several
   * KernelData variables with fixed constants. */
  PSO_SPECIALIZED_INTERSECT,

  /* Specialized shading kernel: slow to specialize, but gives faster shading
   * kernel performance. Metal function constants replace several KernelData
   * variables with fixed constants and short-circuit all unused SVM node case
   * handlers. */
  PSO_SPECIALIZED_SHADE,

  /* Count sentinel; must stay last. */
  PSO_NUM
};
56 
57 # define METALRT_FEATURE_MASK \
58  (KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
59  KERNEL_FEATURE_OBJECT_MOTION)
60 
61 const char *kernel_type_as_string(MetalPipelineType pso_type);
62 
63 struct MetalKernelPipeline {
64 
65  void compile();
66 
67  id<MTLLibrary> mtlLibrary = nil;
68  MetalPipelineType pso_type;
69  string source_md5;
70  size_t usage_count = 0;
71 
72  KernelData kernel_data_;
73  bool use_metalrt;
74  uint32_t metalrt_features = 0;
75 
76  int threads_per_threadgroup;
77 
78  DeviceKernel device_kernel;
79  bool loaded = false;
80  id<MTLDevice> mtlDevice = nil;
81  id<MTLFunction> function = nil;
82  id<MTLComputePipelineState> pipeline = nil;
83  int num_threads_per_block = 0;
84 
85  bool should_use_binary_archive() const;
86 
87  string error_str;
88 
89  API_AVAILABLE(macos(11.0))
90  id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
91  id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil};
92 };
93 
94 /* Cache of Metal kernels for each DeviceKernel. */
95 namespace MetalDeviceKernels {
96 
97 bool should_load_kernels(MetalDevice *device, MetalPipelineType pso_type);
98 bool load(MetalDevice *device, MetalPipelineType pso_type);
99 const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
100 
101 } /* namespace MetalDeviceKernels */
102 
104 
105 #endif /* WITH_METAL */
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
DeviceKernel
unsigned int uint32_t
Definition: stdint.h:80