Blender  V3.3
optimization.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
#ifndef __UTIL_OPTIMIZATION_H__
#define __UTIL_OPTIMIZATION_H__

/* CPU kernel optimization selection.
 *
 * For non-GPU builds this header inspects the target architecture and:
 *  - defines the `__KERNEL_*__` instruction-set macros that are always
 *    available on that architecture, and
 *  - maps each `WITH_KERNEL_<ISA>` option to the matching
 *    `WITH_CYCLES_OPTIMIZED_KERNEL_<ISA>` macro, but only for the
 *    instruction sets that make sense on that architecture.
 *
 * The `WITH_KERNEL_*` and `WITH_SSE2NEON` macros are not defined here; they
 * are presumably provided by the build system (verify against the build
 * configuration). */

#ifndef __KERNEL_GPU__

/* x86
 *
 * Compile a regular, SSE2 and SSE3 kernel. */

/* `i386` lives in the user namespace, so GCC and Clang only predefine it in
 * GNU dialect modes. `__i386__` is predefined in every mode; test it as well
 * so that strict `-std=c11` / `-std=c++11` builds for 32-bit x86 do not
 * silently fall through this branch. */
#  if defined(__i386__) || defined(i386) || defined(_M_IX86)

/* We require minimum SSE2 support on x86, so auto enable. */
#    define __KERNEL_SSE2__
#    ifdef WITH_KERNEL_SSE2
#      define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
#    endif
#    ifdef WITH_KERNEL_SSE3
#      define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
#    endif

/* x86-64
 *
 * Compile a regular (includes SSE2), SSE3, SSE 4.1, AVX and AVX2 kernel. */

#  elif defined(__x86_64__) || defined(_M_X64)

/* SSE2 is always available on x86-64 CPUs, so auto enable. */
#    define __KERNEL_SSE2__
/* No SSE2 kernel on x86-64, it is part of the regular kernel. */
#    ifdef WITH_KERNEL_SSE3
#      define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
#    endif
#    ifdef WITH_KERNEL_SSE41
#      define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
#    endif
#    ifdef WITH_KERNEL_AVX
#      define WITH_CYCLES_OPTIMIZED_KERNEL_AVX
#    endif
#    ifdef WITH_KERNEL_AVX2
#      define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
#    endif

/* Arm Neon
 *
 * Compile an SSE4 kernel emulated with Neon. Most code is shared with
 * SSE, some specializations for performance and compatibility are
 * made by testing for __KERNEL_NEON__. */

#  elif defined(__ARM_NEON) && defined(WITH_SSE2NEON)

#    define __KERNEL_NEON__
#    define __KERNEL_SSE__
#    define __KERNEL_SSE2__
#    define __KERNEL_SSE3__
#    define __KERNEL_SSE41__

#  endif

#endif /* !__KERNEL_GPU__ */

#endif /* __UTIL_OPTIMIZATION_H__ */