Blender  V3.3
atomic.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2014-2022 Blender Foundation */
3 
4 #ifndef __UTIL_ATOMIC_H__
5 #define __UTIL_ATOMIC_H__
6 
7 #ifndef __KERNEL_GPU__
8 
9 /* Using atomic ops header from Blender. */
10 # include "atomic_ops.h"
11 
12 # define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x))
13 # define atomic_compare_and_swap_float(p, old_val, new_val) \
14  atomic_cas_float((p), (old_val), (new_val))
15 
16 # define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
17 # define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1)
18 
19 # define CCL_LOCAL_MEM_FENCE 0
20 # define ccl_barrier(flags) ((void)0)
21 
#else /* __KERNEL_GPU__ */

#  if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)

/* CUDA/HIP: atomicAdd returns the OLD value, so add `x` once more to get
 * add-and-fetch (new value) semantics. */
#    define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x))

/* Unsigned 32-bit fetch-and-op wrappers over the hardware atomics;
 * each returns the value previously stored at `p`. */
#    define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int *)(p), (unsigned int)(x))
#    define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x))
#    define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
#    define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
#    define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x))
/* Compare-and-swap for float, emulated by reinterpreting the 32-bit pattern as
 * unsigned int and using the integer atomicCAS.
 * Returns the value previously stored at `dest`; compare it against `old_val`
 * to learn whether the swap took place.
 * NOTE(review): the signature line was lost in extraction; reconstructed from
 * the body and the sibling Metal/oneAPI overloads -- confirm against the
 * original header. */
ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest,
                                                      const float old_val,
                                                      const float new_val)
{
  union {
    unsigned int int_value;
    float float_value;
  } new_value, prev_value, result;
  prev_value.float_value = old_val;
  new_value.float_value = new_val;
  result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value, new_value.int_value);
  return result.float_value;
}
47 
/* CCL_LOCAL_MEM_FENCE carries no value on CUDA/HIP: __syncthreads() is a full
 * block barrier regardless of the flag argument. */
#    define CCL_LOCAL_MEM_FENCE
#    define ccl_barrier(flags) __syncthreads()

#  endif /* __KERNEL_CUDA__ */
53 # ifdef __KERNEL_METAL__
54 
55 // global address space versions
56 ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *_source,
57  const float operand)
58 {
59  volatile ccl_global atomic_int *source = (ccl_global atomic_int *)_source;
60  union {
61  int int_value;
62  float float_value;
63  } new_value, prev_value;
64  prev_value.int_value = atomic_load_explicit(source, memory_order_relaxed);
65  do {
66  new_value.float_value = prev_value.float_value + operand;
67  } while (!atomic_compare_exchange_weak_explicit(source,
68  &prev_value.int_value,
69  new_value.int_value,
70  memory_order_relaxed,
71  memory_order_relaxed));
72 
73  return new_value.float_value;
74 }
75 
/* Unsigned 32-bit fetch-and-op wrappers over Metal's relaxed device atomics;
 * each returns the value previously stored at `p`. */
#    define atomic_fetch_and_add_uint32(p, x) \
      atomic_fetch_add_explicit((device atomic_uint *)p, x, memory_order_relaxed)
#    define atomic_fetch_and_sub_uint32(p, x) \
      atomic_fetch_sub_explicit((device atomic_uint *)p, x, memory_order_relaxed)
#    define atomic_fetch_and_inc_uint32(p) \
      atomic_fetch_add_explicit((device atomic_uint *)p, 1, memory_order_relaxed)
#    define atomic_fetch_and_dec_uint32(p) \
      atomic_fetch_sub_explicit((device atomic_uint *)p, 1, memory_order_relaxed)
#    define atomic_fetch_and_or_uint32(p, x) \
      atomic_fetch_or_explicit((device atomic_uint *)p, x, memory_order_relaxed)
88  const float old_val,
89  const float new_val)
90 {
91  int prev_value;
92  prev_value = __float_as_int(old_val);
93  atomic_compare_exchange_weak_explicit((ccl_global atomic_int *)dest,
94  &prev_value,
95  __float_as_int(new_val),
96  memory_order_relaxed,
97  memory_order_relaxed);
98  return __int_as_float(prev_value);
99 }
100 
101 # define atomic_store(p, x) atomic_store_explicit(p, x, memory_order_relaxed)
102 # define atomic_fetch(p) atomic_load_explicit(p, memory_order_relaxed)
103 
104 # define CCL_LOCAL_MEM_FENCE mem_flags::mem_threadgroup
105 # define ccl_barrier(flags) threadgroup_barrier(flags)
106 
107 # endif /* __KERNEL_METAL__ */
108 
109 # ifdef __KERNEL_ONEAPI__
110 
112 {
113  sycl::atomic_ref<float,
114  sycl::memory_order::relaxed,
115  sycl::memory_scope::device,
116  sycl::access::address_space::ext_intel_global_device_space>
117  atomic(*p);
118  return atomic.fetch_add(x);
119 }
120 
122  float old_val,
123  float new_val)
124 {
125  sycl::atomic_ref<float,
126  sycl::memory_order::relaxed,
127  sycl::memory_scope::device,
128  sycl::access::address_space::ext_intel_global_device_space>
129  atomic(*source);
130  atomic.compare_exchange_weak(old_val, new_val);
131  return old_val;
132 }
133 
134 ccl_device_inline unsigned int atomic_fetch_and_add_uint32(ccl_global unsigned int *p,
135  unsigned int x)
136 {
137  sycl::atomic_ref<unsigned int,
138  sycl::memory_order::relaxed,
139  sycl::memory_scope::device,
140  sycl::access::address_space::ext_intel_global_device_space>
141  atomic(*p);
142  return atomic.fetch_add(x);
143 }
144 
146 {
147  sycl::atomic_ref<int,
148  sycl::memory_order::relaxed,
149  sycl::memory_scope::device,
150  sycl::access::address_space::ext_intel_global_device_space>
151  atomic(*p);
152  return atomic.fetch_add(x);
153 }
154 
155 ccl_device_inline unsigned int atomic_fetch_and_sub_uint32(ccl_global unsigned int *p,
156  unsigned int x)
157 {
158  sycl::atomic_ref<unsigned int,
159  sycl::memory_order::relaxed,
160  sycl::memory_scope::device,
161  sycl::access::address_space::ext_intel_global_device_space>
162  atomic(*p);
163  return atomic.fetch_sub(x);
164 }
165 
166 ccl_device_inline int atomic_fetch_and_sub_uint32(ccl_global int *p, int x)
167 {
168  sycl::atomic_ref<int,
169  sycl::memory_order::relaxed,
170  sycl::memory_scope::device,
171  sycl::access::address_space::ext_intel_global_device_space>
172  atomic(*p);
173  return atomic.fetch_sub(x);
174 }
175 
176 ccl_device_inline unsigned int atomic_fetch_and_inc_uint32(ccl_global unsigned int *p)
177 {
178  return atomic_fetch_and_add_uint32(p, 1);
179 }
180 
182 {
183  return atomic_fetch_and_add_uint32(p, 1);
184 }
185 
186 ccl_device_inline unsigned int atomic_fetch_and_dec_uint32(ccl_global unsigned int *p)
187 {
188  return atomic_fetch_and_sub_uint32(p, 1);
189 }
190 
192 {
193  return atomic_fetch_and_sub_uint32(p, 1);
194 }
195 
196 ccl_device_inline unsigned int atomic_fetch_and_or_uint32(ccl_global unsigned int *p,
197  unsigned int x)
198 {
199  sycl::atomic_ref<unsigned int,
200  sycl::memory_order::relaxed,
201  sycl::memory_scope::device,
202  sycl::access::address_space::ext_intel_global_device_space>
203  atomic(*p);
204  return atomic.fetch_or(x);
205 }
206 
208 {
209  sycl::atomic_ref<int,
210  sycl::memory_order::relaxed,
211  sycl::memory_scope::device,
212  sycl::access::address_space::ext_intel_global_device_space>
213  atomic(*p);
214  return atomic.fetch_or(x);
215 }
216 
217 # endif /* __KERNEL_ONEAPI__ */
218 
219 #endif /* __KERNEL_GPU__ */
220 
221 #endif /* __UTIL_ATOMIC_H__ */
typedef float(TangentPoint)[2]
#define atomic_fetch_and_dec_uint32(p)
Definition: atomic.h:17
#define atomic_compare_and_swap_float(p, old_val, new_val)
Definition: atomic.h:13
#define atomic_fetch_and_inc_uint32(p)
Definition: atomic.h:16
#define atomic_add_and_fetch_float(p, x)
Definition: atomic.h:12
Provides wrapper around system-specific atomic primitives, and some extensions (faked-atomic operatio...
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x)
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
#define ccl_device_inline
Definition: cuda/compat.h:34
#define ccl_global
Definition: cuda/compat.h:43
SyclQueue void * dest
ccl_device_inline int __float_as_int(float f)
Definition: util/math.h:243
ccl_device_inline float __int_as_float(int i)
Definition: util/math.h:253