Halide
runtime_atomics.h
Go to the documentation of this file.
1 #ifndef HALIDE_RUNTIME_RUNTIME_ATOMICS_H
2 #define HALIDE_RUNTIME_RUNTIME_ATOMICS_H
3 
4 // This file provides an abstraction layer over the __sync/__atomic builtins
5 // in Clang; for various reasons, we use __sync for 32-bit targets, and
6 // __atomic for 64-bit. At some point it may be desirable/necessary to
7 // migrate 32-bit to __atomic as well, at which time this file can
8 // likely go away. See https://github.com/halide/Halide/issues/7431 for
9 // a discussion of the history and issues as to why we work this way.
10 
11 #include "HalideRuntime.h"
12 
13 namespace Halide {
14 namespace Runtime {
15 namespace Internal {
16 namespace Synchronization {
17 
18 namespace {
19 
20 // TODO: most of these wrappers should do the remove_volatile for secondary arguments;
21 // I've only put it in place for the locations necessary at this time.
// Type trait mapping `volatile T` to `T`; any type without a top-level
// volatile qualifier is passed through unchanged.
template<class T>
struct remove_volatile {
    using type = T;
};

// Partial specialization that strips the top-level volatile qualifier.
template<class T>
struct remove_volatile<volatile T> {
    using type = T;
};
26 
27 #ifdef BITS_32
28 ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
29  return __sync_and_and_fetch(addr, val);
30 }
31 
32 template<typename T>
33 ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
34  return __sync_fetch_and_add(addr, val);
35 }
36 
37 template<typename T, typename TV = typename remove_volatile<T>::type>
38 ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) {
39  return __sync_fetch_and_add(addr, val);
40 }
41 
42 template<typename T, typename TV = typename remove_volatile<T>::type>
43 ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) {
44  return __sync_fetch_and_sub(addr, val);
45 }
46 
47 template<typename T, typename TV = typename remove_volatile<T>::type>
48 ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) {
49  return __sync_fetch_and_or(addr, val);
50 }
51 
52 template<typename T>
53 ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) {
54  return __sync_add_and_fetch(addr, val);
55 }
56 
57 template<typename T>
58 ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) {
59  return __sync_sub_and_fetch(addr, val);
60 }
61 
62 template<typename T, typename TV = typename remove_volatile<T>::type>
63 ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, TV *expected, TV *desired) {
64  TV oldval = *expected;
65  TV gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
66  *expected = gotval;
67  return oldval == gotval;
68 }
69 
70 ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
71  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
72 }
73 
74 template<typename T, typename TV = typename remove_volatile<T>::type>
75 ALWAYS_INLINE bool atomic_cas_strong_sequentially_consistent(T *addr, TV *expected, TV *desired) {
76  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
77 }
78 
79 ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
80  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
81 }
82 
83 template<typename T>
84 ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
85  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
86 }
87 
88 ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
89  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
90 }
91 
92 ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
93  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
94 }
95 
96 template<typename T>
97 ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) {
98  return __sync_fetch_and_and(addr, val);
99 }
100 
101 template<typename T, typename TV = typename remove_volatile<T>::type>
102 ALWAYS_INLINE T atomic_fetch_and_sequentially_consistent(T *addr, TV val) {
103  return __sync_fetch_and_and(addr, val);
104 }
105 
106 template<typename T>
107 ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
108  *val = *addr;
109 }
110 
111 template<typename T>
112 ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
113  __sync_synchronize();
114  *val = *addr;
115 }
116 
117 template<typename T>
118 ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) {
119  // Despite the name, this is really just an exchange operation with acquire ordering.
120  return __sync_lock_test_and_set(addr, val);
121 }
122 
123 ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
124  return __sync_or_and_fetch(addr, val);
125 }
126 
127 ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
128  *addr = *val;
129 }
130 
131 template<typename T>
132 ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
133  *addr = *val;
134  __sync_synchronize();
135 }
136 
137 template<typename T, typename TV = typename remove_volatile<T>::type>
138 ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) {
139  *addr = *val;
140  __sync_synchronize();
141 }
142 
143 ALWAYS_INLINE void atomic_thread_fence_acquire() {
144  __sync_synchronize();
145 }
146 
147 ALWAYS_INLINE void atomic_thread_fence_sequentially_consistent() {
148  __sync_synchronize();
149 }
150 
151 #else
152 
153 ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
154  return __atomic_and_fetch(addr, val, __ATOMIC_RELEASE);
155 }
156 
157 template<typename T>
158 ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
159  return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
160 }
161 
162 template<typename T, typename TV = typename remove_volatile<T>::type>
163 ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) {
164  return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
165 }
166 
167 template<typename T, typename TV = typename remove_volatile<T>::type>
168 ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) {
169  return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST);
170 }
171 
172 template<typename T, typename TV = typename remove_volatile<T>::type>
173 ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) {
174  return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST);
175 }
176 
177 template<typename T>
178 ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) {
179  return __atomic_add_fetch(addr, val, __ATOMIC_SEQ_CST);
180 }
181 
182 template<typename T>
183 ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) {
184  return __atomic_sub_fetch(addr, val, __ATOMIC_SEQ_CST);
185 }
186 
187 ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
188  return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
189 }
190 
191 template<typename T, typename TV = typename remove_volatile<T>::type>
192 ALWAYS_INLINE bool atomic_cas_strong_sequentially_consistent(T *addr, TV *expected, TV *desired) {
193  return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
194 }
195 
196 template<typename T>
197 ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
198  return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
199 }
200 
201 ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
202  return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
203 }
204 
205 ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
206  return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
207 }
208 
209 ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
210  return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
211 }
212 
213 template<typename T>
214 ALWAYS_INLINE uintptr_t atomic_fetch_and_release(T *addr, T val) {
215  return __atomic_fetch_and(addr, val, __ATOMIC_RELEASE);
216 }
217 
218 template<typename T, typename TV = typename remove_volatile<T>::type>
219 ALWAYS_INLINE uintptr_t atomic_fetch_and_sequentially_consistent(T *addr, TV val) {
220  return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST);
221 }
222 
223 template<typename T>
224 ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
225  __atomic_load(addr, val, __ATOMIC_RELAXED);
226 }
227 
228 template<typename T>
229 ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
230  __atomic_load(addr, val, __ATOMIC_ACQUIRE);
231  __sync_synchronize();
232  *val = *addr;
233 }
234 
235 template<typename T>
236 ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) {
237  T result;
238  __atomic_exchange(addr, &val, &result, __ATOMIC_ACQUIRE);
239  return result;
240 }
241 
242 ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
243  return __atomic_or_fetch(addr, val, __ATOMIC_RELAXED);
244 }
245 
246 ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
247  __atomic_store(addr, val, __ATOMIC_RELAXED);
248 }
249 
250 template<typename T>
251 ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
252  __atomic_store(addr, val, __ATOMIC_RELEASE);
253 }
254 
255 template<typename T, typename TV = typename remove_volatile<T>::type>
256 ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) {
257  __atomic_store(addr, val, __ATOMIC_SEQ_CST);
258 }
259 
260 ALWAYS_INLINE void atomic_thread_fence_acquire() {
261  __atomic_thread_fence(__ATOMIC_ACQUIRE);
262 }
263 
264 ALWAYS_INLINE void atomic_thread_fence_sequentially_consistent() {
265  __atomic_thread_fence(__ATOMIC_SEQ_CST);
266 }
267 
268 #endif
269 
270 } // namespace
271 
272 } // namespace Synchronization
273 } // namespace Internal
274 } // namespace Runtime
275 } // namespace Halide
276 
277 #endif // HALIDE_RUNTIME_RUNTIME_ATOMICS_H
uintptr_t
__UINTPTR_TYPE__ uintptr_t
Definition: runtime_internal.h:73
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AbstractGenerator.h:19
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
ALWAYS_INLINE
#define ALWAYS_INLINE
Definition: runtime_internal.h:55
HalideRuntime.h