Halide
synchronization_common.h
Go to the documentation of this file.
1 #include "HalideRuntime.h"
2 #include "printer.h"
3 #include "scoped_spin_lock.h"
4 
5 /* This provides an implementation of pthreads-like mutex and
6  * condition variables with fast default case performance. The code
7  * is based on the "parking lot" design and specifically Amanieu
8  * d'Antras' Rust implementation:
9  * https://github.com/Amanieu/parking_lot
10  * and the one in WTF:
11  * https://webkit.org/blog/6161/locking-in-webkit/
12  *
13  * Neither of the above implementations was used directly, largely for
14  * dependency-management reasons. This implementation lacks a few features
15  * relative to those two. Specifically timeouts are not
16  * supported. Nor is optional fairness or deadlock detection.
17  * This implementation should provide a fairly standalone "one file"
18  * fast synchronization layer on top of readily available system primitives.
19  *
20  * TODO: Implement pthread_once equivalent.
21  * TODO: Add read/write lock and move SharedExclusiveSpinLock from tracing.cpp
22  * to this mechanism.
23  * TODO: Add timeouts and optional fairness if needed.
24  * TODO: Relying on condition variables has issues for old versions of Windows
25  * and likely has portability issues to some very bare bones embedded OSes.
26  * Doing an implementation using only semaphores or event counters should
27  * be doable.
28  */
29 
30 // Copied from tsan_interface.h
31 #ifndef TSAN_ANNOTATIONS
32 #define TSAN_ANNOTATIONS 0
33 #endif
34 
35 #if TSAN_ANNOTATIONS
36 extern "C" {
37 const unsigned __tsan_mutex_linker_init = 1 << 0;
38 void __tsan_mutex_pre_lock(void *addr, unsigned flags);
39 void __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion);
40 int __tsan_mutex_pre_unlock(void *addr, unsigned flags);
41 void __tsan_mutex_post_unlock(void *addr, unsigned flags);
42 void __tsan_mutex_pre_signal(void *addr, unsigned flags);
43 void __tsan_mutex_post_signal(void *addr, unsigned flags);
44 }
45 #endif
46 
47 namespace Halide {
48 namespace Runtime {
49 namespace Internal {
50 
51 namespace Synchronization {
52 
53 namespace {
54 
55 #if TSAN_ANNOTATIONS
56 ALWAYS_INLINE void if_tsan_pre_lock(void *mutex) {
57  __tsan_mutex_pre_lock(mutex, __tsan_mutex_linker_init);
58 };
59 // TODO(zalman|dvyukov): Is 1 the right value for a non-recursive lock? pretty sure value is ignored.
60 ALWAYS_INLINE void if_tsan_post_lock(void *mutex) {
61  __tsan_mutex_post_lock(mutex, __tsan_mutex_linker_init, 1);
62 }
63 // TODO(zalman|dvyukov): Is it safe to ignore return value here if locks are not recursive?
64 ALWAYS_INLINE void if_tsan_pre_unlock(void *mutex) {
65  (void)__tsan_mutex_pre_unlock(mutex, __tsan_mutex_linker_init);
66 }
67 ALWAYS_INLINE void if_tsan_post_unlock(void *mutex) {
68  __tsan_mutex_post_unlock(mutex, __tsan_mutex_linker_init);
69 }
70 ALWAYS_INLINE void if_tsan_pre_signal(void *cond) {
71  __tsan_mutex_pre_signal(cond, 0);
72 }
73 ALWAYS_INLINE void if_tsan_post_signal(void *cond) {
74  __tsan_mutex_post_signal(cond, 0);
75 }
76 #else
77 ALWAYS_INLINE void if_tsan_pre_lock(void *) {
78 }
79 ALWAYS_INLINE void if_tsan_post_lock(void *) {
80 }
81 ALWAYS_INLINE void if_tsan_pre_unlock(void *) {
82 }
83 ALWAYS_INLINE void if_tsan_post_unlock(void *) {
84 }
85 ALWAYS_INLINE void if_tsan_pre_signal(void *) {
86 }
87 ALWAYS_INLINE void if_tsan_post_signal(void *) {
88 }
89 #endif
90 
91 #ifdef BITS_32
92 ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
93  return __sync_and_and_fetch(addr, val);
94 }
95 
96 template<typename T>
97 ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
98  return __sync_fetch_and_add(addr, val);
99 }
100 
101 template<typename T>
102 ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, T *expected, T *desired) {
103  T oldval = *expected;
104  T gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
105  *expected = gotval;
106  return oldval == gotval;
107 }
108 
109 ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
110  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
111 }
112 
113 ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
114  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
115 }
116 
117 template<typename T>
118 ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
119  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
120 }
121 
122 ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
123  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
124 }
125 
126 ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
127  return cas_strong_sequentially_consistent_helper(addr, expected, desired);
128 }
129 
130 ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
131  return __sync_fetch_and_and(addr, val);
132 }
133 
134 template<typename T>
135 ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
136  *val = *addr;
137 }
138 
139 template<typename T>
140 ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
141  __sync_synchronize();
142  *val = *addr;
143 }
144 
145 ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
146  return __sync_or_and_fetch(addr, val);
147 }
148 
149 ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
150  *addr = *val;
151 }
152 
153 template<typename T>
154 ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
155  *addr = *val;
156  __sync_synchronize();
157 }
158 
159 ALWAYS_INLINE void atomic_thread_fence_acquire() {
160  __sync_synchronize();
161 }
162 
163 #else
164 
165 ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
166  return __atomic_and_fetch(addr, val, __ATOMIC_RELEASE);
167 }
168 
169 template<typename T>
170 ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
171  return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
172 }
173 
174 ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
175  return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
176 }
177 
178 template<typename T>
179 ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
180  return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
181 }
182 
183 ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
184  return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
185 }
186 
187 ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
188  return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
189 }
190 
191 ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
192  return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
193 }
194 
195 ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
196  return __atomic_fetch_and(addr, val, __ATOMIC_RELEASE);
197 }
198 
199 template<typename T>
200 ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
201  __atomic_load(addr, val, __ATOMIC_RELAXED);
202 }
203 
204 template<typename T>
205 ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
206  __atomic_load(addr, val, __ATOMIC_ACQUIRE);
207 }
208 
209 ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
210  return __atomic_or_fetch(addr, val, __ATOMIC_RELAXED);
211 }
212 
213 ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
214  __atomic_store(addr, val, __ATOMIC_RELAXED);
215 }
216 
217 template<typename T>
218 ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
219  __atomic_store(addr, val, __ATOMIC_RELEASE);
220 }
221 
222 ALWAYS_INLINE void atomic_thread_fence_acquire() {
223  __atomic_thread_fence(__ATOMIC_ACQUIRE);
224 }
225 
226 #endif
227 
228 } // namespace
229 
231  int spin_count;
232 
233 public:
234  // Everyone says this should be 40. Have not measured it.
236  : spin_count(40) {
237  }
238 
240  if (spin_count > 0) {
241  spin_count--;
242  }
243  return spin_count > 0;
244  }
245 
247  spin_count = 40;
248  }
249 };
250 
251 #if __cplusplus >= 201103L
252 // Low order two bits are used for locking state,
253 static constexpr uint8_t lock_bit = 0x01;
254 static constexpr uint8_t queue_lock_bit = 0x02;
255 static constexpr uint8_t parked_bit = 0x02;
256 #else
257 #define lock_bit 0x01
258 #define queue_lock_bit 0x02
259 #define parked_bit 0x02
260 #endif
261 
263  thread_parker parker; // TODO: member or pointer?
264 
265  // This design is from the Rust parking lot implementation by Amanieu d'Antras.
266  // Comment from original:
267  //
268  // Linked list of threads in the queue. The queue is split into two parts:
269  // the processed part and the unprocessed part. When new nodes are added to
270  // the list, they only have the next pointer set, and queue_tail is null.
271  //
272  // Nodes are processed with the queue lock held, which consists of setting
273  // the prev pointer for each node and setting the queue_tail pointer on the
274  // first processed node of the list.
275  //
276  // This setup allows nodes to be added to the queue without a lock, while
277  // still allowing O(1) removal of nodes from the processed part of the list.
278  // The only cost is the O(n) processing, but this only needs to be done
279  // once for each node, and therefore isn't too expensive.
280 
283 
285 
287  : next(NULL), prev(NULL), tail(NULL) {
288  }
289 
290  // Inlined, empty dtor needed to avoid confusing MachO builds
292  }
293 };
294 
295 class word_lock {
296  uintptr_t state;
297 
298  void lock_full();
299  void unlock_full();
300 
301 public:
303  : state(0) {
304  }
306  if_tsan_pre_lock(this);
307 
308  uintptr_t expected = 0;
309  uintptr_t desired = lock_bit;
310  // Try for a fast grab of the lock bit. If this does not work, call the full adaptive looping code.
311  if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
312  lock_full();
313  }
314 
315  if_tsan_post_lock(this);
316  }
317 
319  if_tsan_pre_unlock(this);
320 
321  uintptr_t val = atomic_fetch_and_release(&state, ~(uintptr_t)lock_bit);
322  // If another thread is currently queueing, that thread will ensure
323  // it acquires the lock or wakes a waiting thread.
324  bool no_thread_queuing = (val & queue_lock_bit) == 0;
325  // Only need to do a wakeup if there are threads waiting.
326  bool some_queued = (val & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0;
327  if (no_thread_queuing && some_queued) {
328  unlock_full();
329  }
330 
331  if_tsan_post_unlock(this);
332  }
333 };
334 
335 WEAK void word_lock::lock_full() {
336  spin_control spinner;
337  uintptr_t expected;
338  atomic_load_relaxed(&state, &expected);
339 
340  while (true) {
341  if (!(expected & lock_bit)) {
342  uintptr_t desired = expected | lock_bit;
343 
344  if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
345  return;
346  }
347  continue;
348  }
349 
350  if (((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0) && spinner.should_spin()) {
352  atomic_load_relaxed(&state, &expected);
353  continue;
354  }
355 
356  word_lock_queue_data node;
357 
358  node.parker.prepare_park();
359  // TODO set up prelinkage parking state
360 
361  word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
362  if (head == NULL) {
363  node.tail = &node;
364  // constructor set node.prev = NULL;
365  } else {
366  // Mark the tail as NULL. The unlock routine will walk the list and wakeup
367  // the thread at the end.
368  // constructor set node.tail = NULL;
369  // constructor set node.prev = NULL;
370  node.next = head;
371  }
372 
373  uintptr_t desired = ((uintptr_t)&node) | (expected & (queue_lock_bit | lock_bit));
374  if (atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
375  node.parker.park();
376  spinner.reset();
377  atomic_load_relaxed(&state, &expected);
378  }
379  }
380 }
381 
382 WEAK void word_lock::unlock_full() {
383  uintptr_t expected;
384  atomic_load_relaxed(&state, &expected);
385 
386  while (true) {
387  // If another thread is currently queueing, that thread will ensure
388  // it acquires the lock or wakes a waiting thread.
389  bool thread_queuing = (expected & queue_lock_bit);
390  // Only need to do a wakeup if there are threads waiting.
391  bool none_queued = (expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0;
392  if (thread_queuing || none_queued) {
393  return;
394  }
395 
396  uintptr_t desired = expected | queue_lock_bit;
397  if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
398  break;
399  }
400  }
401 
402  while (true) {
403  word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
404  word_lock_queue_data *current = head;
405  word_lock_queue_data *tail = current->tail;
406  int times_through = 0;
407  while (tail == NULL) {
408  word_lock_queue_data *next = current->next;
409  halide_assert(NULL, next != NULL);
410  next->prev = current;
411  current = next;
412  tail = current->tail;
413  times_through++;
414  }
415  head->tail = tail;
416 
417  // If the lock is now locked, unlock the queue and have the thread
418  // that currently holds the lock do the wakeup
419  if (expected & lock_bit) {
420  uintptr_t desired = expected & ~(uintptr_t)queue_lock_bit;
421  if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
422  return;
423  }
424  atomic_thread_fence_acquire();
425  continue;
426  }
427 
428  word_lock_queue_data *new_tail = tail->prev;
429  if (new_tail == NULL) {
430  bool continue_outer = false;
431  while (!continue_outer) {
432  uintptr_t desired = expected & lock_bit;
433  if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
434  break;
435  }
436  if ((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0) {
437  continue;
438  } else {
439  atomic_thread_fence_acquire();
440  continue_outer = true;
441  }
442  }
443 
444  if (continue_outer) {
445  continue;
446  }
447  } else {
448  head->tail = new_tail;
449  atomic_and_fetch_release(&state, ~(uintptr_t)queue_lock_bit);
450  }
451 
452  // TODO: The reason there are three calls here is other things can happen between them.
453  // Also it is not clear if unpark_start has to return the mutex/flag used by unpark
454  // and unpark_finish due to memory lifetime reasons.
455  tail->parker.unpark_start();
456  tail->parker.unpark();
457  tail->parker.unpark_finish();
458  break;
459  }
460 }
461 
462 struct queue_data {
463  thread_parker parker; // TODO: member or pointer?
464 
465  uintptr_t sleep_address;
466 
468 
469  uintptr_t unpark_info;
470 
472  : sleep_address(0), next(NULL), unpark_info(0) {
473  }
474  // Inlined, empty dtor needed to avoid confusing MachO builds
476  }
477 };
478 
479 // Must be a power of two.
480 #define LOAD_FACTOR 4
481 
482 struct hash_bucket {
484 
485  queue_data *head; // Is this queue_data or thread_data?
486  queue_data *tail; // Is this queue_data or thread_data?
487 };
488 
489 // The use of a #define here and table_storage is because if
490 // a class with a constructor is used, clang generates a COMDAT
491 // which cannot be lowered for Mac OS due to MachO. A better
492 // solution is desired of course.
493 #define HASH_TABLE_BITS 10
494 struct hash_table {
496 };
498 #define table (*(hash_table *)table_storage)
499 
500 ALWAYS_INLINE void check_hash(uintptr_t hash) {
501  halide_assert(NULL, hash < sizeof(table.buckets) / sizeof(table.buckets[0]));
502 }
503 
504 #if 0
// Debug-only helper (compiled out by the surrounding #if 0): print
// every parked waiter in every bucket, keyed by the address it is
// sleeping on.
WEAK void dump_hash() {
    int i = 0;
    for (auto &bucket : table.buckets) {
        queue_data *head = bucket.head;
        // Each bucket holds a singly linked list of waiters.
        while (head != NULL) {
            print(NULL) << "Bucket index " << i << " addr " << (void *)head->sleep_address << "\n";
            head = head->next;
        }
        i++;
    }
}
516 #endif
517 
518 static ALWAYS_INLINE uintptr_t addr_hash(uintptr_t addr, uint32_t bits) {
519  // Fibonacci hashing. The golden ratio is 1.9E3779B97F4A7C15F39...
520  // in hexadecimal.
521  if (sizeof(uintptr_t) >= 8) {
522  return (addr * (uintptr_t)0x9E3779B97F4A7C15) >> (64 - bits);
523  } else {
524  return (addr * (uintptr_t)0x9E3779B9) >> (32 - bits);
525  }
526 }
527 
528 WEAK hash_bucket &lock_bucket(uintptr_t addr) {
529  uintptr_t hash = addr_hash(addr, HASH_TABLE_BITS);
530 
531  check_hash(hash);
532 
533  // TODO: if resizing is implemented, loop, etc.
534  hash_bucket &bucket = table.buckets[hash];
535 
536  bucket.mutex.lock();
537 
538  return bucket;
539 }
540 
541 struct bucket_pair {
544 
546  : from(from), to(to) {
547  }
548 };
549 
550 WEAK bucket_pair lock_bucket_pair(uintptr_t addr_from, uintptr_t addr_to) {
551  // TODO: if resizing is implemented, loop, etc.
552  uintptr_t hash_from = addr_hash(addr_from, HASH_TABLE_BITS);
553  uintptr_t hash_to = addr_hash(addr_to, HASH_TABLE_BITS);
554 
555  check_hash(hash_from);
556  check_hash(hash_to);
557 
558  // Lock the bucket with the smaller hash first in order
559  // to prevent deadlock.
560  if (hash_from == hash_to) {
561  hash_bucket &first = table.buckets[hash_from];
562  first.mutex.lock();
563  return bucket_pair(first, first);
564  } else if (hash_from < hash_to) {
565  hash_bucket &first = table.buckets[hash_from];
566  hash_bucket &second = table.buckets[hash_to];
567  first.mutex.lock();
568  second.mutex.lock();
569  return bucket_pair(first, second);
570  } else {
571  hash_bucket &first = table.buckets[hash_to];
572  hash_bucket &second = table.buckets[hash_from];
573  first.mutex.lock();
574  second.mutex.lock();
575  return bucket_pair(second, first);
576  }
577 }
578 
580  // In the lock routine, the buckets are locked smaller hash index first.
581  // Here we reverse this ordering by comparing the pointers. This works
582  // since the pointers are obtained by indexing an array with the hash
583  // values.
584  if (&buckets.from == &buckets.to) {
585  buckets.from.mutex.unlock();
586  } else if (&buckets.from > &buckets.to) {
587  buckets.from.mutex.unlock();
588  buckets.to.mutex.unlock();
589  } else {
590  buckets.to.mutex.unlock();
591  buckets.from.mutex.unlock();
592  }
593 }
594 
598 
600  : unpark_one(false), invalid_unpark_info(0) {
601  }
602 };
603 
604 WEAK bool parking_control_validate(void *control, validate_action &action) {
605  return true;
606 };
607 WEAK void parking_control_before_sleep(void *control){};
608 WEAK uintptr_t parking_control_unpark(void *control, int unparked, bool more_waiters) {
609  return 0;
610 };
611 WEAK void parking_control_requeue_callback(void *control, const validate_action &action, bool one_to_wake, bool some_requeued){};
612 
614  bool (*validate)(void *control, validate_action &action);
615  void (*before_sleep)(void *control);
616  uintptr_t (*unpark)(void *control, int unparked, bool more_waiters);
617  void (*requeue_callback)(void *control, const validate_action &action, bool one_to_wake, bool some_requeued);
618 
622  }
623 };
624 
625 // TODO: Do we need a park_result thing here?
626 WEAK uintptr_t park(uintptr_t addr, parking_control &control) {
628 
629  hash_bucket &bucket = lock_bucket(addr);
630 
631  validate_action action;
632  if (!control.validate(&control, action)) {
633  bucket.mutex.unlock();
634  return action.invalid_unpark_info;
635  }
636 
637  queue_data.next = NULL;
638  queue_data.sleep_address = addr;
639  queue_data.parker.prepare_park();
640  if (bucket.head != NULL) {
641  bucket.tail->next = &queue_data;
642  } else {
643  bucket.head = &queue_data;
644  }
645  bucket.tail = &queue_data;
646  bucket.mutex.unlock();
647 
648  control.before_sleep(&control);
649 
650  queue_data.parker.park();
651 
652  return queue_data.unpark_info;
653 
654  // TODO: handling timeout.
655 }
656 
// Wake at most one thread parked on `addr`. Returns 1 if more waiters
// remain parked on the same address afterwards, 0 otherwise.
WEAK uintptr_t unpark_one(uintptr_t addr, parking_control &control) {
    hash_bucket &bucket = lock_bucket(addr);

    // Walk the bucket's singly linked list looking for the first
    // waiter sleeping on this exact address (a bucket is shared by
    // many addresses).
    queue_data **data_location = &bucket.head;
    queue_data *prev = NULL;
    queue_data *data = *data_location;
    while (data != NULL) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);
        if (cur_addr == addr) {
            // Unlink this waiter from the bucket list.
            queue_data *next = data->next;
            *data_location = next;

            bool more_waiters = false;

            if (bucket.tail == data) {
                bucket.tail = prev;
            } else {
                // Scan the rest of the list to see whether any other
                // waiter is parked on the same address.
                queue_data *data2 = next;
                while (data2 != NULL && !more_waiters) {
                    uintptr_t cur_addr2;
                    atomic_load_relaxed(&data2->sleep_address, &cur_addr2);
                    more_waiters = (cur_addr2 == addr);
                    data2 = data2->next;
                }
            }

            // Let the control compute the value the woken thread will
            // see returned from park().
            data->unpark_info = control.unpark(&control, 1, more_waiters);

            // Start the wakeup while holding the bucket lock, release
            // the lock, then complete the wakeup.
            data->parker.unpark_start();
            bucket.mutex.unlock();
            data->parker.unpark();
            data->parker.unpark_finish();

            // TODO: Figure out ret type.
            return more_waiters ? 1 : 0;
        } else {
            // Not our address: advance to the next node.
            data_location = &data->next;
            prev = data;
            data = data->next;
        }
    }

    // No waiter found: still notify the control (zero unparked).
    control.unpark(&control, 0, false);

    bucket.mutex.unlock();

    // TODO: decide if this is the right return value.
    return 0;
}
707 
708 WEAK uintptr_t unpark_all(uintptr_t addr, uintptr_t unpark_info) {
709  hash_bucket &bucket = lock_bucket(addr);
710 
711  queue_data **data_location = &bucket.head;
712  queue_data *prev = NULL;
713  queue_data *data = *data_location;
714  size_t waiters = 0;
715  queue_data *temp_list_storage[16];
716  queue_data **temp_list = &temp_list_storage[0];
717  size_t max_waiters = sizeof(temp_list_storage) / sizeof(temp_list_storage[0]);
718 
719  while (data != NULL) {
720  uintptr_t cur_addr;
721  atomic_load_relaxed(&data->sleep_address, &cur_addr);
722 
723  queue_data *next = data->next;
724  if (cur_addr == addr) {
725  *data_location = next;
726 
727  if (bucket.tail == data) {
728  bucket.tail = prev;
729  }
730 
731  if (waiters == max_waiters) {
732  queue_data **temp = temp_list;
733  temp_list = (queue_data **)malloc(sizeof(queue_data *) * max_waiters * 2);
734  for (size_t i = 0; i < max_waiters; i++) {
735  temp_list[i] = temp[i];
736  }
737  max_waiters *= 2;
738  if (temp != &temp_list_storage[0]) {
739  free(temp);
740  }
741  }
742 
743  data->unpark_info = unpark_info;
744 
745  temp_list[waiters++] = data;
746  data->parker.unpark_start();
747 
748  data = next;
749  } else {
750  *data_location = data->next;
751  prev = data;
752  data = next;
753  }
754  }
755 
756  bucket.mutex.unlock();
757 
758  for (size_t i = 0; i < waiters; i++) {
759  temp_list[i]->parker.unpark();
760  }
761 
762  // TODO: decide if this really needs to be two loops.
763  for (size_t i = 0; i < waiters; i++) {
764  temp_list[i]->parker.unpark_finish();
765  }
766 
767  if (temp_list != &temp_list_storage[0]) {
768  free(temp_list);
769  }
770 
771  return waiters;
772 }
773 
// Move threads parked on addr_from over to addr_to, optionally waking
// one of them (controlled by action.unpark_one, set by the control's
// validate callback). Used by fast_cond::broadcast to shift waiters
// onto the mutex instead of waking them all at once ("thundering
// herd" avoidance). Returns nonzero iff one thread was woken.
WEAK int unpark_requeue(uintptr_t addr_from, uintptr_t addr_to, parking_control &control, uintptr_t unpark_info) {
    bucket_pair buckets = lock_bucket_pair(addr_from, addr_to);

    validate_action action;
    if (!control.validate(&control, action)) {
        unlock_bucket_pair(buckets);
        return 0;
    }

    // Walk the from-bucket, unlinking every waiter parked on
    // addr_from. The first such waiter becomes `wakeup` (if
    // action.unpark_one is set); the rest are collected into the
    // requeue/requeue_tail list.
    queue_data **data_location = &buckets.from.head;
    queue_data *prev = NULL;
    queue_data *data = *data_location;
    queue_data *requeue = NULL;
    queue_data *requeue_tail = NULL;
    queue_data *wakeup = NULL;

    while (data != NULL) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);

        queue_data *next = data->next;
        if (cur_addr == addr_from) {
            // Unlink from the from-bucket.
            *data_location = next;

            if (buckets.from.tail == data) {
                buckets.from.tail = prev;
            }

            if (action.unpark_one && wakeup == NULL) {
                wakeup = data;
            } else {
                // Append to the requeue list and retarget the waiter's
                // sleep address at the destination.
                if (requeue == NULL) {
                    requeue = data;
                } else {
                    requeue_tail->next = data;
                }

                requeue_tail = data;
                atomic_store_relaxed(&data->sleep_address, &addr_to);
            }
            data = next;
            // TODO: prev ptr?
        } else {
            // Not addr_from: leave in place and advance.
            data_location = &data->next;
            prev = data;
            data = next;
        }
    }

    // Splice the requeued waiters onto the tail of the to-bucket.
    if (requeue != NULL) {
        requeue_tail->next = NULL;
        if (buckets.to.head == NULL) {
            buckets.to.head = requeue;
        } else {
            buckets.to.tail->next = requeue;
        }
        buckets.to.tail = requeue_tail;
    }

    // Notify the control while both buckets are still locked.
    control.requeue_callback(&control, action, wakeup != NULL, requeue != NULL);

    if (wakeup != NULL) {
        wakeup->unpark_info = unpark_info;
        wakeup->parker.unpark_start();
        unlock_bucket_pair(buckets);
        wakeup->parker.unpark();
        wakeup->parker.unpark_finish();
    } else {
        unlock_bucket_pair(buckets);
    }

    return wakeup != NULL && action.unpark_one;
}
847 
848 WEAK bool mutex_parking_control_validate(void *control, validate_action &action);
849 WEAK uintptr_t mutex_parking_control_unpark(void *control, int unparked, bool more_waiters);
851  uintptr_t *lock_state;
852 
857  }
858 };
859 
860 // Only used in parking -- lock_full.
862  mutex_parking_control *mutex_control = (mutex_parking_control *)control;
863 
864  uintptr_t result;
865  atomic_load_relaxed(mutex_control->lock_state, &result);
866  return result == (lock_bit | parked_bit);
867 }
868 
869 // Only used in unparking -- unlock_full.
870 WEAK uintptr_t mutex_parking_control_unpark(void *control, int unparked, bool more_waiters) {
871  mutex_parking_control *mutex_control = (mutex_parking_control *)control;
872 
873  // TODO: consider handling fairness.
874  uintptr_t return_state = more_waiters ? parked_bit : 0;
875  atomic_store_release(mutex_control->lock_state, &return_state);
876 
877  return 0;
878 }
879 
880 class fast_mutex {
881  uintptr_t state;
882 
883  ALWAYS_INLINE void lock_full() {
884  // Everyone says this should be 40. Have not measured it.
885  spin_control spinner;
886  uintptr_t expected;
887  atomic_load_relaxed(&state, &expected);
888 
889  while (true) {
890  if (!(expected & lock_bit)) {
891  uintptr_t desired = expected | lock_bit;
892  if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
893  return;
894  }
895  continue;
896  }
897 
898  // If no one is parked, spin with spin count.
899  if ((expected & parked_bit) == 0 && spinner.should_spin()) {
901  atomic_load_relaxed(&state, &expected);
902  continue;
903  }
904 
905  // Mark mutex as having parked threads if not already done.
906  if ((expected & parked_bit) == 0) {
907  uintptr_t desired = expected | parked_bit;
908  if (!atomic_cas_weak_relaxed_relaxed(&state, &expected, &desired)) {
909  continue;
910  }
911  }
912 
913  // TODO: consider handling fairness, timeout
914  mutex_parking_control control(&state);
915  uintptr_t result = park((uintptr_t)this, control);
916  if (result == (uintptr_t)this) {
917  return;
918  }
919 
920  spinner.reset();
921  atomic_load_relaxed(&state, &expected);
922  }
923  }
924 
925  ALWAYS_INLINE void unlock_full() {
926  uintptr_t expected = lock_bit;
927  uintptr_t desired = 0;
928  // Try for a fast release of the lock. Redundant with code in lock, but done
929  // to make unlock_full a standalone unlock that can be called directly.
930  if (atomic_cas_strong_release_relaxed(&state, &expected, &desired)) {
931  return;
932  }
933 
934  mutex_parking_control control(&state);
935  unpark_one((uintptr_t)this, control);
936  }
937 
938 public:
940  uintptr_t expected = 0;
941  uintptr_t desired = lock_bit;
942  // Try for a fast grab of the lock bit. If this does not work, call the full adaptive looping code.
943  if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
944  lock_full();
945  }
946  }
947 
949  uintptr_t expected = lock_bit;
950  uintptr_t desired = 0;
951  // Try for a fast grab of the lock bit. If this does not work, call the full adaptive looping code.
952  if (!atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
953  unlock_full();
954  }
955  }
956 
958  uintptr_t val;
959  atomic_load_relaxed(&state, &val);
960  while (true) {
961  if (!(val & lock_bit)) {
962  return false;
963  }
964 
965  uintptr_t desired = val | parked_bit;
966  if (atomic_cas_weak_relaxed_relaxed(&state, &val, &desired)) {
967  return true;
968  }
969  }
970  }
971 
973  atomic_or_fetch_relaxed(&state, parked_bit);
974  }
975 };
976 WEAK uintptr_t signal_parking_control_unpark(void *control, int unparked, bool more_waiters);
978  uintptr_t *cond_state;
980 
984  }
985 };
986 
// Unpark hook for fast_cond::signal. If the thread being woken was the
// last waiter, clear the cond's state so it no longer records the
// associated mutex.
WEAK uintptr_t signal_parking_control_unpark(void *control, int unparked, bool more_waiters) {
    signal_parking_control *signal_control = (signal_parking_control *)control;

    if (!more_waiters) {
        uintptr_t val = 0;
        atomic_store_relaxed(signal_control->cond_state, &val);
    }

#if 0 // TODO: figure out why this was here.
    return (uintptr_t)signal_control->mutex;
#else
    return 0;
#endif
}
1001 WEAK bool broadcast_parking_control_validate(void *control, validate_action &action);
1002 WEAK void broadcast_parking_control_requeue_callback(void *control, const validate_action &action,
1003  bool one_to_wake, bool some_requeued);
1005  uintptr_t *cond_state;
1007 
1012  }
1013 };
1014 
1016  broadcast_parking_control *broadcast_control = (broadcast_parking_control *)control;
1017 
1018  uintptr_t val;
1019  atomic_load_relaxed(broadcast_control->cond_state, &val);
1020  // By the time this broadcast locked everything and was processed, the cond
1021  // has progressed to a new mutex, do nothing since any waiting threads have
1022  // to be waiting on what is effectively a different condition.
1023  if (val != (uintptr_t)broadcast_control->mutex) {
1024  return false;
1025  }
1026  // Clear the cond's connection to the mutex as all waiting threads are going to reque onto the mutex.
1027  val = 0;
1028  atomic_store_relaxed(broadcast_control->cond_state, &val);
1029 
1030  action.unpark_one = !broadcast_control->mutex->make_parked_if_locked();
1031 
1032  return true;
1033 }
1034 
1035 WEAK void broadcast_parking_control_requeue_callback(void *control, const validate_action &action, bool one_to_wake, bool some_requeued) {
1036  broadcast_parking_control *broadcast_control = (broadcast_parking_control *)control;
1037 
1038  if (action.unpark_one && some_requeued) {
1039  broadcast_control->mutex->make_parked();
1040  }
1041 }
1042 WEAK bool wait_parking_control_validate(void *control, validate_action &action);
1043 WEAK void wait_parking_control_before_sleep(void *control);
1044 WEAK uintptr_t wait_parking_control_unpark(void *control, int unparked, bool more_waiters);
1046  uintptr_t *cond_state;
1048 
1054  }
1055 };
1056 
1058  wait_parking_control *wait_control = (wait_parking_control *)control;
1059 
1060  uintptr_t val;
1061  atomic_load_relaxed(wait_control->cond_state, &val);
1062 
1063  if (val == 0) {
1064  val = (uintptr_t)wait_control->mutex;
1065  atomic_store_relaxed(wait_control->cond_state, &val);
1066  } else if (val != (uintptr_t)wait_control->mutex) {
1067  // TODO: signal error.
1068  action.invalid_unpark_info = (uintptr_t)wait_control->mutex;
1069  return false;
1070  }
1071 
1072  return true;
1073 }
1074 
1076  wait_parking_control *wait_control = (wait_parking_control *)control;
1077 
1078  wait_control->mutex->unlock();
1079 }
1080 
1081 WEAK uintptr_t wait_parking_control_unpark(void *control, int unparked, bool more_waiters) {
1082  wait_parking_control *wait_control = (wait_parking_control *)control;
1083 
1084  if (!more_waiters) {
1085  uintptr_t val = 0;
1086  atomic_store_relaxed(wait_control->cond_state, &val);
1087  }
1088  return 0;
1089 }
1090 
1091 class fast_cond {
1092  uintptr_t state;
1093 
1094 public:
1096  : state(0) {
1097  }
1098 
1100  if_tsan_pre_signal(this);
1101 
1102  uintptr_t val;
1103  atomic_load_relaxed(&state, &val);
1104  if (val == 0) {
1105  if_tsan_post_signal(this);
1106  return;
1107  }
1108  signal_parking_control control(&state, (fast_mutex *)val);
1109  unpark_one((uintptr_t)this, control);
1110  if_tsan_post_signal(this);
1111  }
1112 
1114  if_tsan_pre_signal(this);
1115  uintptr_t val;
1116  atomic_load_relaxed(&state, &val);
1117  if (val == 0) {
1118  if_tsan_post_signal(this);
1119  return;
1120  }
1121  broadcast_parking_control control(&state, (fast_mutex *)val);
1122  unpark_requeue((uintptr_t)this, val, control, 0);
1123  if_tsan_post_signal(this);
1124  }
1125 
1127  wait_parking_control control(&state, mutex);
1128  uintptr_t result = park((uintptr_t)this, control);
1129  if (result != (uintptr_t)mutex) {
1130  mutex->lock();
1131  } else {
1132  if_tsan_pre_lock(mutex);
1133 
1134  // TODO: this is debug only.
1135  uintptr_t val;
1136  atomic_load_relaxed((uintptr_t *)mutex, &val);
1137  halide_assert(NULL, val & 0x1);
1138 
1139  if_tsan_post_lock(mutex);
1140  }
1141  }
1142 };
1143 
1144 } // namespace Synchronization
1145 
1146 } // namespace Internal
1147 } // namespace Runtime
1148 } // namespace Halide
1149 
1150 extern "C" {
1151 
1155  fast_mutex->lock();
1156 }
1157 
1161  fast_mutex->unlock();
1162 }
1163 
1167  fast_cond->broadcast();
1168 }
1169 
1173  fast_cond->signal();
1174 }
1175 
1176 WEAK void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex) {
1181  fast_cond->wait(fast_mutex);
1182 }
1183 
1184 // Actual definition of the mutex array.
1187 };
1188 
1190  // TODO: If sz is huge, we should probably hash it down to something smaller
1191  // in the accessors below. Check for deadlocks before doing so.
1193  NULL, sizeof(halide_mutex_array));
1194  if (array == NULL) {
1195  // Will result in a failed assertion and a call to halide_error.
1196  return NULL;
1197  }
1198  array->array = (halide_mutex *)halide_malloc(
1199  NULL, sz * sizeof(halide_mutex));
1200  if (array->array == NULL) {
1201  halide_free(NULL, array);
1202  // Will result in a failed assertion and a call to halide_error.
1203  return NULL;
1204  }
1205  memset(array->array, 0, sz * sizeof(halide_mutex));
1206  return array;
1207 }
1208 
1210  struct halide_mutex_array *arr_ptr = (struct halide_mutex_array *)array;
1211  halide_free(user_context, arr_ptr->array);
1212  halide_free(user_context, arr_ptr);
1213 }
1214 
1216  halide_mutex_lock(&array->array[entry]);
1217  return 0;
1218 }
1219 
1221  halide_mutex_unlock(&array->array[entry]);
1222  return 0;
1223 }
1224 }
Halide::Runtime::Internal::Synchronization::spin_control::reset
ALWAYS_INLINE void reset()
Definition: synchronization_common.h:246
halide_mutex_array_lock
WEAK int halide_mutex_array_lock(struct halide_mutex_array *array, int entry)
Definition: synchronization_common.h:1215
Halide::Runtime::Internal::Synchronization::word_lock_queue_data
Definition: synchronization_common.h:262
halide_free
void halide_free(void *user_context, void *ptr)
Halide::Runtime::Internal::Synchronization::word_lock::unlock
ALWAYS_INLINE void unlock()
Definition: synchronization_common.h:318
Halide::Runtime::Internal::Synchronization::fast_mutex::make_parked_if_locked
ALWAYS_INLINE bool make_parked_if_locked()
Definition: synchronization_common.h:957
halide_mutex_array::array
struct halide_mutex * array
Definition: synchronization_common.h:1186
Halide::Runtime::Internal::Synchronization::signal_parking_control::mutex
fast_mutex * mutex
Definition: synchronization_common.h:979
MAX_THREADS
#define MAX_THREADS
Definition: thread_pool_common.h:67
NULL
#define NULL
Definition: runtime_internal.h:32
Halide::Runtime::Internal::Synchronization::hash_bucket::mutex
word_lock mutex
Definition: synchronization_common.h:483
uint8_t
unsigned __INT8_TYPE__ uint8_t
Definition: runtime_internal.h:25
Halide::Runtime::Internal::Synchronization::spin_control::spin_control
ALWAYS_INLINE spin_control()
Definition: synchronization_common.h:235
Halide::Runtime::Internal::Synchronization::fast_mutex::unlock
ALWAYS_INLINE void unlock()
Definition: synchronization_common.h:948
Halide::Runtime::Internal::Synchronization::wait_parking_control_validate
WEAK bool wait_parking_control_validate(void *control, validate_action &action)
Definition: synchronization_common.h:1057
Halide::Runtime::Internal::Synchronization::signal_parking_control_unpark
WEAK uintptr_t signal_parking_control_unpark(void *control, int unparked, bool more_waiters)
Definition: synchronization_common.h:987
Halide::Runtime::Internal::Synchronization::fast_cond::wait
ALWAYS_INLINE void wait(fast_mutex *mutex)
Definition: synchronization_common.h:1126
Halide::Runtime::Internal::Synchronization::wait_parking_control_before_sleep
WEAK void wait_parking_control_before_sleep(void *control)
Definition: synchronization_common.h:1075
Halide::Runtime::Internal::Synchronization::hash_table::buckets
hash_bucket buckets[MAX_THREADS *LOAD_FACTOR]
Definition: synchronization_common.h:495
Halide::Runtime::Internal::Synchronization::word_lock::lock
ALWAYS_INLINE void lock()
Definition: synchronization_common.h:305
Halide::Runtime::Internal::Synchronization::spin_control
Definition: synchronization_common.h:230
Halide::Runtime::Internal::Synchronization::fast_cond::broadcast
ALWAYS_INLINE void broadcast()
Definition: synchronization_common.h:1113
Halide::Runtime::Internal::Synchronization::queue_data::sleep_address
uintptr_t sleep_address
Definition: synchronization_common.h:465
Halide::Runtime::Internal::Synchronization::wait_parking_control
Definition: synchronization_common.h:1045
halide_cond
Cross platform condition variable.
Definition: HalideRuntime.h:119
Halide::Runtime::Internal::Synchronization::parking_control_requeue_callback
WEAK void parking_control_requeue_callback(void *control, const validate_action &action, bool one_to_wake, bool some_requeued)
Definition: synchronization_common.h:611
Halide::Runtime::Internal::Synchronization::queue_data::unpark_info
uintptr_t unpark_info
Definition: synchronization_common.h:469
lock_bit
#define lock_bit
Definition: synchronization_common.h:257
Halide::Runtime::Internal::Synchronization::spin_control::should_spin
ALWAYS_INLINE bool should_spin()
Definition: synchronization_common.h:239
Halide::Runtime::Internal::Synchronization::parking_control
Definition: synchronization_common.h:613
Halide::Runtime::Internal::Synchronization::unpark_one
WEAK uintptr_t unpark_one(uintptr_t addr, parking_control &control)
Definition: synchronization_common.h:657
Halide::Runtime::Internal::Synchronization::mutex_parking_control_unpark
WEAK uintptr_t mutex_parking_control_unpark(void *control, int unparked, bool more_waiters)
Definition: synchronization_common.h:870
Halide::Runtime::Internal::Synchronization::mutex_parking_control::mutex_parking_control
ALWAYS_INLINE mutex_parking_control(uintptr_t *lock_state)
Definition: synchronization_common.h:853
halide_cond_wait
WEAK void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex)
Definition: synchronization_common.h:1176
Halide::Runtime::Internal::Synchronization::signal_parking_control::cond_state
uintptr_t * cond_state
Definition: synchronization_common.h:978
Halide::Runtime::Internal::Synchronization::wait_parking_control::mutex
fast_mutex * mutex
Definition: synchronization_common.h:1047
Halide::Runtime::Internal::Synchronization::queue_data::queue_data
ALWAYS_INLINE queue_data()
Definition: synchronization_common.h:471
Halide::Runtime::Internal::Synchronization::wait_parking_control_unpark
WEAK uintptr_t wait_parking_control_unpark(void *control, int unparked, bool more_waiters)
Definition: synchronization_common.h:1081
malloc
void * malloc(size_t)
halide_mutex_array
Definition: synchronization_common.h:1185
Halide::Runtime::Internal::Synchronization::wait_parking_control::cond_state
uintptr_t * cond_state
Definition: synchronization_common.h:1046
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AddAtomicMutex.h:21
Halide::Runtime::Internal::Synchronization::park
WEAK uintptr_t park(uintptr_t addr, parking_control &control)
Definition: synchronization_common.h:626
Halide::Runtime::Internal::Synchronization::bucket_pair::to
hash_bucket & to
Definition: synchronization_common.h:543
Halide::Runtime::Internal::Synchronization::hash_bucket::head
queue_data * head
Definition: synchronization_common.h:485
Halide::Runtime::Internal::Synchronization::hash_bucket::tail
queue_data * tail
Definition: synchronization_common.h:486
Halide::Runtime::Internal::Synchronization::broadcast_parking_control::broadcast_parking_control
ALWAYS_INLINE broadcast_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
Definition: synchronization_common.h:1008
Halide::Runtime::Internal::Synchronization::fast_mutex::lock
ALWAYS_INLINE void lock()
Definition: synchronization_common.h:939
Halide::Runtime::Internal::Synchronization::word_lock_queue_data::~word_lock_queue_data
ALWAYS_INLINE ~word_lock_queue_data()
Definition: synchronization_common.h:291
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
Halide::Runtime::Internal::Synchronization::fast_mutex
Definition: synchronization_common.h:880
Halide::Runtime::Internal::Synchronization::broadcast_parking_control::cond_state
uintptr_t * cond_state
Definition: synchronization_common.h:1005
halide_cond_signal
WEAK void halide_cond_signal(struct halide_cond *cond)
Definition: synchronization_common.h:1170
Halide::Runtime::Internal::Synchronization::lock_bucket_pair
WEAK bucket_pair lock_bucket_pair(uintptr_t addr_from, uintptr_t addr_to)
Definition: synchronization_common.h:550
memset
void * memset(void *s, int val, size_t n)
queue_lock_bit
#define queue_lock_bit
Definition: synchronization_common.h:258
Halide::Runtime::Internal::Synchronization::word_lock_queue_data::parker
thread_parker parker
Definition: synchronization_common.h:263
Halide::Runtime::Internal::Synchronization::table_storage
WEAK char table_storage[sizeof(hash_table)]
Definition: synchronization_common.h:497
Halide::print
Expr print(const std::vector< Expr > &values)
Create an Expr that prints out its value whenever it is evaluated.
Halide::Runtime::Internal::Synchronization::unpark_all
WEAK uintptr_t unpark_all(uintptr_t addr, uintptr_t unpark_info)
Definition: synchronization_common.h:708
Halide::Runtime::Internal::Synchronization::bucket_pair
Definition: synchronization_common.h:541
Halide::Runtime::Internal::Synchronization::check_hash
ALWAYS_INLINE void check_hash(uintptr_t hash)
Definition: synchronization_common.h:500
Halide::Runtime::Internal::Synchronization::broadcast_parking_control_validate
WEAK bool broadcast_parking_control_validate(void *control, validate_action &action)
Definition: synchronization_common.h:1015
halide_assert
#define halide_assert(user_context, cond)
Definition: runtime_internal.h:240
scoped_spin_lock.h
Halide::Runtime::Internal::Synchronization::word_lock
Definition: synchronization_common.h:295
halide_mutex_array_create
WEAK halide_mutex_array * halide_mutex_array_create(int sz)
Definition: synchronization_common.h:1189
Halide::Runtime::Internal::Synchronization::mutex_parking_control_validate
WEAK bool mutex_parking_control_validate(void *control, validate_action &action)
Definition: synchronization_common.h:861
printer.h
Halide::Runtime::Internal::Synchronization::broadcast_parking_control
Definition: synchronization_common.h:1004
Halide::Runtime::Internal::Synchronization::queue_data
Definition: synchronization_common.h:462
Halide::Runtime::Internal::Synchronization::wait_parking_control::wait_parking_control
ALWAYS_INLINE wait_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
Definition: synchronization_common.h:1049
Halide::Runtime::Internal::Synchronization::parking_control_validate
WEAK bool parking_control_validate(void *control, validate_action &action)
Definition: synchronization_common.h:604
Halide::Runtime::Internal::Synchronization::parking_control::parking_control
ALWAYS_INLINE parking_control()
Definition: synchronization_common.h:619
Halide::Runtime::Internal::Synchronization::parking_control_before_sleep
WEAK void parking_control_before_sleep(void *control)
Definition: synchronization_common.h:607
halide_mutex_array_destroy
WEAK void halide_mutex_array_destroy(void *user_context, void *array)
Definition: synchronization_common.h:1209
halide_mutex
Cross-platform mutex.
Definition: HalideRuntime.h:114
halide_malloc
void * halide_malloc(void *user_context, size_t x)
Halide calls these functions to allocate and free memory.
Halide::Runtime::Internal::Synchronization::queue_data::~queue_data
ALWAYS_INLINE ~queue_data()
Definition: synchronization_common.h:475
HASH_TABLE_BITS
#define HASH_TABLE_BITS
Definition: synchronization_common.h:493
Halide::Runtime::Internal::Synchronization::unpark_requeue
WEAK int unpark_requeue(uintptr_t addr_from, uintptr_t addr_to, parking_control &control, uintptr_t unpark_info)
Definition: synchronization_common.h:774
halide_thread_yield
void halide_thread_yield()
LOAD_FACTOR
#define LOAD_FACTOR
Definition: synchronization_common.h:480
Halide::Runtime::Internal::Synchronization::word_lock::word_lock
ALWAYS_INLINE word_lock()
Definition: synchronization_common.h:302
ALWAYS_INLINE
#define ALWAYS_INLINE
Definition: runtime_internal.h:53
Halide::Runtime::Internal::Synchronization::parking_control::before_sleep
void(* before_sleep)(void *control)
Definition: synchronization_common.h:615
parked_bit
#define parked_bit
Definition: synchronization_common.h:259
Halide::Runtime::Internal::Synchronization::signal_parking_control
Definition: synchronization_common.h:977
Halide::Runtime::Internal::Synchronization::parking_control_unpark
WEAK uintptr_t parking_control_unpark(void *control, int unparked, bool more_waiters)
Definition: synchronization_common.h:608
Halide::Runtime::Internal::Synchronization::validate_action::validate_action
ALWAYS_INLINE validate_action()
Definition: synchronization_common.h:599
Halide::Runtime::Internal::Synchronization::broadcast_parking_control_requeue_callback
WEAK void broadcast_parking_control_requeue_callback(void *control, const validate_action &action, bool one_to_wake, bool some_requeued)
Definition: synchronization_common.h:1035
HalideRuntime.h
Halide::Runtime::Internal::Synchronization::word_lock_queue_data::tail
word_lock_queue_data * tail
Definition: synchronization_common.h:284
Halide::Runtime::Internal::Synchronization::unlock_bucket_pair
WEAK void unlock_bucket_pair(bucket_pair &buckets)
Definition: synchronization_common.h:579
free
void free(void *)
halide_mutex_lock
WEAK void halide_mutex_lock(halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
Definition: synchronization_common.h:1152
Halide::Runtime::Internal::Synchronization::broadcast_parking_control::mutex
fast_mutex * mutex
Definition: synchronization_common.h:1006
Halide::Runtime::Internal::Synchronization::fast_mutex::make_parked
ALWAYS_INLINE void make_parked()
Definition: synchronization_common.h:972
Halide::Runtime::Internal::Synchronization::parking_control::requeue_callback
void(* requeue_callback)(void *control, const validate_action &action, bool one_to_wake, bool some_requeued)
Definition: synchronization_common.h:617
Halide::Runtime::Internal::Synchronization::fast_cond::signal
ALWAYS_INLINE void signal()
Definition: synchronization_common.h:1099
halide_mutex_unlock
WEAK void halide_mutex_unlock(halide_mutex *mutex)
Definition: synchronization_common.h:1158
Halide::Runtime::Internal::Synchronization::queue_data::next
queue_data * next
Definition: synchronization_common.h:467
Halide::Runtime::Internal::Synchronization::hash_bucket
Definition: synchronization_common.h:482
Halide::Runtime::Internal::Synchronization::validate_action::invalid_unpark_info
uintptr_t invalid_unpark_info
Definition: synchronization_common.h:597
Halide::Runtime::Internal::Synchronization::fast_cond
Definition: synchronization_common.h:1091
Halide::Runtime::Internal::Synchronization::bucket_pair::bucket_pair
ALWAYS_INLINE bucket_pair(hash_bucket &from, hash_bucket &to)
Definition: synchronization_common.h:545
Halide::Runtime::Internal::Synchronization::parking_control::unpark
uintptr_t(* unpark)(void *control, int unparked, bool more_waiters)
Definition: synchronization_common.h:616
halide_mutex_array_unlock
WEAK int halide_mutex_array_unlock(struct halide_mutex_array *array, int entry)
Definition: synchronization_common.h:1220
Halide::Runtime::Internal::Synchronization::word_lock_queue_data::word_lock_queue_data
ALWAYS_INLINE word_lock_queue_data()
Definition: synchronization_common.h:286
WEAK
#define WEAK
Definition: runtime_internal.h:50
halide_cond_broadcast
WEAK void halide_cond_broadcast(struct halide_cond *cond)
Definition: synchronization_common.h:1164
Halide::Runtime::Internal::Synchronization::validate_action
Definition: synchronization_common.h:595
Halide::Runtime::Internal::Synchronization::lock_bucket
WEAK hash_bucket & lock_bucket(uintptr_t addr)
Definition: synchronization_common.h:528
Halide::Runtime::Internal::Synchronization::validate_action::unpark_one
bool unpark_one
Definition: synchronization_common.h:596
uint32_t
unsigned __INT32_TYPE__ uint32_t
Definition: runtime_internal.h:21
Halide::Runtime::Internal::Synchronization::parking_control::validate
bool(* validate)(void *control, validate_action &action)
Definition: synchronization_common.h:614
Halide::Runtime::Internal::Synchronization::word_lock_queue_data::next
word_lock_queue_data * next
Definition: synchronization_common.h:281
Halide::Runtime::Internal::Synchronization::fast_cond::fast_cond
ALWAYS_INLINE fast_cond()
Definition: synchronization_common.h:1095
Halide::Runtime::Internal::Synchronization::mutex_parking_control::lock_state
uintptr_t * lock_state
Definition: synchronization_common.h:851
user_context
void * user_context
Definition: printer.h:33
table
#define table
Definition: synchronization_common.h:498
Halide::Runtime::Internal::Synchronization::signal_parking_control::signal_parking_control
ALWAYS_INLINE signal_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
Definition: synchronization_common.h:981
Halide::Runtime::Internal::Synchronization::hash_table
Definition: synchronization_common.h:494
Halide::Runtime::Internal::Synchronization::queue_data::parker
thread_parker parker
Definition: synchronization_common.h:463
Halide::Runtime::Internal::Synchronization::bucket_pair::from
hash_bucket & from
Definition: synchronization_common.h:542
Halide::Runtime::Internal::Synchronization::word_lock_queue_data::prev
word_lock_queue_data * prev
Definition: synchronization_common.h:282
Halide::Runtime::Internal::Synchronization::mutex_parking_control
Definition: synchronization_common.h:850