Halide 19.0.0
Halide compiler and libraries
synchronization_common.h
#include "HalideRuntime.h"
#include "printer.h"
#include "scoped_spin_lock.h"

/* This provides an implementation of pthreads-like mutex and
 * condition variables with fast default case performance. The code
 * is based on the "parking lot" design and specifically Amanieu
 * d'Antras' Rust implementation:
 *    https://github.com/Amanieu/parking_lot
 * and the one in WTF:
 *    https://webkit.org/blog/6161/locking-in-webkit/
 *
 * Neither of the above implementations was used directly, largely for
 * dependency-management reasons. This implementation lacks a few features
 * relative to those two. Specifically, timeouts are not supported, nor is
 * optional fairness or deadlock detection.
 * This implementation should provide a fairly standalone "one file"
 * fast synchronization layer on top of readily available system primitives.
 *
 * TODO: Implement pthread_once equivalent.
 * TODO: Add read/write lock and move SharedExclusiveSpinLock from tracing.cpp
 *       to this mechanism.
 * TODO: Add timeouts and optional fairness if needed.
 * TODO: Relying on condition variables has issues for old versions of Windows
 *       and likely has portability issues to some very bare bones embedded OSes.
 *       Doing an implementation using only semaphores or event counters should
 *       be doable.
 */
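// Illustrative sketch (not part of the original file): the classes below back
// the public API declared in HalideRuntime.h, which callers use like an
// ordinary mutex/condvar pair. `ready` is a hypothetical predicate; the
// halide_mutex/halide_cond are assumed to start zero-initialized.
//
//   halide_mutex m = {};
//   halide_cond c = {};
//   bool ready = false;
//
//   void consumer() {
//       halide_mutex_lock(&m);
//       while (!ready) {
//           halide_cond_wait(&c, &m);
//       }
//       halide_mutex_unlock(&m);
//   }
//
//   void producer() {
//       halide_mutex_lock(&m);
//       ready = true;
//       halide_cond_signal(&c);
//       halide_mutex_unlock(&m);
//   }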

#include "runtime_atomics.h"

// Copied from tsan_interface.h
#ifndef TSAN_ANNOTATIONS
#define TSAN_ANNOTATIONS 0
#endif

#if TSAN_ANNOTATIONS
extern "C" {
const unsigned __tsan_mutex_linker_init = 1 << 0;
void __tsan_mutex_pre_lock(void *addr, unsigned flags);
void __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion);
int __tsan_mutex_pre_unlock(void *addr, unsigned flags);
void __tsan_mutex_post_unlock(void *addr, unsigned flags);
void __tsan_mutex_pre_signal(void *addr, unsigned flags);
void __tsan_mutex_post_signal(void *addr, unsigned flags);
}
#endif

namespace Halide {
namespace Runtime {
namespace Internal {

namespace Synchronization {

namespace {

#if TSAN_ANNOTATIONS
ALWAYS_INLINE void if_tsan_pre_lock(void *mutex) {
    __tsan_mutex_pre_lock(mutex, __tsan_mutex_linker_init);
}
// TODO(zalman|dvyukov): Is 1 the right value for a non-recursive lock? pretty sure value is ignored.
ALWAYS_INLINE void if_tsan_post_lock(void *mutex) {
    __tsan_mutex_post_lock(mutex, __tsan_mutex_linker_init, 1);
}
// TODO(zalman|dvyukov): Is it safe to ignore return value here if locks are not recursive?
ALWAYS_INLINE void if_tsan_pre_unlock(void *mutex) {
    (void)__tsan_mutex_pre_unlock(mutex, __tsan_mutex_linker_init);
}
ALWAYS_INLINE void if_tsan_post_unlock(void *mutex) {
    __tsan_mutex_post_unlock(mutex, __tsan_mutex_linker_init);
}
ALWAYS_INLINE void if_tsan_pre_signal(void *cond) {
    __tsan_mutex_pre_signal(cond, 0);
}
ALWAYS_INLINE void if_tsan_post_signal(void *cond) {
    __tsan_mutex_post_signal(cond, 0);
}
#else
ALWAYS_INLINE void if_tsan_pre_lock(void *) {
}
ALWAYS_INLINE void if_tsan_post_lock(void *) {
}
ALWAYS_INLINE void if_tsan_pre_unlock(void *) {
}
ALWAYS_INLINE void if_tsan_post_unlock(void *) {
}
ALWAYS_INLINE void if_tsan_pre_signal(void *) {
}
ALWAYS_INLINE void if_tsan_post_signal(void *) {
}
#endif

}  // namespace

class spin_control {
    // Everyone says this should be 40. Have not measured it.
    int spin_count = 40;

public:
    ALWAYS_INLINE bool should_spin() {
        if (spin_count > 0) {
            spin_count--;
        }
        return spin_count > 0;
    }

    ALWAYS_INLINE void reset() {
        spin_count = 40;
    }
};

// Low order two bits are used for locking state.
static constexpr uint8_t lock_bit = 0x01;
static constexpr uint8_t queue_lock_bit = 0x02;
static constexpr uint8_t parked_bit = 0x02;

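// Illustrative sketch (not part of the original file): a word_lock packs its
// whole state into a single uintptr_t. The two low bits are flags; the
// remaining bits, when nonzero, are a pointer to the head of the waiter queue
// (word_lock_queue_data is pointer-aligned, so its low bits are always zero).
//
//   uintptr_t s = ...;  // a word_lock's state word
//   bool held = (s & lock_bit) != 0;
//   bool queue_locked = (s & queue_lock_bit) != 0;
//   auto *head = (word_lock_queue_data *)(s & ~(uintptr_t)(queue_lock_bit | lock_bit));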
struct word_lock_queue_data {
    thread_parker parker;  // TODO: member or pointer?

    // This design is from the Rust parking lot implementation by Amanieu d'Antras.
    // Comment from original:
    //
    // Linked list of threads in the queue. The queue is split into two parts:
    // the processed part and the unprocessed part. When new nodes are added to
    // the list, they only have the next pointer set, and queue_tail is null.
    //
    // Nodes are processed with the queue lock held, which consists of setting
    // the prev pointer for each node and setting the queue_tail pointer on the
    // first processed node of the list.
    //
    // This setup allows nodes to be added to the queue without a lock, while
    // still allowing O(1) removal of nodes from the processed part of the list.
    // The only cost is the O(n) processing, but this only needs to be done
    // once for each node, and therefore isn't too expensive.

    word_lock_queue_data *next = nullptr;
    word_lock_queue_data *prev = nullptr;
    word_lock_queue_data *tail = nullptr;
};

class word_lock {
    uintptr_t state = 0;

    void lock_full();
    void unlock_full();

public:
    ALWAYS_INLINE void lock() {
        if_tsan_pre_lock(this);

        uintptr_t expected = 0;
        uintptr_t desired = lock_bit;
        // Try for a fast grab of the lock bit. If this does not work, call the full adaptive looping code.
        if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            lock_full();
        }

        if_tsan_post_lock(this);
    }

    ALWAYS_INLINE void unlock() {
        if_tsan_pre_unlock(this);

        uintptr_t val = atomic_fetch_and_release(&state, ~(uintptr_t)lock_bit);
        // If another thread is currently queueing, that thread will ensure
        // it acquires the lock or wakes a waiting thread.
        bool no_thread_queuing = (val & queue_lock_bit) == 0;
        // Only need to do a wakeup if there are threads waiting.
        bool some_queued = (val & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0;
        if (no_thread_queuing && some_queued) {
            unlock_full();
        }

        if_tsan_post_unlock(this);
    }
};

WEAK void word_lock::lock_full() {
    spin_control spinner;
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    while (true) {
        if (!(expected & lock_bit)) {
            uintptr_t desired = expected | lock_bit;

            if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
                return;
            }
            continue;
        }

        if (((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0) && spinner.should_spin()) {
            halide_thread_yield();
            atomic_load_relaxed(&state, &expected);
            continue;
        }

        word_lock_queue_data node;

        node.parker.prepare_park();
        // TODO set up prelinkage parking state

        word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
        if (head == nullptr) {
            node.tail = &node;
            // constructor set node.prev = nullptr;
        } else {
            // Mark the tail as nullptr. The unlock routine will walk the list and wake up
            // the thread at the end.
            // constructor set node.tail = nullptr;
            // constructor set node.prev = nullptr;
            node.next = head;
        }

        uintptr_t desired = ((uintptr_t)&node) | (expected & (queue_lock_bit | lock_bit));
        if (atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
            node.parker.park();
            spinner.reset();
            atomic_load_relaxed(&state, &expected);
        }
    }
}

WEAK void word_lock::unlock_full() {
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    while (true) {
        // If another thread is currently queueing, that thread will ensure
        // it acquires the lock or wakes a waiting thread.
        bool thread_queuing = (expected & queue_lock_bit);
        // Only need to do a wakeup if there are threads waiting.
        bool none_queued = (expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0;
        if (thread_queuing || none_queued) {
            return;
        }

        uintptr_t desired = expected | queue_lock_bit;
        if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            break;
        }
    }

    while (true) {
        word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
        word_lock_queue_data *current = head;
        word_lock_queue_data *tail = current->tail;
        while (tail == nullptr) {
            word_lock_queue_data *next = current->next;
            halide_abort_if_false(nullptr, next != nullptr);
            next->prev = current;
            current = next;
            tail = current->tail;
        }
        head->tail = tail;

        // If the lock is now locked, unlock the queue and have the thread
        // that currently holds the lock do the wakeup
        if (expected & lock_bit) {
            uintptr_t desired = expected & ~(uintptr_t)queue_lock_bit;
            if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                return;
            }
            atomic_thread_fence_acquire();
            continue;
        }

        word_lock_queue_data *new_tail = tail->prev;
        if (new_tail == nullptr) {
            bool continue_outer = false;
            while (!continue_outer) {
                uintptr_t desired = expected & lock_bit;
                if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                    break;
                }
                if ((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0) {
                    continue;
                } else {
                    atomic_thread_fence_acquire();
                    continue_outer = true;
                }
            }

            if (continue_outer) {
                continue;
            }
        } else {
            head->tail = new_tail;
            atomic_and_fetch_release(&state, ~(uintptr_t)queue_lock_bit);
        }

        // TODO: The reason there are three calls here is other things can happen between them.
        // Also it is not clear if unpark_start has to return the mutex/flag used by unpark
        // and unpark_finish due to memory lifetime reasons.
        tail->parker.unpark_start();
        tail->parker.unpark();
        tail->parker.unpark_finish();
        break;
    }
}

struct queue_data {
    thread_parker parker;  // TODO: member or pointer?

    uintptr_t sleep_address = 0;
    queue_data *next = nullptr;
    uintptr_t unpark_info = 0;
};

// Must be a power of two.
constexpr int LOAD_FACTOR = 4;

struct hash_bucket {
    word_lock mutex;

    queue_data *head = nullptr;  // Is this queue_data or thread_data?
    queue_data *tail = nullptr;  // Is this queue_data or thread_data?
};

constexpr int HASH_TABLE_SIZE = MAX_THREADS * LOAD_FACTOR;
struct hash_table {
    hash_bucket buckets[HASH_TABLE_SIZE];
};
WEAK hash_table table;

constexpr int HASH_TABLE_BITS = 10;
static_assert((1 << HASH_TABLE_BITS) >= MAX_THREADS * LOAD_FACTOR);

#if 0
WEAK void dump_hash() {
    int i = 0;
    for (auto &bucket : table.buckets) {
        queue_data *head = bucket.head;
        while (head != nullptr) {
            print(nullptr) << "Bucket index " << i << " addr " << (void *)head->sleep_address << "\n";
            head = head->next;
        }
        i++;
    }
}
#endif

static ALWAYS_INLINE uintptr_t addr_hash(uintptr_t addr) {
    // Fibonacci hashing. The golden ratio is 1.9E3779B97F4A7C15F39...
    // in hexadecimal.
    if (sizeof(uintptr_t) >= 8) {
        return (addr * (uintptr_t)0x9E3779B97F4A7C15) >> (64 - HASH_TABLE_BITS);
    } else {
        return (addr * (uintptr_t)0x9E3779B9) >> (32 - HASH_TABLE_BITS);
    }
}
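// Illustrative note (not part of the original file): Fibonacci hashing
// multiplies by roughly 2^64/phi (or 2^32/phi on 32-bit targets) and keeps
// only the top HASH_TABLE_BITS bits, which spreads clustered addresses across
// buckets. A hypothetical use:
//
//   uintptr_t a = (uintptr_t)&some_mutex;  // hypothetical sleep address
//   uintptr_t bucket = addr_hash(a);       // always < (1 << HASH_TABLE_BITS)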

WEAK hash_bucket &lock_bucket(uintptr_t addr) {
    uintptr_t hash = addr_hash(addr);

    halide_debug_assert(nullptr, hash < HASH_TABLE_SIZE);

    // TODO: if resizing is implemented, loop, etc.
    hash_bucket &bucket = table.buckets[hash];

    bucket.mutex.lock();

    return bucket;
}

struct bucket_pair {
    hash_bucket &from;
    hash_bucket &to;

    ALWAYS_INLINE bucket_pair(hash_bucket &from, hash_bucket &to)
        : from(from), to(to) {
    }
};

WEAK bucket_pair lock_bucket_pair(uintptr_t addr_from, uintptr_t addr_to) {
    // TODO: if resizing is implemented, loop, etc.
    uintptr_t hash_from = addr_hash(addr_from);
    uintptr_t hash_to = addr_hash(addr_to);

    halide_debug_assert(nullptr, hash_from < HASH_TABLE_SIZE);
    halide_debug_assert(nullptr, hash_to < HASH_TABLE_SIZE);

    // Lock the bucket with the smaller hash first in order
    // to prevent deadlock.
    if (hash_from == hash_to) {
        hash_bucket &first = table.buckets[hash_from];
        first.mutex.lock();
        return bucket_pair(first, first);
    } else if (hash_from < hash_to) {
        hash_bucket &first = table.buckets[hash_from];
        hash_bucket &second = table.buckets[hash_to];
        first.mutex.lock();
        second.mutex.lock();
        return bucket_pair(first, second);
    } else {
        hash_bucket &first = table.buckets[hash_to];
        hash_bucket &second = table.buckets[hash_from];
        first.mutex.lock();
        second.mutex.lock();
        return bucket_pair(second, first);
    }
}

WEAK void unlock_bucket_pair(bucket_pair &buckets) {
    // In the lock routine, the buckets are locked smaller hash index first.
    // Here we reverse this ordering by comparing the pointers. This works
    // since the pointers are obtained by indexing an array with the hash
    // values.
    if (&buckets.from == &buckets.to) {
        buckets.from.mutex.unlock();
    } else if (&buckets.from > &buckets.to) {
        buckets.from.mutex.unlock();
        buckets.to.mutex.unlock();
    } else {
        buckets.to.mutex.unlock();
        buckets.from.mutex.unlock();
    }
}

struct validate_action {
    bool unpark_one = false;
    uintptr_t invalid_unpark_info = 0;
};

struct parking_control {
    uintptr_t park(uintptr_t addr);
    uintptr_t unpark_one(uintptr_t addr);
    int unpark_requeue(uintptr_t addr_from, uintptr_t addr_to, uintptr_t unpark_info);

protected:
    virtual bool validate(validate_action &action) {
        return true;
    }
    virtual void before_sleep() {
        // nothing
    }
    virtual uintptr_t unpark(int unparked, bool more_waiters) {
        return 0;
    }
    virtual void requeue_callback(const validate_action &action, bool one_to_wake, bool some_requeued) {
        // nothing
    }
};
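// Illustrative sketch (not part of the original file): a hypothetical minimal
// parking_control subclass, shown only to make the protocol concrete. A waiter
// parks on the address of `flag` while the flag is still zero; the thread that
// sets the flag then calls unpark_one on the same address. The real controls
// below (mutex/signal/broadcast/wait) follow this same pattern.
//
//   struct flag_parking_control final : public parking_control {
//       uintptr_t *flag;
//
//       ALWAYS_INLINE flag_parking_control(uintptr_t *flag)
//           : flag(flag) {
//       }
//
//   protected:
//       bool validate(validate_action &action) final {
//           uintptr_t val;
//           atomic_load_relaxed(flag, &val);
//           return val == 0;  // only go to sleep while the flag is still clear
//       }
//   };
//
//   // Waiter:  flag_parking_control(&flag).park((uintptr_t)&flag);
//   // Setter:  store the flag, then flag_parking_control(&flag).unpark_one((uintptr_t)&flag);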

// TODO: Do we need a park_result thing here?
WEAK uintptr_t parking_control::park(uintptr_t addr) {
    queue_data queue_data;

    hash_bucket &bucket = lock_bucket(addr);

    validate_action action;
    if (!validate(action)) {
        bucket.mutex.unlock();
        return action.invalid_unpark_info;
    }

    queue_data.next = nullptr;
    atomic_store_relaxed(&queue_data.sleep_address, &addr);
    queue_data.parker.prepare_park();
    if (bucket.head != nullptr) {
        bucket.tail->next = &queue_data;
    } else {
        bucket.head = &queue_data;
    }
    bucket.tail = &queue_data;
    bucket.mutex.unlock();

    before_sleep();

    queue_data.parker.park();

    return queue_data.unpark_info;

    // TODO: handling timeout.
}

WEAK uintptr_t parking_control::unpark_one(uintptr_t addr) {
    hash_bucket &bucket = lock_bucket(addr);

    queue_data **data_location = &bucket.head;
    queue_data *prev = nullptr;
    queue_data *data = *data_location;
    while (data != nullptr) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);
        if (cur_addr == addr) {
            queue_data *next = data->next;
            *data_location = next;

            bool more_waiters = false;

            if (bucket.tail == data) {
                bucket.tail = prev;
            } else {
                queue_data *data2 = next;
                while (data2 != nullptr && !more_waiters) {
                    uintptr_t cur_addr2;
                    atomic_load_relaxed(&data2->sleep_address, &cur_addr2);
                    more_waiters = (cur_addr2 == addr);
                    data2 = data2->next;
                }
            }

            data->unpark_info = unpark(1, more_waiters);

            data->parker.unpark_start();
            bucket.mutex.unlock();
            data->parker.unpark();
            data->parker.unpark_finish();

            // TODO: Figure out ret type.
            return more_waiters ? 1 : 0;
        } else {
            data_location = &data->next;
            prev = data;
            data = data->next;
        }
    }

    unpark(0, false);

    bucket.mutex.unlock();

    // TODO: decide if this is the right return value.
    return 0;
}

WEAK int parking_control::unpark_requeue(uintptr_t addr_from, uintptr_t addr_to, uintptr_t unpark_info) {
    bucket_pair buckets = lock_bucket_pair(addr_from, addr_to);

    validate_action action;
    if (!validate(action)) {
        unlock_bucket_pair(buckets);
        return 0;
    }

    queue_data **data_location = &buckets.from.head;
    queue_data *prev = nullptr;
    queue_data *data = *data_location;
    queue_data *requeue = nullptr;
    queue_data *requeue_tail = nullptr;
    queue_data *wakeup = nullptr;

    while (data != nullptr) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);

        queue_data *next = data->next;
        if (cur_addr == addr_from) {
            *data_location = next;

            if (buckets.from.tail == data) {
                buckets.from.tail = prev;
            }

            if (action.unpark_one && wakeup == nullptr) {
                wakeup = data;
            } else {
                if (requeue == nullptr) {
                    requeue = data;
                } else {
                    requeue_tail->next = data;
                }

                requeue_tail = data;
                atomic_store_relaxed(&data->sleep_address, &addr_to);
            }
            data = next;
            // TODO: prev ptr?
        } else {
            data_location = &data->next;
            prev = data;
            data = next;
        }
    }

    if (requeue != nullptr) {
        requeue_tail->next = nullptr;
        if (buckets.to.head == nullptr) {
            buckets.to.head = requeue;
        } else {
            buckets.to.tail->next = requeue;
        }
        buckets.to.tail = requeue_tail;
    }

    requeue_callback(action, wakeup != nullptr, requeue != nullptr);

    if (wakeup != nullptr) {
        wakeup->unpark_info = unpark_info;
        wakeup->parker.unpark_start();
        unlock_bucket_pair(buckets);
        wakeup->parker.unpark();
        wakeup->parker.unpark_finish();
    } else {
        unlock_bucket_pair(buckets);
    }

    return wakeup != nullptr && action.unpark_one;
}

struct mutex_parking_control final : public parking_control {
    uintptr_t *lock_state;

    ALWAYS_INLINE mutex_parking_control(uintptr_t *lock_state)
        : lock_state(lock_state) {
    }

protected:
    bool validate(validate_action &action) final {
        uintptr_t result;
        atomic_load_relaxed(lock_state, &result);
        return result == (lock_bit | parked_bit);
    }

    uintptr_t unpark(int unparked, bool more_waiters) final {
        // TODO: consider handling fairness.
        uintptr_t return_state = more_waiters ? parked_bit : 0;
        atomic_store_release(lock_state, &return_state);
        return 0;
    }
};

class fast_mutex {
    uintptr_t state = 0;

    ALWAYS_INLINE void lock_full() {
        // Everyone says this should be 40. Have not measured it.
        spin_control spinner;
        uintptr_t expected;
        atomic_load_relaxed(&state, &expected);

        while (true) {
            if (!(expected & lock_bit)) {
                uintptr_t desired = expected | lock_bit;
                if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
                    return;
                }
                continue;
            }

            // Spin with spin count. Note that this occurs even if
            // threads are parked. We're prioritizing throughput over
            // fairness by letting sleeping threads lie.
            if (spinner.should_spin()) {
                halide_thread_yield();
                atomic_load_relaxed(&state, &expected);
                continue;
            }

            // Mark the mutex as having parked threads if not already done.
            if ((expected & parked_bit) == 0) {
                uintptr_t desired = expected | parked_bit;
                if (!atomic_cas_weak_relaxed_relaxed(&state, &expected, &desired)) {
                    continue;
                }
            }

            // TODO: consider handling fairness, timeout
            mutex_parking_control control(&state);
            uintptr_t result = control.park((uintptr_t)this);
            if (result == (uintptr_t)this) {
                return;
            }

            spinner.reset();
            atomic_load_relaxed(&state, &expected);
        }
    }

    ALWAYS_INLINE void unlock_full() {
        uintptr_t expected = lock_bit;
        uintptr_t desired = 0;
        // Try for a fast release of the lock. Redundant with code in lock, but done
        // to make unlock_full a standalone unlock that can be called directly.
        if (atomic_cas_strong_release_relaxed(&state, &expected, &desired)) {
            return;
        }

        mutex_parking_control control(&state);
        control.unpark_one((uintptr_t)this);
    }

public:
    ALWAYS_INLINE void lock() {
        uintptr_t expected = 0;
        uintptr_t desired = lock_bit;
        // Try for a fast grab of the lock bit. If this does not work, call the full adaptive looping code.
        if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            lock_full();
        }
    }

    ALWAYS_INLINE void unlock() {
        uintptr_t expected = lock_bit;
        uintptr_t desired = 0;
        // Try for a fast release of the lock bit. If this does not work, call the full adaptive looping code.
        if (!atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
            unlock_full();
        }
    }

    ALWAYS_INLINE bool make_parked_if_locked() {
        uintptr_t val;
        atomic_load_relaxed(&state, &val);
        while (true) {
            if (!(val & lock_bit)) {
                return false;
            }

            uintptr_t desired = val | parked_bit;
            if (atomic_cas_weak_relaxed_relaxed(&state, &val, &desired)) {
                return true;
            }
        }
    }

    ALWAYS_INLINE void make_parked() {
        atomic_or_fetch_relaxed(&state, parked_bit);
    }
};
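// Illustrative sketch (not part of the original file): a hypothetical RAII
// guard over fast_mutex, shown only to make the lock()/unlock() contract
// concrete. The runtime itself calls lock()/unlock() directly.
//
//   class fast_mutex_guard {
//       fast_mutex &m;
//
//   public:
//       ALWAYS_INLINE fast_mutex_guard(fast_mutex &m)
//           : m(m) {
//           m.lock();
//       }
//       ALWAYS_INLINE ~fast_mutex_guard() {
//           m.unlock();
//       }
//   };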

struct signal_parking_control final : public parking_control {
    uintptr_t *cond_state;
    fast_mutex *mutex;

    ALWAYS_INLINE signal_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
        : cond_state(cond_state), mutex(mutex) {
    }

protected:
    uintptr_t unpark(int unparked, bool more_waiters) final {
        if (!more_waiters) {
            uintptr_t val = 0;
            atomic_store_relaxed(cond_state, &val);
        }

#if 0  // TODO: figure out why this was here.
        return (uintptr_t)mutex;
#else
        return 0;
#endif
    }
};

struct broadcast_parking_control final : public parking_control {
    uintptr_t *cond_state;
    fast_mutex *mutex;

    ALWAYS_INLINE broadcast_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
        : cond_state(cond_state), mutex(mutex) {
    }

protected:
    bool validate(validate_action &action) final {
        uintptr_t val;
        atomic_load_relaxed(cond_state, &val);
        // If, by the time this broadcast locked everything and was processed, the cond
        // has progressed to a new mutex, do nothing, since any waiting threads have
        // to be waiting on what is effectively a different condition.
        if (val != (uintptr_t)mutex) {
            return false;
        }
        // Clear the cond's connection to the mutex, as all waiting threads are going to requeue onto the mutex.
        val = 0;
        atomic_store_relaxed(cond_state, &val);
        action.unpark_one = !mutex->make_parked_if_locked();
        return true;
    }

    void requeue_callback(const validate_action &action, bool one_to_wake, bool some_requeued) final {
        if (action.unpark_one && some_requeued) {
            mutex->make_parked();
        }
    }
};

struct wait_parking_control final : public parking_control {
    uintptr_t *cond_state;
    fast_mutex *mutex;

    ALWAYS_INLINE wait_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
        : cond_state(cond_state), mutex(mutex) {
    }

protected:
    bool validate(validate_action &action) final {
        uintptr_t val;
        atomic_load_relaxed(cond_state, &val);

        if (val == 0) {
            val = (uintptr_t)mutex;
            atomic_store_relaxed(cond_state, &val);
        } else if (val != (uintptr_t)mutex) {
            // TODO: signal error.
            action.invalid_unpark_info = (uintptr_t)mutex;
            return false;
        }

        return true;
    }

    void before_sleep() final {
        mutex->unlock();
    }

    uintptr_t unpark(int unparked, bool more_waiters) final {
        if (!more_waiters) {
            uintptr_t val = 0;
            atomic_store_relaxed(cond_state, &val);
        }
        return 0;
    }
};

class fast_cond {
    uintptr_t state = 0;

public:
    ALWAYS_INLINE void signal() {
        if_tsan_pre_signal(this);

        uintptr_t val;
        atomic_load_relaxed(&state, &val);
        if (val == 0) {
            if_tsan_post_signal(this);
            return;
        }
        signal_parking_control control(&state, (fast_mutex *)val);
        control.unpark_one((uintptr_t)this);
        if_tsan_post_signal(this);
    }

    ALWAYS_INLINE void broadcast() {
        if_tsan_pre_signal(this);

        uintptr_t val;
        atomic_load_relaxed(&state, &val);
        if (val == 0) {
            if_tsan_post_signal(this);
            return;
        }
        broadcast_parking_control control(&state, (fast_mutex *)val);
        control.unpark_requeue((uintptr_t)this, val, 0);
        if_tsan_post_signal(this);
    }

    ALWAYS_INLINE void wait(fast_mutex *mutex) {
        // Go to sleep until signaled
        wait_parking_control control(&state, mutex);
        uintptr_t result = control.park((uintptr_t)this);
        if (result != (uintptr_t)mutex) {
            mutex->lock();
        } else {
            if_tsan_pre_lock(mutex);

            // TODO: this is debug only.
            uintptr_t val;
            atomic_load_relaxed((uintptr_t *)mutex, &val);
            halide_abort_if_false(nullptr, val & 0x1);

            if_tsan_post_lock(mutex);
        }
    }
};

}  // namespace Synchronization

}  // namespace Internal
}  // namespace Runtime
}  // namespace Halide

extern "C" {

WEAK void halide_mutex_lock(halide_mutex *mutex) {
    Halide::Runtime::Internal::Synchronization::fast_mutex *fast_mutex =
        (Halide::Runtime::Internal::Synchronization::fast_mutex *)mutex;
    fast_mutex->lock();
}

WEAK void halide_mutex_unlock(halide_mutex *mutex) {
    Halide::Runtime::Internal::Synchronization::fast_mutex *fast_mutex =
        (Halide::Runtime::Internal::Synchronization::fast_mutex *)mutex;
    fast_mutex->unlock();
}

WEAK void halide_cond_broadcast(struct halide_cond *cond) {
    Halide::Runtime::Internal::Synchronization::fast_cond *fast_cond =
        (Halide::Runtime::Internal::Synchronization::fast_cond *)cond;
    fast_cond->broadcast();
}

WEAK void halide_cond_signal(struct halide_cond *cond) {
    Halide::Runtime::Internal::Synchronization::fast_cond *fast_cond =
        (Halide::Runtime::Internal::Synchronization::fast_cond *)cond;
    fast_cond->signal();
}

WEAK void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex) {
    Halide::Runtime::Internal::Synchronization::fast_cond *fast_cond =
        (Halide::Runtime::Internal::Synchronization::fast_cond *)cond;
    Halide::Runtime::Internal::Synchronization::fast_mutex *fast_mutex =
        (Halide::Runtime::Internal::Synchronization::fast_mutex *)mutex;
    fast_cond->wait(fast_mutex);
}

// Actual definition of the mutex array.
struct halide_mutex_array {
    struct halide_mutex *array;
};

WEAK halide_mutex_array *halide_mutex_array_create(uint64_t sz) {
    // TODO: If sz is huge, we should probably hash it down to something smaller
    // in the accessors below. Check for deadlocks before doing so.
    halide_mutex_array *array = (halide_mutex_array *)halide_malloc(
        nullptr, sizeof(halide_mutex_array));
    if (array == nullptr) {
        // Will result in a failed assertion and a call to halide_error.
        return nullptr;
    }
    array->array = (halide_mutex *)halide_malloc(
        nullptr, sz * sizeof(halide_mutex));
    if (array->array == nullptr) {
        halide_free(nullptr, array);
        // Will result in a failed assertion and a call to halide_error.
        return nullptr;
    }
    memset(array->array, 0, sz * sizeof(halide_mutex));
    return array;
}

WEAK void halide_mutex_array_destroy(void *user_context, void *array) {
    struct halide_mutex_array *arr_ptr = (struct halide_mutex_array *)array;
    halide_free(user_context, arr_ptr->array);
    halide_free(user_context, arr_ptr);
}

WEAK int halide_mutex_array_lock(struct halide_mutex_array *array, int entry) {
    halide_mutex_lock(&array->array[entry]);
    return halide_error_code_success;
}

WEAK int halide_mutex_array_unlock(struct halide_mutex_array *array, int entry) {
    halide_mutex_unlock(&array->array[entry]);
    return halide_error_code_success;
}
}
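// Illustrative sketch (not part of the original file): how a runtime client
// might use the mutex-array API above. `n` and `i` are hypothetical values.
//
//   halide_mutex_array *locks = halide_mutex_array_create(n);
//   if (locks != nullptr) {
//       halide_mutex_array_lock(locks, i);
//       // ... critical section guarded by entry i ...
//       halide_mutex_array_unlock(locks, i);
//       halide_mutex_array_destroy(nullptr, locks);
//   }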