atomicops.h
// ©2013-2016 Cameron Desrochers.
// Distributed under the simplified BSD license (see the license file that
// should have come with this header).
// Uses Jeff Preshing's semaphore implementation (under the terms of its
// separate zlib license, embedded below).

#pragma once

// Provides a portable (VC++2010+, Intel ICC 13, GCC 4.7+, and anything C++11 compliant) implementation
// of low-level memory barriers, plus a few semi-portable utility macros (for inlining and alignment).
// Also has a basic atomic type (limited to hardware-supported atomics with no memory ordering guarantees).
// Uses the AE_* prefix for macros (historical reasons), and the "moodycamel" namespace for symbols.

#include <cerrno>
#include <cassert>
#include <type_traits>
#include <cstdint>
#include <ctime>
// Platform detection
#if defined(__INTEL_COMPILER)
#define AE_ICC
#elif defined(_MSC_VER)
#define AE_VCPP
#elif defined(__GNUC__)
#define AE_GCC
#endif

#if defined(_M_IA64) || defined(__ia64__)
#define AE_ARCH_IA64
#elif defined(_WIN64) || defined(__amd64__) || defined(_M_X64) || defined(__x86_64__)
#define AE_ARCH_X64
#elif defined(_M_IX86) || defined(__i386__)
#define AE_ARCH_X86
#elif defined(_M_PPC) || defined(__powerpc__)
#define AE_ARCH_PPC
#else
#define AE_ARCH_UNKNOWN
#endif


// AE_UNUSED
#define AE_UNUSED(x) ((void)x)

// AE_NO_TSAN/AE_TSAN_ANNOTATE_*
// For GCC
#if defined(__SANITIZE_THREAD__)
#define AE_TSAN_IS_ENABLED
#endif
// For clang
#if defined(__has_feature)
#if __has_feature(thread_sanitizer) && !defined(AE_TSAN_IS_ENABLED)
#define AE_TSAN_IS_ENABLED
#endif
#endif

#ifdef AE_TSAN_IS_ENABLED
#if __cplusplus >= 201703L // inline variables require C++17
namespace moodycamel { inline int ae_tsan_global; }
#define AE_TSAN_ANNOTATE_RELEASE() AnnotateHappensBefore(__FILE__, __LINE__, (void *)(&::moodycamel::ae_tsan_global))
#define AE_TSAN_ANNOTATE_ACQUIRE() AnnotateHappensAfter(__FILE__, __LINE__, (void *)(&::moodycamel::ae_tsan_global))
extern "C" void AnnotateHappensBefore(const char*, int, void*);
extern "C" void AnnotateHappensAfter(const char*, int, void*);
#else // when we can't work with tsan, attempt to disable its warnings
#define AE_NO_TSAN __attribute__((no_sanitize("thread")))
#endif
#endif

#ifndef AE_NO_TSAN
#define AE_NO_TSAN
#endif

#ifndef AE_TSAN_ANNOTATE_RELEASE
#define AE_TSAN_ANNOTATE_RELEASE()
#define AE_TSAN_ANNOTATE_ACQUIRE()
#endif


// AE_FORCEINLINE
#if defined(AE_VCPP) || defined(AE_ICC)
#define AE_FORCEINLINE __forceinline
#elif defined(AE_GCC)
//#define AE_FORCEINLINE __attribute__((always_inline))
#define AE_FORCEINLINE inline
#else
#define AE_FORCEINLINE inline
#endif


// AE_ALIGN
#if defined(AE_VCPP) || defined(AE_ICC)
#define AE_ALIGN(x) __declspec(align(x))
#elif defined(AE_GCC)
#define AE_ALIGN(x) __attribute__((aligned(x)))
#else
// Assume GCC compliant syntax...
#define AE_ALIGN(x) __attribute__((aligned(x)))
#endif
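
// Minimal usage sketch for the macros above (illustrative only and compiled
// out; the names `counter` and `read_counter` are hypothetical, not part of
// this header):
#if 0
AE_ALIGN(64) static int counter;	// aligned to a 64-byte boundary (e.g. one cache line)
AE_FORCEINLINE static int read_counter() AE_NO_TSAN { return counter; }	// inlined where the compiler supports forcing it
#endif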


// Portable atomic fences implemented below:

namespace moodycamel {

enum memory_order {
	memory_order_relaxed,
	memory_order_acquire,
	memory_order_release,
	memory_order_acq_rel,
	memory_order_seq_cst,

	// memory_order_sync: Forces a full sync:
	// #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad
	memory_order_sync = memory_order_seq_cst
};

} // end namespace moodycamel

#if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || (defined(AE_ICC) && __INTEL_COMPILER < 1600)
// VS2010 and ICC13 don't support std::atomic_*_fence, so we implement our own fences

#include <intrin.h>

#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
#define AeFullSync _mm_mfence
#define AeLiteSync _mm_mfence
#elif defined(AE_ARCH_IA64)
#define AeFullSync __mf
#define AeLiteSync __mf
#elif defined(AE_ARCH_PPC)
#include <ppcintrinsics.h>
#define AeFullSync __sync
#define AeLiteSync __lwsync
#endif


#ifdef AE_VCPP
#pragma warning(push)
#pragma warning(disable: 4365) // Disable erroneous 'conversion from long to unsigned int, signed/unsigned mismatch' warning when using `assert`
#ifdef __cplusplus_cli
#pragma managed(push, off)
#endif
#endif

namespace moodycamel {

AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN
{
	switch (order) {
		case memory_order_relaxed: break;
		case memory_order_acquire: _ReadBarrier(); break;
		case memory_order_release: _WriteBarrier(); break;
		case memory_order_acq_rel: _ReadWriteBarrier(); break;
		case memory_order_seq_cst: _ReadWriteBarrier(); break;
		default: assert(false);
	}
}

// x86/x64 have a strong memory model -- all loads and stores have
// acquire and release semantics automatically (so we only need compiler
// barriers for those).
#if defined(AE_ARCH_X86) || defined(AE_ARCH_X64)
AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN
{
	switch (order) {
		case memory_order_relaxed: break;
		case memory_order_acquire: _ReadBarrier(); break;
		case memory_order_release: _WriteBarrier(); break;
		case memory_order_acq_rel: _ReadWriteBarrier(); break;
		case memory_order_seq_cst:
			_ReadWriteBarrier();
			AeFullSync();
			_ReadWriteBarrier();
			break;
		default: assert(false);
	}
}
#else
AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN
{
	// Non-specialized arch, use heavier memory barriers everywhere just in case :-(
	switch (order) {
		case memory_order_relaxed:
			break;
		case memory_order_acquire:
			_ReadBarrier();
			AeLiteSync();
			_ReadBarrier();
			break;
		case memory_order_release:
			_WriteBarrier();
			AeLiteSync();
			_WriteBarrier();
			break;
		case memory_order_acq_rel:
			_ReadWriteBarrier();
			AeLiteSync();
			_ReadWriteBarrier();
			break;
		case memory_order_seq_cst:
			_ReadWriteBarrier();
			AeFullSync();
			_ReadWriteBarrier();
			break;
		default: assert(false);
	}
}
#endif
} // end namespace moodycamel
#else
// Use the standard library's atomic fences
#include <atomic>

namespace moodycamel {

AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN
{
	switch (order) {
		case memory_order_relaxed: break;
		case memory_order_acquire: std::atomic_signal_fence(std::memory_order_acquire); break;
		case memory_order_release: std::atomic_signal_fence(std::memory_order_release); break;
		case memory_order_acq_rel: std::atomic_signal_fence(std::memory_order_acq_rel); break;
		case memory_order_seq_cst: std::atomic_signal_fence(std::memory_order_seq_cst); break;
		default: assert(false);
	}
}

AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN
{
	switch (order) {
		case memory_order_relaxed: break;
		case memory_order_acquire: AE_TSAN_ANNOTATE_ACQUIRE(); std::atomic_thread_fence(std::memory_order_acquire); break;
		case memory_order_release: AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_release); break;
		case memory_order_acq_rel: AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_acq_rel); break;
		case memory_order_seq_cst: AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_seq_cst); break;
		default: assert(false);
	}
}

} // end namespace moodycamel

#endif
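
// Illustrative sketch of how the fences above pair up (compiled out; the
// variables `g_payload` and `g_ready` and both functions are hypothetical,
// not part of this header):
#if 0
static int g_payload;
static int g_ready;	// assume plain int loads/stores are atomic on the target, per the weak_atomic caveats below

void produce() AE_NO_TSAN
{
	g_payload = 42;
	moodycamel::fence(moodycamel::memory_order_release);	// writes above cannot move below this fence
	g_ready = 1;
}

void consume() AE_NO_TSAN
{
	while (g_ready == 0)
		moodycamel::compiler_fence(moodycamel::memory_order_acquire);	// stop the compiler from collapsing the loop
	moodycamel::fence(moodycamel::memory_order_acquire);	// reads below cannot move above this fence
	assert(g_payload == 42);	// guaranteed once g_ready was observed as 1
}
#endif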


#if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli))
#define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
#endif

#ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
#include <atomic>
#endif
#include <utility>

// WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
// Provides basic support for atomic variables -- no memory ordering guarantees are provided.
// The guarantee of atomicity is only made for types that already have atomic load and store guarantees
// at the hardware level -- on most platforms this generally means aligned pointers and integers (only).
namespace moodycamel {
template<typename T>
class weak_atomic
{
public:
	AE_NO_TSAN weak_atomic() : value() { }
#ifdef AE_VCPP
#pragma warning(push)
#pragma warning(disable: 4100) // Get rid of (erroneous) 'unreferenced formal parameter' warning
#endif
	template<typename U> AE_NO_TSAN weak_atomic(U&& x) : value(std::forward<U>(x)) { }
#ifdef __cplusplus_cli
	// Work around bug with universal reference/nullptr combination that only appears when /clr is on
	AE_NO_TSAN weak_atomic(nullptr_t) : value(nullptr) { }
#endif
	AE_NO_TSAN weak_atomic(weak_atomic const& other) : value(other.load()) { }
	AE_NO_TSAN weak_atomic(weak_atomic&& other) : value(std::move(other.load())) { }
#ifdef AE_VCPP
#pragma warning(pop)
#endif

	AE_FORCEINLINE operator T() const AE_NO_TSAN { return load(); }


#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
	template<typename U> AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN { value = std::forward<U>(x); return *this; }
	AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN { value = other.value; return *this; }

	AE_FORCEINLINE T load() const AE_NO_TSAN { return value; }

	AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN
	{
#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
		if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
#if defined(_M_AMD64)
		else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
#endif
#else
#error Unsupported platform
#endif
		assert(false && "T must be either a 32 or 64 bit type");
		return value;
	}

	AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN
	{
#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
		if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
#if defined(_M_AMD64)
		else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
#endif
#else
#error Unsupported platform
#endif
		assert(false && "T must be either a 32 or 64 bit type");
		return value;
	}
#else
	template<typename U>
	AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN
	{
		value.store(std::forward<U>(x), std::memory_order_relaxed);
		return *this;
	}

	AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN
	{
		value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
		return *this;
	}

	AE_FORCEINLINE T load() const AE_NO_TSAN { return value.load(std::memory_order_relaxed); }

	AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN
	{
		return value.fetch_add(increment, std::memory_order_acquire);
	}

	AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN
	{
		return value.fetch_add(increment, std::memory_order_release);
	}
#endif


private:
#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
	// No std::atomic support, but still need to circumvent compiler optimizations.
	// `volatile` will make memory access slow, but is guaranteed to be reliable.
	volatile T value;
#else
	std::atomic<T> value;
#endif
};

} // end namespace moodycamel

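// Illustrative sketch of weak_atomic in a single-producer, single-consumer
// setting, mirroring how LightweightSemaphore uses it below (compiled out;
// `available`, `produce` and `try_consume` are hypothetical names):
#if 0
static moodycamel::weak_atomic<int> available(0);

void produce() AE_NO_TSAN
{
	available.fetch_add_release(1);	// publish one item; release pairs with the consumer's acquire
}

bool try_consume() AE_NO_TSAN
{
	if (available.load() > 0) {	// relaxed load -- safe only because there is a single consumer
		available.fetch_add_acquire(-1);	// acquire makes the producer's prior writes visible
		return true;
	}
	return false;
}
#endif
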
// Portable single-producer, single-consumer semaphore below:

#if defined(_WIN32)
// Avoid including windows.h in a header; we only need a handful of
// items, so we'll redeclare them here (this is relatively safe since
// the API generally has to remain stable between Windows versions).
// I know this is an ugly hack but it still beats polluting the global
// namespace with thousands of generic names or adding a .cpp for nothing.
extern "C" {
	struct _SECURITY_ATTRIBUTES;
	__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
	__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
	__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
	__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
}
#elif defined(__MACH__)
#include <mach/mach.h>
#elif defined(__unix__)
#include <semaphore.h>
#elif defined(FREERTOS)
#include <FreeRTOS.h>
#include <semphr.h>
#include <task.h>
#endif

namespace moodycamel
{
	// Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
	// portable + lightweight semaphore implementations, originally from
	// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
	// LICENSE:
	// Copyright (c) 2015 Jeff Preshing
	//
	// This software is provided 'as-is', without any express or implied
	// warranty. In no event will the authors be held liable for any damages
	// arising from the use of this software.
	//
	// Permission is granted to anyone to use this software for any purpose,
	// including commercial applications, and to alter it and redistribute it
	// freely, subject to the following restrictions:
	//
	// 1. The origin of this software must not be misrepresented; you must not
	//    claim that you wrote the original software. If you use this software
	//    in a product, an acknowledgement in the product documentation would be
	//    appreciated but is not required.
	// 2. Altered source versions must be plainly marked as such, and must not be
	//    misrepresented as being the original software.
	// 3. This notice may not be removed or altered from any source distribution.
	namespace spsc_sema
	{
#if defined(_WIN32)
		class Semaphore
		{
		private:
			void* m_hSema;

			Semaphore(const Semaphore& other);
			Semaphore& operator=(const Semaphore& other);

		public:
			AE_NO_TSAN Semaphore(int initialCount = 0) : m_hSema()
			{
				assert(initialCount >= 0);
				const long maxLong = 0x7fffffff;
				m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
				assert(m_hSema);
			}

			AE_NO_TSAN ~Semaphore()
			{
				CloseHandle(m_hSema);
			}

			bool wait() AE_NO_TSAN
			{
				const unsigned long infinite = 0xffffffff;
				return WaitForSingleObject(m_hSema, infinite) == 0;
			}

			bool try_wait() AE_NO_TSAN
			{
				return WaitForSingleObject(m_hSema, 0) == 0;
			}

			bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
			{
				return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0;
			}

			void signal(int count = 1) AE_NO_TSAN
			{
				while (!ReleaseSemaphore(m_hSema, count, nullptr));
			}
		};
#elif defined(__MACH__)
		//---------------------------------------------------------
		// Semaphore (Apple iOS and OSX)
		// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
		//---------------------------------------------------------
		class Semaphore
		{
		private:
			semaphore_t m_sema;

			Semaphore(const Semaphore& other);
			Semaphore& operator=(const Semaphore& other);

		public:
			AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
			{
				assert(initialCount >= 0);
				kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
				assert(rc == KERN_SUCCESS);
				AE_UNUSED(rc);
			}

			AE_NO_TSAN ~Semaphore()
			{
				semaphore_destroy(mach_task_self(), m_sema);
			}

			bool wait() AE_NO_TSAN
			{
				return semaphore_wait(m_sema) == KERN_SUCCESS;
			}

			bool try_wait() AE_NO_TSAN
			{
				return timed_wait(0);
			}

			bool timed_wait(std::uint64_t timeout_usecs) AE_NO_TSAN
			{
				mach_timespec_t ts;
				ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);
				ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000);

				// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
				kern_return_t rc = semaphore_timedwait(m_sema, ts);
				return rc == KERN_SUCCESS;
			}

			void signal() AE_NO_TSAN
			{
				while (semaphore_signal(m_sema) != KERN_SUCCESS);
			}

			void signal(int count) AE_NO_TSAN
			{
				while (count-- > 0)
				{
					while (semaphore_signal(m_sema) != KERN_SUCCESS);
				}
			}
		};
#elif defined(__unix__)
		//---------------------------------------------------------
		// Semaphore (POSIX, Linux)
		//---------------------------------------------------------
		class Semaphore
		{
		private:
			sem_t m_sema;

			Semaphore(const Semaphore& other);
			Semaphore& operator=(const Semaphore& other);

		public:
			AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
			{
				assert(initialCount >= 0);
				int rc = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
				assert(rc == 0);
				AE_UNUSED(rc);
			}

			AE_NO_TSAN ~Semaphore()
			{
				sem_destroy(&m_sema);
			}

			bool wait() AE_NO_TSAN
			{
				// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
				int rc;
				do
				{
					rc = sem_wait(&m_sema);
				}
				while (rc == -1 && errno == EINTR);
				return rc == 0;
			}

			bool try_wait() AE_NO_TSAN
			{
				int rc;
				do {
					rc = sem_trywait(&m_sema);
				} while (rc == -1 && errno == EINTR);
				return rc == 0;
			}

			bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
			{
				struct timespec ts;
				const int usecs_in_1_sec = 1000000;
				const int nsecs_in_1_sec = 1000000000;
				clock_gettime(CLOCK_REALTIME, &ts);
				ts.tv_sec += static_cast<time_t>(usecs / usecs_in_1_sec);
				ts.tv_nsec += static_cast<long>(usecs % usecs_in_1_sec) * 1000;
				// sem_timedwait bombs if you have more than 1e9 in tv_nsec
				// so we have to clean things up before passing it in
				if (ts.tv_nsec >= nsecs_in_1_sec) {
					ts.tv_nsec -= nsecs_in_1_sec;
					++ts.tv_sec;
				}

				int rc;
				do {
					rc = sem_timedwait(&m_sema, &ts);
				} while (rc == -1 && errno == EINTR);
				return rc == 0;
			}

			void signal() AE_NO_TSAN
			{
				while (sem_post(&m_sema) == -1);
			}

			void signal(int count) AE_NO_TSAN
			{
				while (count-- > 0)
				{
					while (sem_post(&m_sema) == -1);
				}
			}
		};
#elif defined(FREERTOS)
		//---------------------------------------------------------
		// Semaphore (FreeRTOS)
		//---------------------------------------------------------
		class Semaphore
		{
		private:
			SemaphoreHandle_t m_sema;

			Semaphore(const Semaphore& other);
			Semaphore& operator=(const Semaphore& other);

		public:
			AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
			{
				assert(initialCount >= 0);
				m_sema = xSemaphoreCreateCounting(static_cast<UBaseType_t>(~0ull), static_cast<UBaseType_t>(initialCount));
				assert(m_sema);
			}

			AE_NO_TSAN ~Semaphore()
			{
				vSemaphoreDelete(m_sema);
			}

			bool wait() AE_NO_TSAN
			{
				return xSemaphoreTake(m_sema, portMAX_DELAY) == pdTRUE;
			}

			bool try_wait() AE_NO_TSAN
			{
				// Note: In an ISR context, if this causes a task to unblock,
				// the caller won't know about it
				if (xPortIsInsideInterrupt())
					return xSemaphoreTakeFromISR(m_sema, NULL) == pdTRUE;
				return xSemaphoreTake(m_sema, 0) == pdTRUE;
			}

			bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
			{
				std::uint64_t msecs = usecs / 1000;
				TickType_t ticks = static_cast<TickType_t>(msecs / portTICK_PERIOD_MS);
				if (ticks == 0)
					return try_wait();
				return xSemaphoreTake(m_sema, ticks) == pdTRUE;
			}

			void signal() AE_NO_TSAN
			{
				// Note: In an ISR context, if this causes a task to unblock,
				// the caller won't know about it
				BaseType_t rc;
				if (xPortIsInsideInterrupt())
					rc = xSemaphoreGiveFromISR(m_sema, NULL);
				else
					rc = xSemaphoreGive(m_sema);
				assert(rc == pdTRUE);
				AE_UNUSED(rc);
			}

			void signal(int count) AE_NO_TSAN
			{
				while (count-- > 0)
					signal();
			}
		};
#else
#error Unsupported platform! (No semaphore wrapper available)
#endif

		//---------------------------------------------------------
		// LightweightSemaphore
		//---------------------------------------------------------
		class LightweightSemaphore
		{
		public:
			typedef std::make_signed<std::size_t>::type ssize_t;

		private:
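			// Added note (an interpretation of the logic below, not original text):
			// m_count is the semaphore's logical count minus the number of waiters.
			// When positive, it is the number of signals that can be consumed without
			// blocking; when it drops below zero, the (single) consumer has run into,
			// or is about to run into, the kernel-level semaphore below. This is why
			// signal() only forwards to m_sema when the previous count was negative.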
			weak_atomic<ssize_t> m_count;
			Semaphore m_sema;

			bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) AE_NO_TSAN
			{
				ssize_t oldCount;
				// Is there a better way to set the initial spin count?
				// If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
				// as threads start hitting the kernel semaphore.
				int spin = 1024;
				while (--spin >= 0)
				{
					if (m_count.load() > 0)
					{
						m_count.fetch_add_acquire(-1);
						return true;
					}
					compiler_fence(memory_order_acquire);	// Prevent the compiler from collapsing the loop.
				}
				oldCount = m_count.fetch_add_acquire(-1);
				if (oldCount > 0)
					return true;
				if (timeout_usecs < 0)
				{
					if (m_sema.wait())
						return true;
				}
				if (timeout_usecs > 0 && m_sema.timed_wait(static_cast<uint64_t>(timeout_usecs)))
					return true;
				// At this point, we've timed out waiting for the semaphore, but the
				// count is still decremented, indicating we may still be waiting on
				// it. So we have to re-adjust the count, but only if the semaphore
				// wasn't signaled for us in the meantime; if it was, we need to
				// consume that signal from the kernel semaphore as well.
				while (true)
				{
					oldCount = m_count.fetch_add_release(1);
					if (oldCount < 0)
						return false;	// successfully restored things to the way they were
					// Oh, the producer thread just signaled the semaphore after all. Try again:
					oldCount = m_count.fetch_add_acquire(-1);
					if (oldCount > 0 && m_sema.try_wait())
						return true;
				}
			}

		public:
			AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema()
			{
				assert(initialCount >= 0);
			}

			bool tryWait() AE_NO_TSAN
			{
				if (m_count.load() > 0)
				{
					m_count.fetch_add_acquire(-1);
					return true;
				}
				return false;
			}

			bool wait() AE_NO_TSAN
			{
				return tryWait() || waitWithPartialSpinning();
			}

			bool wait(std::int64_t timeout_usecs) AE_NO_TSAN
			{
				return tryWait() || waitWithPartialSpinning(timeout_usecs);
			}

			void signal(ssize_t count = 1) AE_NO_TSAN
			{
				assert(count >= 0);
				ssize_t oldCount = m_count.fetch_add_release(count);
				assert(oldCount >= -1);
				if (oldCount < 0)
				{
					m_sema.signal(1);
				}
			}

			std::size_t availableApprox() const AE_NO_TSAN
			{
				ssize_t count = m_count.load();
				return count > 0 ? static_cast<std::size_t>(count) : 0;
			}
		};
	} // end namespace spsc_sema
} // end namespace moodycamel
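
// Illustrative usage sketch for LightweightSemaphore in a single-producer,
// single-consumer handoff (compiled out; `queue`, `producer` and `consumer`
// are hypothetical names, not part of this header):
#if 0
static moodycamel::spsc_sema::LightweightSemaphore items;	// count starts at 0

void producer(int value)
{
	queue.push(value);	// hypothetical SPSC queue; push the item before signalling
	items.signal();	// wake the consumer (cheap if it is still spinning)
}

void consumer()
{
	items.wait();	// spins briefly, then falls back to the kernel semaphore
	int value = queue.pop();	// hypothetical; an item is guaranteed to be available
	AE_UNUSED(value);
}
#endif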

#if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
#pragma warning(pop)
#ifdef __cplusplus_cli
#pragma managed(pop)
#endif
#endif