Short live q20::atomic_wait!

atomic_wait() and atomic_notify_{one,all}() are available in C++20, which we can't depend on right now. So we implement our own fallback implementation. This is a simple implementation for systems which don't have either futexes or the C++20 atomic wait API. That means it's not very efficient, just simple. The unit test tests the fallback implementation only: it is not Qt's business to test the Standard Library. The fallback implementation and the Standard Library's are not binary-compatible and cannot be mixed. Callers must ensure that all sides use the same implementation and the easiest way to do that is to only use this in non-inline code, or at worst inline code that isn't shared across libraries/modules/plugins. Change-Id: Ib5ce7a497e034ebabb2cfffd1761a0e497dd17d4 Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
author: Thiago Macieira <thiago.macieira@intel.com> 2023-05-22 18:17:19 -0700
committer: Thiago Macieira <thiago.macieira@intel.com> 2025-06-27 07:50:31 -0700
commit: b540732975db81fa327663069af3fc1c4311d8b9 (patch)
tree: 37439612c9c1e752093f65a0e34ad98bf8f24d07 /src/corelib/thread/qatomicwait.cpp
parent: 286bb6c32bbf892d31e8aa54b492eade02e0ede3 (diff)
1 files changed, 164 insertions, 0 deletions
diff --git a/src/corelib/thread/qatomicwait.cpp b/src/corelib/thread/qatomicwait.cpp
new file mode 100644
index 00000000000..3d70333ca7b
--- /dev/null
+++ b/src/corelib/thread/qatomicwait.cpp
@@ -0,0 +1,164 @@
+// Copyright (C) 2025 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
+
+#include "qatomicwait_p.h"
+
+#include <qendian.h>
+#include "qwaitcondition_p.h"
+
+#include <array>
+#include <condition_variable>
+#include <mutex>
+
+QT_BEGIN_NAMESPACE
+
+/*
+ * Implementation details:
+ *
+ * Normally, a call from atomic_notify_one() or atomic_notify_all() corresponds
+ * to a call to cond.notify_one() or cond.notify_all(). This simple
+ * implementation would require that we keep a distinct wait condition variable
+ * per atomic variable, of which there could be an arbitrary number.
+ *
+ * Instead, we have a limited set of std::condition_variable, which are
+ * selected based on the address of the variable being waited/notified on.
+ * Because multiple, distinct variables could be sharing the same condition
+ * variable, we need to perform a cond.notify_all() call in case the lock is
+ * contended by waiters to more than one address, as we can't be sure which one
+ * would be woken up by the cond.notify_one() call.
+ *
+ * We recover some of the performance of notifying a single waker by also
+ * storing the address of the variable that was being waited on. If it matches
+ * the address of the variable being notified, we can perform a notify_one()
+ * call. This also allows us to avoid any system call in case no waiter has yet
+ * joined the queue. In case of contention, we store a sentinel address
+ * instead, indicating there are waiters to multiple variables. The last waiter
+ * to leave a wait queue is responsible for resetting the watched address back
+ * to nullptr.
+ *
+ * Performance details:
+ *
+ * This implementation is designed for systems where neither the Standard
+ * Library's own std::atomic_wait nor operating system futexes are available
+ * (read: usually, not the systems Qt is used most frequently on). Therefore,
+ * it's designed for simplicity, not performance. Simplifications applied
+ * include:
+ * - the total number of possible condition variables
+ * - the hashing algorithm to select a condition variable (simple XOR)
+ * - no attempt to wait for a variable change before calling cond.wait()
+ *   (no spinning, no HW-assisted wait)
+ *
+ * Other limitations:
+ *
+ * We only support 8-, 16-, 32- and 64-bit variables and we require bit-exact
+ * comparisons for all bits. This means we do not support objects with padding
+ * bits or without unique representations.
+ */
+
+namespace {
+struct QAtomicWaitLocks
+{
+    // Sentinel address used to indicate this Lock is being used by waiters to
+    // multiple addresses, implying we must make a notify_all() call.
+    static void *contendedWatchAddress() { return reinterpret_cast<void *>(-1); }
+
+    struct Lock {
+        alignas(QtPrivate::IdealMutexAlignment) std::mutex mutex;
+        alignas(QtPrivate::IdealMutexAlignment) std::condition_variable cond;
+
+        // Can assume values:
+        // - nullptr: no waiter is waiting
+        // - contendedWatchAddress(): waiters to distinct addresses
+        // - any other value: all waiters are waiting on the same address
+        const void *watchedAddress = nullptr;
+        qsizetype watcherCount = 0;
+    };
+
+    static constexpr int LockCount = 16;
+    std::array<Lock, LockCount> locks;
+
+    int indexFor(const void *ptr)
+    {
+        static_assert((LockCount & (LockCount - 1)) == 0, "LockCount is not a power of two");
+
+        quintptr value = quintptr(ptr) / sizeof(int);
+        quintptr idx = value % LockCount;
+#ifndef QATOMICWAIT_USE_FALLBACK
+        // XOR some higher bits too to avoid hashing collisions
+        // (we don't do it in the unit test because we *want* those collisions)
+        idx ^= (value / LockCount) % LockCount;
+        idx ^= (value / LockCount / LockCount) % LockCount;
+#endif
+        return int(idx);
+    }
+
+    Lock &lockFor(const void *ptr) { return locks[indexFor(ptr)]; }
+};
+} // unnamed namespace
+
+static QAtomicWaitLocks &atomicLocks() noexcept
+{
+    static QAtomicWaitLocks global {};
+    return global;
+}
+
+template <typename T> static bool isEqual(const void *address, const void *old)
+{
+    auto atomic = static_cast<const std::atomic<T> *>(address);
+    auto expected = static_cast<const T *>(old);
+    return atomic->load(std::memory_order_relaxed) == *expected;
+}
+
+static bool isEqual(const void *address, const void *old, qsizetype size) noexcept
+{
+    switch (size) {
+    case 1: return isEqual<quint8 >(address, old);
+    case 2: return isEqual<quint16>(address, old);
+    case 4: return isEqual<quint32>(address, old);
+    case 8: return isEqual<quint64>(address, old);
+    }
+    Q_UNREACHABLE_RETURN(false);
+}
+
+void QtPrivate::_q_atomicWait(const void *address, const void *old, qsizetype size) noexcept
+{
+    auto &locker = atomicLocks().lockFor(address);
+
+    // NOT noexcept; we'll terminate if locking the mutex fails
+    std::unique_lock lock(locker.mutex);
+
+    // is the value still current?
+    if (!isEqual(address, old, size))
+        return;                     // no, failed to wait
+
+    if (locker.watchedAddress && locker.watchedAddress != address)
+        locker.watchedAddress = QAtomicWaitLocks::contendedWatchAddress();
+    else
+        locker.watchedAddress = address;
+    ++locker.watcherCount;
+
+    do {
+        locker.cond.wait(lock);
+    } while (isEqual(address, old, size));
+
+    if (--locker.watcherCount == 0)
+        locker.watchedAddress = nullptr;
+}
+
+void QtPrivate::_q_atomicWake(void *address, WakeMode mode) noexcept
+{
+    auto &locker = atomicLocks().lockFor(address);
+
+    // NOT noexcept; we'll terminate if locking the mutex fails
+    std::unique_lock lock(locker.mutex);
+
+    // can we wake just one?
+    if (mode == WakeMode::One && locker.watchedAddress == address)
+        locker.cond.notify_one();
+    else if (locker.watchedAddress != nullptr)
+        locker.cond.notify_all();
+    else
+        qt_noop();          // no one was waiting
+}
+
+QT_END_NAMESPACE
author	Thiago Macieira <thiago.macieira@intel.com>	2023-05-22 18:17:19 -0700
committer	Thiago Macieira <thiago.macieira@intel.com>	2025-06-27 07:50:31 -0700
commit	b540732975db81fa327663069af3fc1c4311d8b9 (patch)
tree	37439612c9c1e752093f65a0e34ad98bf8f24d07 /src/corelib/thread/qatomicwait.cpp
parent	286bb6c32bbf892d31e8aa54b492eade02e0ede3 (diff)