This is an automated email from the git hooks/post-receive script.
pierov pushed a commit to branch tor-browser-102.4.0esr-12.0-1 in repository tor-browser.
commit 8574b473f461f2a5008cf1dbf2a00d53b5f9adde Author: Gabriele Svelto gsvelto@mozilla.com AuthorDate: Wed Aug 24 09:18:57 2022 +0000
Bug 1784018 - Remove deprecated OSSpinLocks r=glandium
On macOS versions prior to 10.15 os_unfair_locks cannot spin in kernel-space which degrades performance significantly. To obviate for this we spin in user-space like OSSpinLock does, for the same number of times and invoking x86-specific pause instructions in-between the locking attempts to avoid starving a thread that might be running on the same physical core.
Differential Revision: https://phabricator.services.mozilla.com/D154205 --- memory/build/Mutex.cpp | 4 +-- memory/build/Mutex.h | 82 +++++++++++++++++++++++++++----------------------- 2 files changed, 47 insertions(+), 39 deletions(-)
diff --git a/memory/build/Mutex.cpp b/memory/build/Mutex.cpp index 5a7c6d46e99c..8bc69635efa5 100644 --- a/memory/build/Mutex.cpp +++ b/memory/build/Mutex.cpp @@ -7,7 +7,7 @@ #if defined(XP_DARWIN)
// static -bool Mutex::UseUnfairLocks() { +bool Mutex::SpinInKernelSpace() { if (__builtin_available(macOS 10.15, *)) { return true; } @@ -16,6 +16,6 @@ bool Mutex::UseUnfairLocks() { }
// static -bool Mutex::gFallbackToOSSpinLock = !UseUnfairLocks(); +bool Mutex::gSpinInKernelSpace = SpinInKernelSpace();
#endif // defined(XP_DARWIN) diff --git a/memory/build/Mutex.h b/memory/build/Mutex.h index 3827a8a6757f..3a86d0e2e75d 100644 --- a/memory/build/Mutex.h +++ b/memory/build/Mutex.h @@ -10,7 +10,7 @@ #if defined(XP_WIN) # include <windows.h> #elif defined(XP_DARWIN) -# include <libkern/OSAtomic.h> +# include "mozilla/Assertions.h" # include <os/lock.h> #else # include <pthread.h> @@ -31,12 +31,6 @@ OS_UNFAIR_LOCK_AVAILABILITY OS_EXPORT OS_NOTHROW OS_NONNULL_ALL void os_unfair_lock_lock_with_options( os_unfair_lock_t lock, os_unfair_lock_options_t options); } - -static_assert(OS_UNFAIR_LOCK_INIT._os_unfair_lock_opaque == OS_SPINLOCK_INIT, - "OS_UNFAIR_LOCK_INIT and OS_SPINLOCK_INIT have the same " - "value"); -static_assert(sizeof(os_unfair_lock) == sizeof(OSSpinLock), - "os_unfair_lock and OSSpinLock are the same size"); #endif // defined(XP_DARWIN)
// Mutexes based on spinlocks. We can't use normal pthread spinlocks in all @@ -48,10 +42,7 @@ struct Mutex { #if defined(XP_WIN) CRITICAL_SECTION mMutex; #elif defined(XP_DARWIN) - union { - os_unfair_lock mUnfairLock; - OSSpinLock mSpinLock; - } mMutex; + os_unfair_lock mMutex; #else pthread_mutex_t mMutex; #endif @@ -63,10 +54,7 @@ struct Mutex { return false; } #elif defined(XP_DARWIN) - // The hack below works because both OS_UNFAIR_LOCK_INIT and - // OS_SPINLOCK_INIT initialize the lock to 0 and in both case it's a 32-bit - // integer. - mMutex.mUnfairLock = OS_UNFAIR_LOCK_INIT; + mMutex = OS_UNFAIR_LOCK_INIT; #elif defined(XP_LINUX) && !defined(ANDROID) pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) { @@ -90,19 +78,46 @@ struct Mutex { #if defined(XP_WIN) EnterCriticalSection(&mMutex); #elif defined(XP_DARWIN) - if (Mutex::gFallbackToOSSpinLock) { - OSSpinLockLock(&mMutex.mSpinLock); - } else { - // We rely on a non-public function to improve performance here. - // The OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION flag informs the kernel that - // the calling thread is able to make progress even in absence of actions - // from other threads and the OS_UNFAIR_LOCK_ADAPTIVE_SPIN one causes the - // kernel to spin on a contested lock if the owning thread is running on - // the same physical core (presumably only on x86 CPUs given that ARM - // macs don't have cores capable of SMT). + // We rely on a non-public function to improve performance here. + // The OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION flag informs the kernel that + // the calling thread is able to make progress even in absence of actions + // from other threads and the OS_UNFAIR_LOCK_ADAPTIVE_SPIN one causes the + // kernel to spin on a contested lock if the owning thread is running on + // the same physical core (presumably only on x86 CPUs given that ARM + // macs don't have cores capable of SMT). On versions of macOS older than + // 10.15 the latter is not available and we spin in userspace instead. + if (Mutex::gSpinInKernelSpace) { os_unfair_lock_lock_with_options( - &mMutex.mUnfairLock, + &mMutex, OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION | OS_UNFAIR_LOCK_ADAPTIVE_SPIN); + } else { +# if defined(__x86_64__) + // On older versions of macOS (10.14 and older) the + // `OS_UNFAIR_LOCK_ADAPTIVE_SPIN` flag is not supported by the kernel, + // we spin in user-space instead like `OSSpinLock` does: + // https://github.com/apple/darwin-libplatform/blob/215b09856ab5765b7462a91be70... + // Note that `OSSpinLock` uses 1000 iterations on x86-64: + // https://github.com/apple/darwin-libplatform/blob/215b09856ab5765b7462a91be70... + // ...but we only use 100 like it does on ARM: + // https://github.com/apple/darwin-libplatform/blob/215b09856ab5765b7462a91be70... + // We choose this value because it yields the same results in our + // benchmarks but is less likely to have detrimental effects caused by + // excessive spinning. + uint32_t retries = 100; + + do { + if (os_unfair_lock_trylock(&mMutex)) { + return; + } + + __asm__ __volatile__("pause"); + } while (retries--); + + os_unfair_lock_lock_with_options(&mMutex, + OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION); +# else + MOZ_CRASH("User-space spin-locks should never be used on ARM"); +# endif // defined(__x86_64__) } #else pthread_mutex_lock(&mMutex); @@ -113,19 +128,15 @@ struct Mutex { #if defined(XP_WIN) LeaveCriticalSection(&mMutex); #elif defined(XP_DARWIN) - if (Mutex::gFallbackToOSSpinLock) { - OSSpinLockUnlock(&mMutex.mSpinLock); - } else { - os_unfair_lock_unlock(&mMutex.mUnfairLock); - } + os_unfair_lock_unlock(&mMutex); #else pthread_mutex_unlock(&mMutex); #endif }
#if defined(XP_DARWIN) - static bool UseUnfairLocks(); - static bool gFallbackToOSSpinLock; + static bool SpinInKernelSpace(); + static bool gSpinInKernelSpace; #endif // XP_DARWIN };
@@ -153,10 +164,7 @@ struct StaticMutex { typedef Mutex StaticMutex;
# if defined(XP_DARWIN) -// The hack below works because both OS_UNFAIR_LOCK_INIT and OS_SPINLOCK_INIT -// initialize the lock to 0 and in both case it's a 32-bit integer. -# define STATIC_MUTEX_INIT \ - { .mUnfairLock = OS_UNFAIR_LOCK_INIT } +# define STATIC_MUTEX_INIT OS_UNFAIR_LOCK_INIT # elif defined(XP_LINUX) && !defined(ANDROID) # define STATIC_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP # else