This is an automated email from the git hooks/post-receive script.
richard pushed a commit to branch geckoview-102.3.0esr-12.0-1 in repository tor-browser.
commit 257982b707fbe6676ab7b9f0b7771f90a7f60101 Author: Ryan Hunt <rhunt@eqrion.net> AuthorDate: Thu Jul 28 13:27:02 2022 +0000
Bug 1777604 - wasm: Move membarrier call to separate functions. r=nbp, a=RyanVM
Differential Revision: https://phabricator.services.mozilla.com/D152305 --- js/src/jit/AutoWritableJitCode.h | 3 +- js/src/jit/ExecutableAllocator.h | 15 +-- js/src/jit/FlushICache.cpp | 132 +++++++++++++++++++++ js/src/jit/FlushICache.h | 33 ++++-- js/src/jit/ProcessExecutableMemory.cpp | 6 +- js/src/jit/ProcessExecutableMemory.h | 10 +- js/src/jit/arm/Architecture-arm.cpp | 2 +- js/src/jit/arm64/Architecture-arm64.cpp | 8 +- js/src/jit/arm64/vixl/Cpu-vixl.h | 6 +- js/src/jit/arm64/vixl/MozCpu-vixl.cpp | 112 ++--------------- js/src/jit/loong64/Architecture-loong64.cpp | 2 +- .../jit/mips-shared/Architecture-mips-shared.cpp | 2 +- js/src/jit/moz.build | 1 + js/src/jsapi-tests/testsJit.cpp | 4 +- js/src/wasm/WasmBuiltins.cpp | 5 +- js/src/wasm/WasmCode.cpp | 42 ++----- js/src/wasm/WasmCode.h | 10 +- js/src/wasm/WasmCompile.cpp | 25 ++-- js/src/wasm/WasmModule.cpp | 11 +- 19 files changed, 221 insertions(+), 208 deletions(-)
diff --git a/js/src/jit/AutoWritableJitCode.h b/js/src/jit/AutoWritableJitCode.h index 67fa84c2dbd61..ab5b35a54f763 100644 --- a/js/src/jit/AutoWritableJitCode.h +++ b/js/src/jit/AutoWritableJitCode.h @@ -59,8 +59,7 @@ class MOZ_RAII AutoWritableJitCodeFallible { } });
- if (!ExecutableAllocator::makeExecutableAndFlushICache( - FlushICacheSpec::LocalThreadOnly, addr_, size_)) { + if (!ExecutableAllocator::makeExecutableAndFlushICache(addr_, size_)) { MOZ_CRASH(); } rt_->toggleAutoWritableJitCodeActive(false); diff --git a/js/src/jit/ExecutableAllocator.h b/js/src/jit/ExecutableAllocator.h index 266c7af4b8548..85c01562c373a 100644 --- a/js/src/jit/ExecutableAllocator.h +++ b/js/src/jit/ExecutableAllocator.h @@ -172,19 +172,10 @@ class ExecutableAllocator { MustFlushICache::No); }
- [[nodiscard]] static bool makeExecutableAndFlushICache( - FlushICacheSpec flushSpec, void* start, size_t size) { - MustFlushICache mustFlushICache; - switch (flushSpec) { - case FlushICacheSpec::LocalThreadOnly: - mustFlushICache = MustFlushICache::LocalThreadOnly; - break; - case FlushICacheSpec::AllThreads: - mustFlushICache = MustFlushICache::AllThreads; - break; - } + [[nodiscard]] static bool makeExecutableAndFlushICache(void* start, + size_t size) { return ReprotectRegion(start, size, ProtectionSetting::Executable, - mustFlushICache); + MustFlushICache::Yes); }
static void poisonCode(JSRuntime* rt, JitPoisonRangeVector& ranges); diff --git a/js/src/jit/FlushICache.cpp b/js/src/jit/FlushICache.cpp new file mode 100644 index 0000000000000..1e2ec69272fe8 --- /dev/null +++ b/js/src/jit/FlushICache.cpp @@ -0,0 +1,132 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/FlushICache.h" + +#ifdef JS_CODEGEN_ARM64 +# include "jit/arm64/vixl/MozCachingDecoder.h" +# include "jit/arm64/vixl/Simulator-vixl.h" +#endif + +#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + +# ifdef __linux__ +# include <linux/version.h> +# define LINUX_HAS_MEMBARRIER (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)) +# else +# define LINUX_HAS_MEMBARRIER 0 +# endif + +# if LINUX_HAS_MEMBARRIER || defined(__android__) +# include <string.h> + +# if LINUX_HAS_MEMBARRIER +# include <linux/membarrier.h> +# include <sys/syscall.h> +# include <sys/utsname.h> +# include <unistd.h> +# elif defined(__android__) +# include <sys/syscall.h> +# include <unistd.h> +# else +# error "Missing platform-specific declarations for membarrier syscall!" +# endif // __linux__ / ANDROID + +static int membarrier(int cmd, int flags) { + return syscall(__NR_membarrier, cmd, flags); +} + +// These definitions come from the Linux kernel source, for kernels before 4.16 +// which didn't have access to these membarrier commands. 
+# ifndef MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE +# define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE (1 << 5) +# endif + +# ifndef MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE +# define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE (1 << 6) +# endif +# endif // LINUX_HAS_MEMBARRIER || defined(__android__) + +using namespace js; +using namespace js::jit; + +namespace js { +namespace jit { + +bool CanFlushExecutionContextForAllThreads() { +# if (LINUX_HAS_MEMBARRIER || defined(__android__)) + // On linux, check the kernel supports membarrier(2), that is, it's a kernel + // above Linux 4.16 included. + // + // Note: this code has been extracted (August 2020) from + // https://android.googlesource.com/platform/art/+/58520dfba31d6eeef75f5babff15... + static constexpr int kRequiredMajor = 4; + static constexpr int kRequiredMinor = 16; + + static bool computed = false; + static bool kernelHasMembarrier = false; + + if (computed) { + return kernelHasMembarrier; + } + + struct utsname uts; + int major, minor; + kernelHasMembarrier = uname(&uts) == 0 && strcmp(uts.sysname, "Linux") == 0 && + sscanf(uts.release, "%d.%d", &major, &minor) == 2 && + major >= kRequiredMajor && + (major != kRequiredMajor || minor >= kRequiredMinor); + + // As a test bed, try to run the syscall with the command registering the + // intent to use the actual membarrier we'll want to carry out later. + // + // IMPORTANT: This is required or else running the membarrier later won't + // actually interrupt the threads in this process. + if (kernelHasMembarrier && + membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0) != 0) { + kernelHasMembarrier = false; + } + + computed = true; + return kernelHasMembarrier; +# else + // On other platforms, we assume that the syscall for flushing the icache + // will flush the execution context for other cores. 
+ return true; +# endif +} + +void FlushExecutionContextForAllThreads() { + // Callers must check that this operation is available. + MOZ_RELEASE_ASSERT(CanFlushExecutionContextForAllThreads()); + +# if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64) + // Emulate what the real hardware would do by emitting a membarrier that'll + // interrupt and flush the execution context of all threads. + using js::jit::SimulatorProcess; + js::jit::AutoLockSimulatorCache alsc; + SimulatorProcess::membarrier(); +# elif (LINUX_HAS_MEMBARRIER || defined(__android__)) + // The caller has checked this can be performed, which will have registered + // this process to receive the membarrier. See above. + // + // membarrier will trigger an inter-processor-interrupt on any active threads + // of this process. This is an execution context synchronization event + // equivalent to running an `isb` instruction. + if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0) != 0) { + // Better safe than sorry. + MOZ_CRASH("membarrier can't be executed"); + } +# else + // On other platforms, we assume that the syscall for flushing the icache + // will flush the execution context for other cores. +# endif +} + +} // namespace jit +} // namespace js + +#endif diff --git a/js/src/jit/FlushICache.h b/js/src/jit/FlushICache.h index 6c780e43e8665..6ef08c63d3acd 100644 --- a/js/src/jit/FlushICache.h +++ b/js/src/jit/FlushICache.h @@ -18,8 +18,7 @@ namespace jit {
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
-inline void FlushICache(void* code, size_t size, - bool codeIsThreadLocal = true) { +inline void FlushICache(void* code, size_t size) { // No-op. Code and data caches are coherent on x86 and x64. }
@@ -27,14 +26,15 @@ inline void FlushICache(void* code, size_t size, (defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)) || \ defined(JS_CODEGEN_LOONG64)
-extern void FlushICache(void* code, size_t size, bool codeIsThreadLocal = true); +// Invalidate the given code range from the icache. This will also flush the +// execution context for this core. If this code is to be executed on another +// thread, that thread must perform an execution context flush first using +// `FlushExecutionContext` below. +extern void FlushICache(void* code, size_t size);
#elif defined(JS_CODEGEN_NONE)
-inline void FlushICache(void* code, size_t size, - bool codeIsThreadLocal = true) { - MOZ_CRASH(); -} +inline void FlushICache(void* code, size_t size) { MOZ_CRASH(); }
#else # error "Unknown architecture!" @@ -47,10 +47,16 @@ inline void FlushICache(void* code, size_t size, inline void FlushExecutionContext() { // No-op. Execution context is coherent with instruction cache. } +inline bool CanFlushExecutionContextForAllThreads() { return true; } +inline void FlushExecutionContextForAllThreads() { + // No-op. Execution context is coherent with instruction cache. +}
#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)
inline void FlushExecutionContext() { MOZ_CRASH(); } +inline bool CanFlushExecutionContextForAllThreads() { MOZ_CRASH(); } +inline void FlushExecutionContextForAllThreads() { MOZ_CRASH(); }
#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
@@ -63,6 +69,19 @@ inline void FlushExecutionContext() { MOZ_CRASH(); } // this method. extern void FlushExecutionContext();
+// Some platforms can flush the execution context for other threads using a +// syscall. This is required when JIT'ed code will be published to multiple +// threads without a synchronization point where a `FlushExecutionContext` +// could be inserted. +extern bool CanFlushExecutionContextForAllThreads(); + +// Flushes the execution context of all threads in this process, equivalent to +// running `FlushExecutionContext` on every thread. +// +// Callers must ensure `CanFlushExecutionContextForAllThreads` is true, or +// else this will crash. +extern void FlushExecutionContextForAllThreads(); + #else # error "Unknown architecture!" #endif diff --git a/js/src/jit/ProcessExecutableMemory.cpp b/js/src/jit/ProcessExecutableMemory.cpp index 5ea4b2e4ca5b4..2085a4802c232 100644 --- a/js/src/jit/ProcessExecutableMemory.cpp +++ b/js/src/jit/ProcessExecutableMemory.cpp @@ -749,11 +749,9 @@ bool js::jit::ReprotectRegion(void* start, size_t size, ProtectionSetting protection, MustFlushICache flushICache) { // Flush ICache when making code executable, before we modify |size|. - if (flushICache == MustFlushICache::LocalThreadOnly || - flushICache == MustFlushICache::AllThreads) { + if (flushICache == MustFlushICache::Yes) { MOZ_ASSERT(protection == ProtectionSetting::Executable); - bool codeIsThreadLocal = flushICache == MustFlushICache::LocalThreadOnly; - jit::FlushICache(start, size, codeIsThreadLocal); + jit::FlushICache(start, size); }
// Calculate the start of the page containing this region, diff --git a/js/src/jit/ProcessExecutableMemory.h b/js/src/jit/ProcessExecutableMemory.h index de2109f5310cc..51747634f38ee 100644 --- a/js/src/jit/ProcessExecutableMemory.h +++ b/js/src/jit/ProcessExecutableMemory.h @@ -68,15 +68,9 @@ enum class ProtectionSetting { Executable, };
-/// Whether the instruction cache must be flushed: -//- No means no flushing will happen. -//- LocalThreadOnly means only the local thread's icache will be flushed. -//- AllThreads means all the threads' icaches will be flushed; this must be used -// when the compiling thread and the executing thread might be different. +/// Whether the instruction cache must be flushed
-enum class MustFlushICache { No, LocalThreadOnly, AllThreads }; - -enum class FlushICacheSpec { LocalThreadOnly, AllThreads }; +enum class MustFlushICache { No, Yes };
[[nodiscard]] extern bool ReprotectRegion(void* start, size_t size, ProtectionSetting protection, diff --git a/js/src/jit/arm/Architecture-arm.cpp b/js/src/jit/arm/Architecture-arm.cpp index 2491c67350146..687c45f8f7373 100644 --- a/js/src/jit/arm/Architecture-arm.cpp +++ b/js/src/jit/arm/Architecture-arm.cpp @@ -481,7 +481,7 @@ uint32_t FloatRegisters::ActualTotalPhys() { return 16; }
-void FlushICache(void* code, size_t size, bool codeIsThreadLocal) { +void FlushICache(void* code, size_t size) { #if defined(JS_SIMULATOR_ARM) js::jit::SimulatorProcess::FlushICache(code, size);
diff --git a/js/src/jit/arm64/Architecture-arm64.cpp b/js/src/jit/arm64/Architecture-arm64.cpp index f95c0231d84d3..eb3dd67b1a9b8 100644 --- a/js/src/jit/arm64/Architecture-arm64.cpp +++ b/js/src/jit/arm64/Architecture-arm64.cpp @@ -119,12 +119,8 @@ uint32_t GetARM64Flags() { return 0; } // computed". bool CPUFlagsHaveBeenComputed() { return true; }
-void FlushICache(void* code, size_t size, bool codeIsThreadLocal) { - vixl::CPU::EnsureIAndDCacheCoherency(code, size, codeIsThreadLocal); -} - -bool CanFlushICacheFromBackgroundThreads() { - return vixl::CPU::CanFlushICacheFromBackgroundThreads(); +void FlushICache(void* code, size_t size) { + vixl::CPU::EnsureIAndDCacheCoherency(code, size); }
void FlushExecutionContext() { vixl::CPU::FlushExecutionContext(); } diff --git a/js/src/jit/arm64/vixl/Cpu-vixl.h b/js/src/jit/arm64/vixl/Cpu-vixl.h index ac709bccbf2a6..4db51aad6b473 100644 --- a/js/src/jit/arm64/vixl/Cpu-vixl.h +++ b/js/src/jit/arm64/vixl/Cpu-vixl.h @@ -165,11 +165,7 @@ class CPU { // the I and D caches. I and D caches are not automatically coherent on ARM // so this operation is required before any dynamically generated code can // safely run. - static void EnsureIAndDCacheCoherency(void *address, size_t length, bool codeIsThreadLocal); - - // Returns true when the current machine supports flushing the instruction - // cache on a background thread. - static bool CanFlushICacheFromBackgroundThreads(); + static void EnsureIAndDCacheCoherency(void* address, size_t length);
// Flush the local instruction pipeline, forcing a reload of any instructions // beyond this barrier from the icache. diff --git a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp index ad96098501679..909cc590aeb78 100644 --- a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp +++ b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp @@ -33,40 +33,8 @@ # include <libkern/OSCacheControl.h> #endif
-#if defined(__aarch64__) && (defined(__linux__) || defined(__android__)) -# if defined(__linux__) -# include <linux/membarrier.h> -# include <sys/syscall.h> -# include <sys/utsname.h> -# include <unistd.h> -# elif defined(__ANDROID__) -# include <sys/syscall.h> -# include <unistd.h> -# else -# error "Missing platform-specific declarations for membarrier syscall!" -# endif // __linux__ / ANDROID - -# include "vm/JSContext.h" // TlsContext - -static int membarrier(int cmd, int flags) { - return syscall(__NR_membarrier, cmd, flags); -} - -// These definitions come from the Linux kernel source, for kernels before 4.16 -// which didn't have access to these membarrier commands. -# ifndef MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE -# define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE (1 << 5) -# endif - -# ifndef MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE -# define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE (1 << 6) -# endif - -#endif // __aarch64__ - namespace vixl {
- // Currently computes I and D cache line size. void CPU::SetUp() { uint32_t cache_type_register = GetCacheType(); @@ -115,45 +83,7 @@ uint32_t CPU::GetCacheType() { #endif }
-bool CPU::CanFlushICacheFromBackgroundThreads() { -#if defined(__aarch64__) && (defined(__linux__) || defined(__android__)) - // On linux, check the kernel supports membarrier(2), that is, it's a kernel - // above Linux 4.16 included. - // - // Note: this code has been extracted (August 2020) from - // https://android.googlesource.com/platform/art/+/58520dfba31d6eeef75f5babff15... - static constexpr int kRequiredMajor = 4; - static constexpr int kRequiredMinor = 16; - - static bool computed = false; - static bool kernelHasMembarrier = false; - - if (!computed) { - struct utsname uts; - int major, minor; - kernelHasMembarrier = uname(&uts) == 0 && - strcmp(uts.sysname, "Linux") == 0 && - sscanf(uts.release, "%d.%d", &major, &minor) == 2 && - major >= kRequiredMajor && (major != kRequiredMajor || minor >= kRequiredMinor); - - // As a test bed, try to run the syscall with the command registering the - // intent to use the actual membarrier we'll want to carry out later. - if (kernelHasMembarrier && - membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0) != 0) { - kernelHasMembarrier = false; - } - - computed = true; - } - - return kernelHasMembarrier; -#else - // On other platforms, we assume that the provided syscall does the right thing. 
- return true; -#endif -} - -void CPU::EnsureIAndDCacheCoherency(void *address, size_t length, bool codeIsThreadLocal) { +void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) { #if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64) // This code attempts to emulate what the following assembly sequence is // doing, which is sending the information to all cores that some cache line @@ -175,11 +105,6 @@ void CPU::EnsureIAndDCacheCoherency(void *address, size_t length, bool codeIsThr Simulator* sim = vixl::Simulator::Current(); if (sim) { sim->FlushICache(); - } else if (!codeIsThreadLocal) { - // We're on a background thread; emulate what the real hardware would do by - // emitting a membarrier that'll interrupt and cause an icache invalidation - // on all the threads. - SimulatorProcess::membarrier(); } #elif defined(_MSC_VER) && defined(_M_ARM64) FlushInstructionCache(GetCurrentProcess(), address, length); @@ -262,31 +187,18 @@ void CPU::EnsureIAndDCacheCoherency(void *address, size_t length, bool codeIsThr iline += isize; } while (iline < end);
- __asm__ __volatile__ ( - // Make sure that the instruction cache operations (above) take effect - // before the isb (below). - " dsb ish\n" - - // Ensure that any instructions already in the pipeline are discarded and - // reloaded from the new data. - // isb : Instruction Synchronisation Barrier - " isb\n" - : : : "memory"); + __asm__ __volatile__( + // Make sure that the instruction cache operations (above) take effect + // before the isb (below). + " dsb ish\n"
- if (!codeIsThreadLocal) { - // If we're on a background thread, emit a membarrier that will synchronize - // all the executing threads with the new version of the code. - JSContext* cx = js::TlsContext.get(); - if (!cx || !cx->isMainThreadContext()) { - MOZ_RELEASE_ASSERT(CPU::CanFlushICacheFromBackgroundThreads()); - // The intent to use this command has been carried over in - // CanFlushICacheFromBackgroundThreads. - if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0) != 0) { - // Better safe than sorry. - MOZ_CRASH("membarrier can't be executed"); - } - } - } + // Ensure that any instructions already in the pipeline are discarded and + // reloaded from the new data. + // isb : Instruction Synchronisation Barrier + " isb\n" + : + : + : "memory"); #else // If the host isn't AArch64, we must be using the simulator, so this function // doesn't have to do anything. diff --git a/js/src/jit/loong64/Architecture-loong64.cpp b/js/src/jit/loong64/Architecture-loong64.cpp index d838eac16b8f4..6b1069a592ac2 100644 --- a/js/src/jit/loong64/Architecture-loong64.cpp +++ b/js/src/jit/loong64/Architecture-loong64.cpp @@ -68,7 +68,7 @@ bool CPUFlagsHaveBeenComputed() {
uint32_t GetLOONG64Flags() { return 0; }
-void FlushICache(void* code, size_t size, bool codeIsThreadLocal) { +void FlushICache(void* code, size_t size) { #if defined(JS_SIMULATOR) js::jit::SimulatorProcess::FlushICache(code, size);
diff --git a/js/src/jit/mips-shared/Architecture-mips-shared.cpp b/js/src/jit/mips-shared/Architecture-mips-shared.cpp index ed56ed72502e0..e3017adb4f70b 100644 --- a/js/src/jit/mips-shared/Architecture-mips-shared.cpp +++ b/js/src/jit/mips-shared/Architecture-mips-shared.cpp @@ -87,7 +87,7 @@ Registers::Code Registers::FromName(const char* name) { return Invalid; }
-void FlushICache(void* code, size_t size, bool codeIsThreadLocal) { +void FlushICache(void* code, size_t size) { #if defined(JS_SIMULATOR) js::jit::SimulatorProcess::FlushICache(code, size);
diff --git a/js/src/jit/moz.build b/js/src/jit/moz.build index 69af98f953beb..4f2765c060227 100644 --- a/js/src/jit/moz.build +++ b/js/src/jit/moz.build @@ -37,6 +37,7 @@ UNIFIED_SOURCES += [ "EdgeCaseAnalysis.cpp", "EffectiveAddressAnalysis.cpp", "ExecutableAllocator.cpp", + "FlushICache.cpp", "FoldLinearArithConstants.cpp", "InlinableNatives.cpp", "InstructionReordering.cpp", diff --git a/js/src/jsapi-tests/testsJit.cpp b/js/src/jsapi-tests/testsJit.cpp index ac2c1c7a3cbb8..29de274004862 100644 --- a/js/src/jsapi-tests/testsJit.cpp +++ b/js/src/jsapi-tests/testsJit.cpp @@ -68,8 +68,8 @@ bool ExecuteJit(JSContext* cx, js::jit::MacroAssembler& masm) { if (!code) { return false; } - if (!ExecutableAllocator::makeExecutableAndFlushICache( - FlushICacheSpec::LocalThreadOnly, code->raw(), code->bufferSize())) { + if (!ExecutableAllocator::makeExecutableAndFlushICache(code->raw(), + code->bufferSize())) { return false; }
diff --git a/js/src/wasm/WasmBuiltins.cpp b/js/src/wasm/WasmBuiltins.cpp index f1f49937a3d97..8f53d5d1327fb 100644 --- a/js/src/wasm/WasmBuiltins.cpp +++ b/js/src/wasm/WasmBuiltins.cpp @@ -1756,9 +1756,8 @@ bool wasm::EnsureBuiltinThunksInitialized() { MOZ_ASSERT(masm.trapSites().empty()); MOZ_ASSERT(masm.tryNotes().empty());
- if (!ExecutableAllocator::makeExecutableAndFlushICache( - FlushICacheSpec::LocalThreadOnly, thunks->codeBase, - thunks->codeSize)) { + if (!ExecutableAllocator::makeExecutableAndFlushICache(thunks->codeBase, + thunks->codeSize)) { return false; }
diff --git a/js/src/wasm/WasmCode.cpp b/js/src/wasm/WasmCode.cpp index 34df415bef868..1828037807f31 100644 --- a/js/src/wasm/WasmCode.cpp +++ b/js/src/wasm/WasmCode.cpp @@ -331,7 +331,7 @@ UniqueModuleSegment ModuleSegment::create(Tier tier, const Bytes& unlinkedBytes, linkData); }
-bool ModuleSegment::initialize(IsTier2 isTier2, const CodeTier& codeTier, +bool ModuleSegment::initialize(const CodeTier& codeTier, const LinkData& linkData, const Metadata& metadata, const MetadataTier& metadataTier) { @@ -341,13 +341,9 @@ bool ModuleSegment::initialize(IsTier2 isTier2, const CodeTier& codeTier,
// Optimized compilation finishes on a background thread, so we must make sure // to flush the icaches of all the executing threads. - FlushICacheSpec flushIcacheSpec = isTier2 == IsTier2::Tier2 - ? FlushICacheSpec::AllThreads - : FlushICacheSpec::LocalThreadOnly; - // Reprotect the whole region to avoid having separate RW and RX mappings. if (!ExecutableAllocator::makeExecutableAndFlushICache( - flushIcacheSpec, base(), RoundupCodeLength(length()))) { + base(), RoundupCodeLength(length()))) { return false; }
@@ -499,7 +495,6 @@ static constexpr unsigned LAZY_STUB_LIFO_DEFAULT_CHUNK_SIZE = 8 * 1024;
bool LazyStubTier::createManyEntryStubs(const Uint32Vector& funcExportIndices, const CodeTier& codeTier, - bool flushAllThreadsIcaches, size_t* stubSegmentIndex) { MOZ_ASSERT(funcExportIndices.length());
@@ -579,13 +574,7 @@ bool LazyStubTier::createManyEntryStubs(const Uint32Vector& funcExportIndices, Assembler::Bind(codePtr, label); }
- // Optimized compilation finishes on a background thread, so we must make sure - // to flush the icaches of all the executing threads. - FlushICacheSpec flushIcacheSpec = flushAllThreadsIcaches - ? FlushICacheSpec::AllThreads - : FlushICacheSpec::LocalThreadOnly; - if (!ExecutableAllocator::makeExecutableAndFlushICache(flushIcacheSpec, - codePtr, codeLength)) { + if (!ExecutableAllocator::makeExecutableAndFlushICache(codePtr, codeLength)) { return false; }
@@ -629,14 +618,8 @@ bool LazyStubTier::createOneEntryStub(uint32_t funcExportIndex, return false; }
- // This happens on the executing thread (when createOneEntryStub is called - // from GetInterpEntryAndEnsureStubs), so no need to flush the icaches on all - // the threads. - bool flushAllThreadIcaches = false; - size_t stubSegmentIndex; - if (!createManyEntryStubs(funcExportIndexes, codeTier, flushAllThreadIcaches, - &stubSegmentIndex)) { + if (!createManyEntryStubs(funcExportIndexes, codeTier, &stubSegmentIndex)) { return false; }
@@ -667,13 +650,8 @@ bool LazyStubTier::createTier2(const Uint32Vector& funcExportIndices, return true; }
- // This compilation happens on a background compiler thread, so the icache may - // need to be flushed on all the threads. - bool flushAllThreadIcaches = true; - size_t stubSegmentIndex; - if (!createManyEntryStubs(funcExportIndices, codeTier, flushAllThreadIcaches, - &stubSegmentIndex)) { + if (!createManyEntryStubs(funcExportIndices, codeTier, &stubSegmentIndex)) { return false; }
@@ -849,15 +827,15 @@ bool Metadata::getFuncName(NameContext ctx, uint32_t funcIndex, return AppendFunctionIndexName(funcIndex, name); }
-bool CodeTier::initialize(IsTier2 isTier2, const Code& code, - const LinkData& linkData, const Metadata& metadata) { +bool CodeTier::initialize(const Code& code, const LinkData& linkData, + const Metadata& metadata) { MOZ_ASSERT(!initialized()); code_ = &code;
MOZ_ASSERT(lazyStubs_.readLock()->entryStubsEmpty());
// See comments in CodeSegment::initialize() for why this must be last. - if (!segment_->initialize(isTier2, *this, linkData, metadata, *metadata_)) { + if (!segment_->initialize(*this, linkData, metadata, *metadata_)) { return false; }
@@ -946,7 +924,7 @@ Code::Code(UniqueCodeTier tier1, const Metadata& metadata, bool Code::initialize(const LinkData& linkData) { MOZ_ASSERT(!initialized());
- if (!tier1_->initialize(IsTier2::NotTier2, *this, linkData, *metadata_)) { + if (!tier1_->initialize(*this, linkData, *metadata_)) { return false; }
@@ -960,7 +938,7 @@ bool Code::setAndBorrowTier2(UniqueCodeTier tier2, const LinkData& linkData, MOZ_RELEASE_ASSERT(tier2->tier() == Tier::Optimized && tier1_->tier() == Tier::Baseline);
- if (!tier2->initialize(IsTier2::Tier2, *this, linkData, *metadata_)) { + if (!tier2->initialize(*this, linkData, *metadata_)) { return false; }
diff --git a/js/src/wasm/WasmCode.h b/js/src/wasm/WasmCode.h index ede89a0271378..3ef88c7e53a08 100644 --- a/js/src/wasm/WasmCode.h +++ b/js/src/wasm/WasmCode.h @@ -209,8 +209,6 @@ class CodeSegment {
using UniqueModuleSegment = UniquePtr<ModuleSegment>;
-enum IsTier2 { Tier2, NotTier2 }; - class ModuleSegment : public CodeSegment { const Tier tier_; uint8_t* const trapCode_; @@ -224,9 +222,8 @@ class ModuleSegment : public CodeSegment { static UniqueModuleSegment create(Tier tier, const Bytes& unlinkedBytes, const LinkData& linkData);
- bool initialize(IsTier2 isTier2, const CodeTier& codeTier, - const LinkData& linkData, const Metadata& metadata, - const MetadataTier& metadataTier); + bool initialize(const CodeTier& codeTier, const LinkData& linkData, + const Metadata& metadata, const MetadataTier& metadataTier);
Tier tier() const { return tier_; }
@@ -591,7 +588,6 @@ class LazyStubTier {
[[nodiscard]] bool createManyEntryStubs(const Uint32Vector& funcExportIndices, const CodeTier& codeTier, - bool flushAllThreadsIcaches, size_t* stubSegmentIndex);
public: @@ -654,7 +650,7 @@ class CodeTier { lazyStubs_(mutexForTier(segment_->tier())) {}
bool initialized() const { return !!code_ && segment_->initialized(); } - bool initialize(IsTier2 isTier2, const Code& code, const LinkData& linkData, + bool initialize(const Code& code, const LinkData& linkData, const Metadata& metadata);
Tier tier() const { return segment_->tier(); } diff --git a/js/src/wasm/WasmCompile.cpp b/js/src/wasm/WasmCompile.cpp index 26534bca4ea47..842f75d07dda9 100644 --- a/js/src/wasm/WasmCompile.cpp +++ b/js/src/wasm/WasmCompile.cpp @@ -26,6 +26,7 @@ # include "jit/ProcessExecutableMemory.h" #endif
+#include "jit/FlushICache.h" #include "util/Text.h" #include "vm/HelperThreads.h" #include "vm/Realm.h" @@ -572,6 +573,11 @@ static bool TieringBeneficial(uint32_t codeSize) { return true; }
+// Ensure that we have the non-compiler requirements to tier safely. +static bool PlatformCanTier() { + return CanUseExtraThreads() && jit::CanFlushExecutionContextForAllThreads(); +} + CompilerEnvironment::CompilerEnvironment(const CompileArgs& args) : state_(InitialWithArgs), args_(&args) {}
@@ -590,20 +596,6 @@ void CompilerEnvironment::computeParameters() { state_ = Computed; }
-// Check that this architecture either: -// - is cache-coherent, which is the case for most tier-1 architectures we care -// about. -// - or has the ability to invalidate the instruction cache of all threads, so -// background compilation in tiered compilation can be synchronized across all -// threads. -static bool IsICacheSafe() { -#ifdef JS_CODEGEN_ARM64 - return jit::CanFlushICacheFromBackgroundThreads(); -#else - return true; -#endif -} - void CompilerEnvironment::computeParameters(Decoder& d) { MOZ_ASSERT(!isComputed());
@@ -633,8 +625,9 @@ void CompilerEnvironment::computeParameters(Decoder& d) { codeSectionSize = range.size; }
- if (baselineEnabled && hasSecondTier && CanUseExtraThreads() && - (TieringBeneficial(codeSectionSize) || forceTiering) && IsICacheSafe()) { + if (baselineEnabled && hasSecondTier && + (TieringBeneficial(codeSectionSize) || forceTiering) && + PlatformCanTier()) { mode_ = CompileMode::Tier1; tier_ = Tier::Baseline; } else { diff --git a/js/src/wasm/WasmModule.cpp b/js/src/wasm/WasmModule.cpp index b3bb41cdaf400..406fb12dc3242 100644 --- a/js/src/wasm/WasmModule.cpp +++ b/js/src/wasm/WasmModule.cpp @@ -20,7 +20,8 @@
#include <chrono>
-#include "js/BuildId.h" // JS::BuildIdCharVector +#include "jit/FlushICache.h" // for FlushExecutionContextForAllThreads +#include "js/BuildId.h" // JS::BuildIdCharVector #include "js/experimental/TypedData.h" // JS_NewUint8Array #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* #include "js/Printf.h" // JS_smprintf @@ -212,6 +213,14 @@ bool Module::finishTier2(const LinkData& linkData2, return false; }
+ // Initializing the code above will have flushed the icache for all cores. + // However, there could still be stale data in the execution pipeline of + // other cores on some platforms. Force an execution context flush on all + // threads to fix this before we commit the code. + // + // This is safe due to the check in `PlatformCanTier` in WasmCompile.cpp + jit::FlushExecutionContextForAllThreads(); + // Now that we can't fail or otherwise abort tier2, make it live.
MOZ_ASSERT(!code().hasTier2());