author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-07 17:32:43 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-07 17:32:43 +0000
commit     6bf0a5cb5034a7e684dcc3500e841785237ce2dd
tree       a68f146d7fa01f0134297619fbe7e33db084e0aa /js/src/wasm/WasmCompile.cpp
parent     Initial commit.
Adding upstream version 1:115.7.0. (tags: upstream/1%115.7.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/wasm/WasmCompile.cpp')
-rw-r--r--   js/src/wasm/WasmCompile.cpp   919
1 file changed, 919 insertions, 0 deletions
diff --git a/js/src/wasm/WasmCompile.cpp b/js/src/wasm/WasmCompile.cpp new file mode 100644 index 0000000000..3471de1ad2 --- /dev/null +++ b/js/src/wasm/WasmCompile.cpp @@ -0,0 +1,919 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * + * Copyright 2015 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "wasm/WasmCompile.h" + +#include "mozilla/Maybe.h" + +#include <algorithm> + +#ifndef __wasi__ +# include "jit/ProcessExecutableMemory.h" +#endif + +#include "jit/FlushICache.h" +#include "util/Text.h" +#include "vm/HelperThreads.h" +#include "vm/Realm.h" +#include "wasm/WasmBaselineCompile.h" +#include "wasm/WasmGenerator.h" +#include "wasm/WasmIonCompile.h" +#include "wasm/WasmOpIter.h" +#include "wasm/WasmProcess.h" +#include "wasm/WasmSignalHandlers.h" +#include "wasm/WasmValidate.h" + +using namespace js; +using namespace js::jit; +using namespace js::wasm; + +uint32_t wasm::ObservedCPUFeatures() { + enum Arch { + X86 = 0x1, + X64 = 0x2, + ARM = 0x3, + MIPS = 0x4, + MIPS64 = 0x5, + ARM64 = 0x6, + LOONG64 = 0x7, + RISCV64 = 0x8, + ARCH_BITS = 3 + }; + +#if defined(JS_CODEGEN_X86) + MOZ_ASSERT(uint32_t(jit::CPUInfo::GetFingerprint()) <= + (UINT32_MAX >> ARCH_BITS)); + return X86 | (uint32_t(jit::CPUInfo::GetFingerprint()) << ARCH_BITS); +#elif defined(JS_CODEGEN_X64) + MOZ_ASSERT(uint32_t(jit::CPUInfo::GetFingerprint()) <= + (UINT32_MAX >> ARCH_BITS)); + return X64 | (uint32_t(jit::CPUInfo::GetFingerprint()) << ARCH_BITS); +#elif defined(JS_CODEGEN_ARM) + MOZ_ASSERT(jit::GetARMFlags() <= (UINT32_MAX >> ARCH_BITS)); + return ARM | (jit::GetARMFlags() << ARCH_BITS); +#elif defined(JS_CODEGEN_ARM64) + MOZ_ASSERT(jit::GetARM64Flags() <= (UINT32_MAX >> ARCH_BITS)); + return ARM64 | (jit::GetARM64Flags() << ARCH_BITS); +#elif defined(JS_CODEGEN_MIPS64) + MOZ_ASSERT(jit::GetMIPSFlags() <= (UINT32_MAX >> ARCH_BITS)); + return MIPS64 | (jit::GetMIPSFlags() << ARCH_BITS); +#elif defined(JS_CODEGEN_LOONG64) + MOZ_ASSERT(jit::GetLOONG64Flags() <= (UINT32_MAX >> ARCH_BITS)); + return LOONG64 | (jit::GetLOONG64Flags() << ARCH_BITS); +#elif defined(JS_CODEGEN_RISCV64) + MOZ_ASSERT(jit::GetRISCV64Flags() <= (UINT32_MAX >> ARCH_BITS)); + return RISCV64 | (jit::GetRISCV64Flags() << ARCH_BITS); +#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) + return 0; +#else +# error "unknown architecture" +#endif +} + +FeatureArgs FeatureArgs::build(JSContext* cx, const FeatureOptions& options) { + FeatureArgs features; + +#define WASM_FEATURE(NAME, LOWER_NAME, ...) \ + features.LOWER_NAME = wasm::NAME##Available(cx); + JS_FOR_WASM_FEATURES(WASM_FEATURE, WASM_FEATURE, WASM_FEATURE); +#undef WASM_FEATURE + + features.sharedMemory = + wasm::ThreadsAvailable(cx) ? 
Shareable::True : Shareable::False; + + features.simd = jit::JitSupportsWasmSimd(); + features.intrinsics = options.intrinsics; + + return features; +} + +SharedCompileArgs CompileArgs::build(JSContext* cx, + ScriptedCaller&& scriptedCaller, + const FeatureOptions& options, + CompileArgsError* error) { + bool baseline = BaselineAvailable(cx); + bool ion = IonAvailable(cx); + + // Debug information such as source view or debug traps will require + // additional memory and permanently stay in baseline code, so we try to + // only enable it when a developer actually cares: when the debugger tab + // is open. + bool debug = cx->realm() && cx->realm()->debuggerObservesWasm(); + + bool forceTiering = + cx->options().testWasmAwaitTier2() || JitOptions.wasmDelayTier2; + + // The <Compiler>Available() predicates should ensure no failure here, but + // when we're fuzzing we allow inconsistent switches and the check may thus + // fail. Let it go to a run-time error instead of crashing. + if (debug && ion) { + *error = CompileArgsError::NoCompiler; + return nullptr; + } + + if (forceTiering && !(baseline && ion)) { + // This can happen only in testing, and in this case we don't have a + // proper way to signal the error, so just silently override the default, + // instead of adding a skip-if directive to every test using debug/gc. + forceTiering = false; + } + + if (!(baseline || ion)) { + *error = CompileArgsError::NoCompiler; + return nullptr; + } + + CompileArgs* target = cx->new_<CompileArgs>(std::move(scriptedCaller)); + if (!target) { + *error = CompileArgsError::OutOfMemory; + return nullptr; + } + + target->baselineEnabled = baseline; + target->ionEnabled = ion; + target->debugEnabled = debug; + target->forceTiering = forceTiering; + target->features = FeatureArgs::build(cx, options); + + return target; +} + +SharedCompileArgs CompileArgs::buildForAsmJS(ScriptedCaller&& scriptedCaller) { + CompileArgs* target = js_new<CompileArgs>(std::move(scriptedCaller)); + if (!target) { + return nullptr; + } + + // AsmJS is deprecated and doesn't have mechanisms for experimental features, + // so we don't need to initialize the FeatureArgs. It also only targets the + // Ion backend and does not need WASM debug support since it is de-optimized + // to JS in that case. + target->ionEnabled = true; + target->debugEnabled = false; + + return target; +} + +SharedCompileArgs CompileArgs::buildAndReport(JSContext* cx, + ScriptedCaller&& scriptedCaller, + const FeatureOptions& options, + bool reportOOM) { + CompileArgsError error; + SharedCompileArgs args = + CompileArgs::build(cx, std::move(scriptedCaller), options, &error); + if (args) { + Log(cx, "available wasm compilers: tier1=%s tier2=%s", + args->baselineEnabled ? "baseline" : "none", + args->ionEnabled ? "ion" : "none"); + return args; + } + + switch (error) { + case CompileArgsError::NoCompiler: { + JS_ReportErrorASCII(cx, "no WebAssembly compiler available"); + break; + } + case CompileArgsError::OutOfMemory: { + // Most callers are required to return 'false' without reporting an OOM, + // so we make reporting it optional here. + if (reportOOM) { + ReportOutOfMemory(cx); + } + break; + } + } + return nullptr; +} + +/* + * [SMDOC] Tiered wasm compilation. + * + * "Tiered compilation" refers to the mechanism where we first compile the code + * with a fast non-optimizing compiler so that we can start running the code + * quickly, while in the background recompiling the code with the slower + * optimizing compiler. 
Code created by baseline is called "tier-1"; code + * created by the optimizing compiler is called "tier-2". When the tier-2 code + * is ready, we "tier up" the code by creating paths from tier-1 code into their + * tier-2 counterparts; this patching is performed as the program is running. + * + * ## Selecting the compilation mode + * + * When wasm bytecode arrives, we choose the compilation strategy based on + * switches and on aspects of the code and the hardware. If switches allow + * tiered compilation to happen (the normal case), the following logic applies. + * + * If the code is sufficiently large that tiered compilation would be beneficial + * but not so large that it might blow our compiled code budget and make + * compilation fail, we choose tiered compilation. Otherwise we go straight to + * optimized code. + * + * The expected benefit of tiering is computed by TieringBeneficial(), below, + * based on various estimated parameters of the hardware: ratios of object code + * to byte code, speed of the system, number of cores. + * + * ## Mechanics of tiering up; patching + * + * Every time control enters a tier-1 function, the function prologue loads its + * tiering pointer from the tiering jump table (see JumpTable in WasmCode.h) and + * jumps to it. + * + * Initially, an entry in the tiering table points to the instruction inside the + * tier-1 function that follows the jump instruction (hence the jump is an + * expensive nop). When the tier-2 compiler is finished, the table is patched + * racily to point into the tier-2 function at the correct prologue location + * (see loop near the end of Module::finishTier2()). As tier-2 compilation is + * performed at most once per Module, there is at most one such racy overwrite + * per table element during the lifetime of the Module. + * + * The effect of the patching is to cause the tier-1 function to jump to its + * tier-2 counterpart whenever the tier-1 function is called subsequently. That + * is, tier-1 code performs standard frame setup on behalf of whatever code it + * jumps to, and the target code (tier-1 or tier-2) allocates its own frame in + * whatever way it wants. + * + * The racy writing means that it is often nondeterministic whether tier-1 or + * tier-2 code is reached by any call during the tiering-up process; if F calls + * A and B in that order, it may reach tier-2 code for A and tier-1 code for B. + * If F is running concurrently on threads T1 and T2, T1 and T2 may see code + * from different tiers for either function. + * + * Note, tiering up also requires upgrading the jit-entry stubs so that they + * reference tier-2 code. The mechanics of this upgrading are described at + * WasmInstanceObject::getExportedFunction(). + * + * ## Current limitations of tiering + * + * Tiering is not always seamless. Partly, it is possible for a program to get + * stuck in tier-1 code. Partly, a function that has tiered up continues to + * force execution to go via tier-1 code to reach tier-2 code, paying for an + * additional jump and a slightly less optimized prologue than tier-2 code could + * have had on its own. + * + * Known tiering limitiations: + * + * - We can tier up only at function boundaries. If a tier-1 function has a + * long-running loop it will not tier up until it returns to its caller. If + * this loop never exits (a runloop in a worker, for example) then the + * function will never tier up. + * + * To do better, we need OSR. + * + * - Wasm Table entries are never patched during tier-up. 
A Table of funcref + * holds not a JSFunction pointer, but a (code*,instance*) pair of pointers. + * When a table.set operation is performed, the JSFunction value is decomposed + * and its code and instance pointers are stored in the table; subsequently, + * when a table.get operation is performed, the JSFunction value is + * reconstituted from its code pointer using fairly elaborate machinery. (The + * mechanics are the same also for the reflected JS operations on a + * WebAssembly.Table. For everything, see WasmTable.{cpp,h}.) The code pointer + * in the Table will always be the code pointer belonging to the best tier that + * was active at the time when that function was stored in that Table slot; in + * many cases, it will be tier-1 code. As a consequence, a call through a table + * will first enter tier-1 code and then jump to tier-2 code. + * + * To do better, we must update all the tables in the system when an instance + * tiers up. This is expected to be very hard. + * + * - Imported Wasm functions are never patched during tier-up. Imports are held + * in FuncImportInstanceData values in the instance, and for a wasm + * callee, what's stored is the raw code pointer into the best tier of the + * callee that was active at the time the import was resolved. That could be + * baseline code, and if it is, the situation is as for Table entries: a call + * to an import will always go via that import's tier-1 code, which will tier + * up with an indirect jump. + * + * To do better, we must update all the import tables in the system that + * import functions from instances whose modules have tiered up. This is + * expected to be hard. + */ + +// Classify the current system as one of a set of recognizable classes. This +// really needs to get our tier-1 systems right. +// +// TODO: We don't yet have a good measure of how fast a system is. We +// distinguish between mobile and desktop because these are very different kinds +// of systems, but we could further distinguish between low / medium / high end +// within those major classes. If we do so, then constants below would be +// provided for each (class, architecture, system-tier) combination, not just +// (class, architecture) as now. +// +// CPU clock speed is not by itself a good predictor of system performance, as +// there are high-performance systems with slow clocks (recent Intel) and +// low-performance systems with fast clocks (older AMD). We can also use +// physical memory, core configuration, OS details, CPU class and family, and +// CPU manufacturer to disambiguate. 
+ +enum class SystemClass { + DesktopX86, + DesktopX64, + DesktopUnknown32, + DesktopUnknown64, + MobileX86, + MobileArm32, + MobileArm64, + MobileUnknown32, + MobileUnknown64 +}; + +static SystemClass ClassifySystem() { + bool isDesktop; + +#if defined(ANDROID) || defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + isDesktop = false; +#else + isDesktop = true; +#endif + + if (isDesktop) { +#if defined(JS_CODEGEN_X64) + return SystemClass::DesktopX64; +#elif defined(JS_CODEGEN_X86) + return SystemClass::DesktopX86; +#elif defined(JS_64BIT) + return SystemClass::DesktopUnknown64; +#else + return SystemClass::DesktopUnknown32; +#endif + } else { +#if defined(JS_CODEGEN_X86) + return SystemClass::MobileX86; +#elif defined(JS_CODEGEN_ARM) + return SystemClass::MobileArm32; +#elif defined(JS_CODEGEN_ARM64) + return SystemClass::MobileArm64; +#elif defined(JS_64BIT) + return SystemClass::MobileUnknown64; +#else + return SystemClass::MobileUnknown32; +#endif + } +} + +// Code sizes in machine code bytes per bytecode byte, again empirical except +// where marked. +// +// The Ion estimate for ARM64 is the measured Baseline value scaled by a +// plausible factor for optimized code. + +static const double x64Tox86Inflation = 1.25; + +static const double x64IonBytesPerBytecode = 2.45; +static const double x86IonBytesPerBytecode = + x64IonBytesPerBytecode * x64Tox86Inflation; +static const double arm32IonBytesPerBytecode = 3.3; +static const double arm64IonBytesPerBytecode = 3.0 / 1.4; // Estimate + +static const double x64BaselineBytesPerBytecode = x64IonBytesPerBytecode * 1.43; +static const double x86BaselineBytesPerBytecode = + x64BaselineBytesPerBytecode * x64Tox86Inflation; +static const double arm32BaselineBytesPerBytecode = + arm32IonBytesPerBytecode * 1.39; +static const double arm64BaselineBytesPerBytecode = 3.0; + +static double OptimizedBytesPerBytecode(SystemClass cls) { + switch (cls) { + case SystemClass::DesktopX86: + case SystemClass::MobileX86: + case SystemClass::DesktopUnknown32: + return x86IonBytesPerBytecode; + case SystemClass::DesktopX64: + case SystemClass::DesktopUnknown64: + return x64IonBytesPerBytecode; + case SystemClass::MobileArm32: + case SystemClass::MobileUnknown32: + return arm32IonBytesPerBytecode; + case SystemClass::MobileArm64: + case SystemClass::MobileUnknown64: + return arm64IonBytesPerBytecode; + default: + MOZ_CRASH(); + } +} + +static double BaselineBytesPerBytecode(SystemClass cls) { + switch (cls) { + case SystemClass::DesktopX86: + case SystemClass::MobileX86: + case SystemClass::DesktopUnknown32: + return x86BaselineBytesPerBytecode; + case SystemClass::DesktopX64: + case SystemClass::DesktopUnknown64: + return x64BaselineBytesPerBytecode; + case SystemClass::MobileArm32: + case SystemClass::MobileUnknown32: + return arm32BaselineBytesPerBytecode; + case SystemClass::MobileArm64: + case SystemClass::MobileUnknown64: + return arm64BaselineBytesPerBytecode; + default: + MOZ_CRASH(); + } +} + +double wasm::EstimateCompiledCodeSize(Tier tier, size_t bytecodeSize) { + SystemClass cls = ClassifySystem(); + switch (tier) { + case Tier::Baseline: + return double(bytecodeSize) * BaselineBytesPerBytecode(cls); + case Tier::Optimized: + return double(bytecodeSize) * OptimizedBytesPerBytecode(cls); + } + MOZ_CRASH("bad tier"); +} + +// If parallel Ion compilation is going to take longer than this, we should +// tier. 
+ +static const double tierCutoffMs = 10; + +// Compilation rate values are empirical except when noted, the reference +// systems are: +// +// Late-2013 MacBook Pro (2.6GHz 4 x hyperthreaded Haswell, Mac OS X) +// Late-2015 Nexus 5X (1.4GHz 4 x Cortex-A53 + 1.8GHz 2 x Cortex-A57, Android) +// Ca-2016 SoftIron Overdrive 1000 (1.7GHz 4 x Cortex-A57, Fedora) +// +// The rates are always per core. +// +// The estimate for ARM64 is the Baseline compilation rate on the SoftIron +// (because we have no Ion yet), divided by 5 to estimate Ion compile rate and +// then divided by 2 to make it more reasonable for consumer ARM64 systems. + +static const double x64IonBytecodesPerMs = 2100; +static const double x86IonBytecodesPerMs = 1500; +static const double arm32IonBytecodesPerMs = 450; +static const double arm64IonBytecodesPerMs = 750; // Estimate + +// Tiering cutoff values: if code section sizes are below these values (when +// divided by the effective number of cores) we do not tier, because we guess +// that parallel Ion compilation will be fast enough. + +static const double x64DesktopTierCutoff = x64IonBytecodesPerMs * tierCutoffMs; +static const double x86DesktopTierCutoff = x86IonBytecodesPerMs * tierCutoffMs; +static const double x86MobileTierCutoff = x86DesktopTierCutoff / 2; // Guess +static const double arm32MobileTierCutoff = + arm32IonBytecodesPerMs * tierCutoffMs; +static const double arm64MobileTierCutoff = + arm64IonBytecodesPerMs * tierCutoffMs; + +static double CodesizeCutoff(SystemClass cls) { + switch (cls) { + case SystemClass::DesktopX86: + case SystemClass::DesktopUnknown32: + return x86DesktopTierCutoff; + case SystemClass::DesktopX64: + case SystemClass::DesktopUnknown64: + return x64DesktopTierCutoff; + case SystemClass::MobileX86: + return x86MobileTierCutoff; + case SystemClass::MobileArm32: + case SystemClass::MobileUnknown32: + return arm32MobileTierCutoff; + case SystemClass::MobileArm64: + case SystemClass::MobileUnknown64: + return arm64MobileTierCutoff; + default: + MOZ_CRASH(); + } +} + +// As the number of cores grows the effectiveness of each core dwindles (on the +// systems we care about for SpiderMonkey). +// +// The data are empirical, computed from the observed compilation time of the +// Tanks demo code on a variable number of cores. +// +// The heuristic may fail on NUMA systems where the core count is high but the +// performance increase is nil or negative once the program moves beyond one +// socket. However, few browser users have such systems. + +static double EffectiveCores(uint32_t cores) { + if (cores <= 3) { + return pow(cores, 0.9); + } + return pow(cores, 0.75); +} + +#ifndef JS_64BIT +// Don't tier if tiering will fill code memory to more to more than this +// fraction. + +static const double spaceCutoffPct = 0.9; +#endif + +// Figure out whether we should use tiered compilation or not. +static bool TieringBeneficial(uint32_t codeSize) { + uint32_t cpuCount = GetHelperThreadCPUCount(); + MOZ_ASSERT(cpuCount > 0); + + // It's mostly sensible not to background compile when there's only one + // hardware thread as we want foreground computation to have access to that. + // However, if wasm background compilation helper threads can be given lower + // priority then background compilation on single-core systems still makes + // some kind of sense. That said, this is a non-issue: as of September 2017 + // 1-core was down to 3.5% of our population and falling. 
+ + if (cpuCount == 1) { + return false; + } + + // Compute the max number of threads available to do actual background + // compilation work. + + uint32_t workers = GetMaxWasmCompilationThreads(); + + // The number of cores we will use is bounded both by the CPU count and the + // worker count, since the worker count already takes this into account. + + uint32_t cores = workers; + + SystemClass cls = ClassifySystem(); + + // Ion compilation on available cores must take long enough to be worth the + // bother. + + double cutoffSize = CodesizeCutoff(cls); + double effectiveCores = EffectiveCores(cores); + + if ((codeSize / effectiveCores) < cutoffSize) { + return false; + } + + // Do not implement a size cutoff for 64-bit systems since the code size + // budget for 64 bit is so large that it will hardly ever be an issue. + // (Also the cutoff percentage might be different on 64-bit.) + +#ifndef JS_64BIT + // If the amount of executable code for baseline compilation jeopardizes the + // availability of executable memory for ion code then do not tier, for now. + // + // TODO: For now we consider this module in isolation. We should really + // worry about what else is going on in this process and might be filling up + // the code memory. It's like we need some kind of code memory reservation + // system or JIT compilation for large modules. + + double ionRatio = OptimizedBytesPerBytecode(cls); + double baselineRatio = BaselineBytesPerBytecode(cls); + double needMemory = codeSize * (ionRatio + baselineRatio); + double availMemory = LikelyAvailableExecutableMemory(); + double cutoff = spaceCutoffPct * MaxCodeBytesPerProcess; + + // If the sum of baseline and ion code makes us exceeds some set percentage + // of the executable memory then disable tiering. + + if ((MaxCodeBytesPerProcess - availMemory) + needMemory > cutoff) { + return false; + } +#endif + + return true; +} + +// Ensure that we have the non-compiler requirements to tier safely. +static bool PlatformCanTier() { + return CanUseExtraThreads() && jit::CanFlushExecutionContextForAllThreads(); +} + +CompilerEnvironment::CompilerEnvironment(const CompileArgs& args) + : state_(InitialWithArgs), args_(&args) {} + +CompilerEnvironment::CompilerEnvironment(CompileMode mode, Tier tier, + DebugEnabled debugEnabled) + : state_(InitialWithModeTierDebug), + mode_(mode), + tier_(tier), + debug_(debugEnabled) {} + +void CompilerEnvironment::computeParameters() { + MOZ_ASSERT(state_ == InitialWithModeTierDebug); + + state_ = Computed; +} + +void CompilerEnvironment::computeParameters(Decoder& d) { + MOZ_ASSERT(!isComputed()); + + if (state_ == InitialWithModeTierDebug) { + computeParameters(); + return; + } + + bool baselineEnabled = args_->baselineEnabled; + bool ionEnabled = args_->ionEnabled; + bool debugEnabled = args_->debugEnabled; + bool forceTiering = args_->forceTiering; + + bool hasSecondTier = ionEnabled; + MOZ_ASSERT_IF(debugEnabled, baselineEnabled); + MOZ_ASSERT_IF(forceTiering, baselineEnabled && hasSecondTier); + + // Various constraints in various places should prevent failure here. + MOZ_RELEASE_ASSERT(baselineEnabled || ionEnabled); + + uint32_t codeSectionSize = 0; + + SectionRange range; + if (StartsCodeSection(d.begin(), d.end(), &range)) { + codeSectionSize = range.size; + } + + if (baselineEnabled && hasSecondTier && + (TieringBeneficial(codeSectionSize) || forceTiering) && + PlatformCanTier()) { + mode_ = CompileMode::Tier1; + tier_ = Tier::Baseline; + } else { + mode_ = CompileMode::Once; + tier_ = hasSecondTier ? 
Tier::Optimized : Tier::Baseline; + } + + debug_ = debugEnabled ? DebugEnabled::True : DebugEnabled::False; + + state_ = Computed; +} + +template <class DecoderT> +static bool DecodeFunctionBody(DecoderT& d, ModuleGenerator& mg, + uint32_t funcIndex) { + uint32_t bodySize; + if (!d.readVarU32(&bodySize)) { + return d.fail("expected number of function body bytes"); + } + + if (bodySize > MaxFunctionBytes) { + return d.fail("function body too big"); + } + + const size_t offsetInModule = d.currentOffset(); + + // Skip over the function body; it will be validated by the compilation + // thread. + const uint8_t* bodyBegin; + if (!d.readBytes(bodySize, &bodyBegin)) { + return d.fail("function body length too big"); + } + + return mg.compileFuncDef(funcIndex, offsetInModule, bodyBegin, + bodyBegin + bodySize); +} + +template <class DecoderT> +static bool DecodeCodeSection(const ModuleEnvironment& env, DecoderT& d, + ModuleGenerator& mg) { + if (!env.codeSection) { + if (env.numFuncDefs() != 0) { + return d.fail("expected code section"); + } + + return mg.finishFuncDefs(); + } + + uint32_t numFuncDefs; + if (!d.readVarU32(&numFuncDefs)) { + return d.fail("expected function body count"); + } + + if (numFuncDefs != env.numFuncDefs()) { + return d.fail( + "function body count does not match function signature count"); + } + + for (uint32_t funcDefIndex = 0; funcDefIndex < numFuncDefs; funcDefIndex++) { + if (!DecodeFunctionBody(d, mg, env.numFuncImports + funcDefIndex)) { + return false; + } + } + + if (!d.finishSection(*env.codeSection, "code")) { + return false; + } + + return mg.finishFuncDefs(); +} + +SharedModule wasm::CompileBuffer(const CompileArgs& args, + const ShareableBytes& bytecode, + UniqueChars* error, + UniqueCharsVector* warnings, + JS::OptimizedEncodingListener* listener) { + Decoder d(bytecode.bytes, 0, error, warnings); + + ModuleEnvironment moduleEnv(args.features); + if (!moduleEnv.init() || !DecodeModuleEnvironment(d, &moduleEnv)) { + return nullptr; + } + CompilerEnvironment compilerEnv(args); + compilerEnv.computeParameters(d); + + ModuleGenerator mg(args, &moduleEnv, &compilerEnv, nullptr, error, warnings); + if (!mg.init(nullptr)) { + return nullptr; + } + + if (!DecodeCodeSection(moduleEnv, d, mg)) { + return nullptr; + } + + if (!DecodeModuleTail(d, &moduleEnv)) { + return nullptr; + } + + return mg.finishModule(bytecode, listener); +} + +bool wasm::CompileTier2(const CompileArgs& args, const Bytes& bytecode, + const Module& module, UniqueChars* error, + UniqueCharsVector* warnings, Atomic<bool>* cancelled) { + Decoder d(bytecode, 0, error); + + ModuleEnvironment moduleEnv(args.features); + if (!moduleEnv.init() || !DecodeModuleEnvironment(d, &moduleEnv)) { + return false; + } + CompilerEnvironment compilerEnv(CompileMode::Tier2, Tier::Optimized, + DebugEnabled::False); + compilerEnv.computeParameters(d); + + ModuleGenerator mg(args, &moduleEnv, &compilerEnv, cancelled, error, + warnings); + if (!mg.init(nullptr)) { + return false; + } + + if (!DecodeCodeSection(moduleEnv, d, mg)) { + return false; + } + + if (!DecodeModuleTail(d, &moduleEnv)) { + return false; + } + + return mg.finishTier2(module); +} + +class StreamingDecoder { + Decoder d_; + const ExclusiveBytesPtr& codeBytesEnd_; + const Atomic<bool>& cancelled_; + + public: + StreamingDecoder(const ModuleEnvironment& env, const Bytes& begin, + const ExclusiveBytesPtr& codeBytesEnd, + const Atomic<bool>& cancelled, UniqueChars* error, + UniqueCharsVector* warnings) + : d_(begin, env.codeSection->start, error, 
warnings), + codeBytesEnd_(codeBytesEnd), + cancelled_(cancelled) {} + + bool fail(const char* msg) { return d_.fail(msg); } + + bool done() const { return d_.done(); } + + size_t currentOffset() const { return d_.currentOffset(); } + + bool waitForBytes(size_t numBytes) { + numBytes = std::min(numBytes, d_.bytesRemain()); + const uint8_t* requiredEnd = d_.currentPosition() + numBytes; + auto codeBytesEnd = codeBytesEnd_.lock(); + while (codeBytesEnd < requiredEnd) { + if (cancelled_) { + return false; + } + codeBytesEnd.wait(); + } + return true; + } + + bool readVarU32(uint32_t* u32) { + return waitForBytes(MaxVarU32DecodedBytes) && d_.readVarU32(u32); + } + + bool readBytes(size_t size, const uint8_t** begin) { + return waitForBytes(size) && d_.readBytes(size, begin); + } + + bool finishSection(const SectionRange& range, const char* name) { + return d_.finishSection(range, name); + } +}; + +static SharedBytes CreateBytecode(const Bytes& env, const Bytes& code, + const Bytes& tail, UniqueChars* error) { + size_t size = env.length() + code.length() + tail.length(); + if (size > MaxModuleBytes) { + *error = DuplicateString("module too big"); + return nullptr; + } + + MutableBytes bytecode = js_new<ShareableBytes>(); + if (!bytecode || !bytecode->bytes.resize(size)) { + return nullptr; + } + + uint8_t* p = bytecode->bytes.begin(); + + memcpy(p, env.begin(), env.length()); + p += env.length(); + + memcpy(p, code.begin(), code.length()); + p += code.length(); + + memcpy(p, tail.begin(), tail.length()); + p += tail.length(); + + MOZ_ASSERT(p == bytecode->end()); + + return bytecode; +} + +SharedModule wasm::CompileStreaming( + const CompileArgs& args, const Bytes& envBytes, const Bytes& codeBytes, + const ExclusiveBytesPtr& codeBytesEnd, + const ExclusiveStreamEndData& exclusiveStreamEnd, + const Atomic<bool>& cancelled, UniqueChars* error, + UniqueCharsVector* warnings) { + CompilerEnvironment compilerEnv(args); + ModuleEnvironment moduleEnv(args.features); + if (!moduleEnv.init()) { + return nullptr; + } + + { + Decoder d(envBytes, 0, error, warnings); + + if (!DecodeModuleEnvironment(d, &moduleEnv)) { + return nullptr; + } + compilerEnv.computeParameters(d); + + if (!moduleEnv.codeSection) { + d.fail("unknown section before code section"); + return nullptr; + } + + MOZ_RELEASE_ASSERT(moduleEnv.codeSection->size == codeBytes.length()); + MOZ_RELEASE_ASSERT(d.done()); + } + + ModuleGenerator mg(args, &moduleEnv, &compilerEnv, &cancelled, error, + warnings); + if (!mg.init(nullptr)) { + return nullptr; + } + + { + StreamingDecoder d(moduleEnv, codeBytes, codeBytesEnd, cancelled, error, + warnings); + + if (!DecodeCodeSection(moduleEnv, d, mg)) { + return nullptr; + } + + MOZ_RELEASE_ASSERT(d.done()); + } + + { + auto streamEnd = exclusiveStreamEnd.lock(); + while (!streamEnd->reached) { + if (cancelled) { + return nullptr; + } + streamEnd.wait(); + } + } + + const StreamEndData& streamEnd = exclusiveStreamEnd.lock(); + const Bytes& tailBytes = *streamEnd.tailBytes; + + { + Decoder d(tailBytes, moduleEnv.codeSection->end(), error, warnings); + + if (!DecodeModuleTail(d, &moduleEnv)) { + return nullptr; + } + + MOZ_RELEASE_ASSERT(d.done()); + } + + SharedBytes bytecode = CreateBytecode(envBytes, codeBytes, tailBytes, error); + if (!bytecode) { + return nullptr; + } + + return mg.finishModule(*bytecode, streamEnd.tier2Listener); +} |
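
A note on ObservedCPUFeatures() in the file above: it packs a small architecture tag into the low ARCH_BITS (3) bits of a uint32_t and shifts the architecture-specific CPU fingerprint into the remaining bits, after asserting that the fingerprint fits. The standalone sketch below (illustrative names, not SpiderMonkey code) shows the packing and how the two fields are recovered:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative re-creation of the packing scheme used by ObservedCPUFeatures():
// the low ARCH_BITS bits identify the architecture, the rest hold the
// architecture-specific feature fingerprint.
enum Arch : uint32_t { X86 = 0x1, X64 = 0x2, ARM = 0x3, ARM64 = 0x6 };
constexpr uint32_t ARCH_BITS = 3;

uint32_t PackCpuFeatures(Arch arch, uint32_t fingerprint) {
  // The fingerprint must fit in the remaining 32 - ARCH_BITS bits.
  assert(fingerprint <= (UINT32_MAX >> ARCH_BITS));
  return uint32_t(arch) | (fingerprint << ARCH_BITS);
}

int main() {
  // Hypothetical x64 fingerprint with a few feature bits set.
  uint32_t word = PackCpuFeatures(X64, 0b1011);
  uint32_t arch = word & ((1u << ARCH_BITS) - 1);  // -> 0x2 (X64)
  uint32_t fingerprint = word >> ARCH_BITS;        // -> 0b1011
  printf("arch=%u fingerprint=%u\n", (unsigned)arch, (unsigned)fingerprint);
  return 0;
}
```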
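The [SMDOC] comment describes tier-up as patching entries in a per-module tiering jump table: a tier-1 function's prologue loads its entry and jumps through it, and Module::finishTier2() later overwrites that entry (at most once, racily but harmlessly) to point at the tier-2 code. The following is only a conceptual model of that indirection, using an atomic function pointer in place of the real jump-table entry; it is not how the generated machine code actually works:

```cpp
#include <atomic>
#include <cstdio>

// Conceptual model of the tiering jump table described in the SMDOC comment.
static int Tier1Body(int x) { return x + 1; }  // stands in for baseline code
static int Tier2Body(int x) { return x + 1; }  // stands in for optimized code (same semantics)

// Initially the "jump table" entry sends control back into tier-1 code,
// so the indirection is effectively an expensive nop.
static std::atomic<int (*)(int)> tieringEntry{&Tier1Body};

int CallFunction(int x) {
  // Every call goes through the tiering entry, so it picks up tier-2 code
  // as soon as the entry has been patched.
  return tieringEntry.load(std::memory_order_relaxed)(x);
}

void FinishTier2() {
  // Patched at most once per function for the lifetime of the module.
  tieringEntry.store(&Tier2Body, std::memory_order_relaxed);
}

int main() {
  printf("%d\n", CallFunction(1));  // runs tier-1
  FinishTier2();
  printf("%d\n", CallFunction(1));  // now reaches tier-2
}
```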
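The size test in TieringBeneficial() reduces to asking whether straight-to-Ion compilation of the code section would exceed the ~10 ms budget (tierCutoffMs) given the effective number of compilation cores. Below is a simplified extract of that arithmetic using the x64 desktop constants from this file; it omits the 32-bit executable-memory check and the exact worker-count logic:

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

// Constants copied from WasmCompile.cpp for the x64 desktop case.
static const double tierCutoffMs = 10;
static const double x64IonBytecodesPerMs = 2100;
static const double x64DesktopTierCutoff = x64IonBytecodesPerMs * tierCutoffMs;

// Diminishing returns per extra core, as in EffectiveCores().
static double EffectiveCores(uint32_t cores) {
  return cores <= 3 ? std::pow(cores, 0.9) : std::pow(cores, 0.75);
}

// Simplified version of the size test in TieringBeneficial(): tier only when
// compiling everything with Ion up front would blow the ~10 ms budget.
static bool WouldTier(uint32_t codeSectionSize, uint32_t workers) {
  if (workers <= 1) return false;
  return (codeSectionSize / EffectiveCores(workers)) >= x64DesktopTierCutoff;
}

int main() {
  // With 8 compilation workers, EffectiveCores(8) ~= 4.76, so the cutoff is
  // roughly 21000 * 4.76 ~= 100 KB of code section.
  printf("%d\n", WouldTier(50 * 1024, 8));   // 0: compile once with Ion
  printf("%d\n", WouldTier(500 * 1024, 8));  // 1: tier
}
```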
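DecodeFunctionBody() reads each function body's size as a LEB128-encoded varU32 and then skips over the body bytes, leaving validation and compilation to the helper threads. For reference, here is a minimal unsigned-LEB128 reader in the same spirit as Decoder::readVarU32 (illustrative only, not the actual Decoder implementation):

```cpp
#include <cstdint>
#include <cstdio>

// Minimal unsigned LEB128 reader, the encoding used for the per-function body
// size in the wasm code section. Returns false on truncation or overflow.
static bool ReadVarU32(const uint8_t*& p, const uint8_t* end, uint32_t* out) {
  uint32_t result = 0;
  for (int shift = 0; shift < 35; shift += 7) {
    if (p == end) return false;  // truncated input
    uint8_t byte = *p++;
    result |= uint32_t(byte & 0x7f) << shift;
    if (!(byte & 0x80)) {        // high bit clear: last byte
      *out = result;
      return true;
    }
  }
  return false;                  // more than 5 bytes: not a valid u32
}

int main() {
  // 624485 encodes as e5 8e 26.
  const uint8_t bytes[] = {0xe5, 0x8e, 0x26};
  const uint8_t* p = bytes;
  uint32_t v;
  if (ReadVarU32(p, bytes + sizeof(bytes), &v)) {
    printf("%u\n", (unsigned)v);  // 624485
  }
}
```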
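StreamingDecoder::waitForBytes() blocks the compiling thread until the streamed code-section bytes it is about to read have actually arrived, or until compilation is cancelled. Below is a rough equivalent built from standard C++ primitives; the real code uses SpiderMonkey's ExclusiveBytesPtr lock/wait machinery and tracks an end pointer rather than a byte count, so treat this purely as a model of the waiting pattern:

```cpp
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <thread>

// Rough model of StreamingDecoder::waitForBytes(): the streaming (network)
// thread publishes how many code bytes have arrived, and the compiling thread
// waits until its read position plus the bytes it needs is covered, or until
// the compilation is cancelled.
struct StreamState {
  std::mutex lock;
  std::condition_variable arrived;
  size_t bytesReceived = 0;
  std::atomic<bool> cancelled{false};

  // Called on the streaming thread as chunks come in.
  void deliver(size_t n) {
    {
      std::lock_guard<std::mutex> guard(lock);
      bytesReceived += n;
    }
    arrived.notify_all();
  }

  // Aborts any waiter, mirroring the cancelled_ check in the real code.
  void cancel() {
    cancelled = true;
    arrived.notify_all();
  }

  // Called on the compiling thread; mirrors waitForBytes().
  bool waitForBytes(size_t currentOffset, size_t numBytes) {
    std::unique_lock<std::mutex> guard(lock);
    while (bytesReceived < currentOffset + numBytes) {
      if (cancelled) {
        return false;  // abandon compilation
      }
      arrived.wait(guard);
    }
    return true;
  }
};

int main() {
  StreamState s;
  std::thread network([&] { s.deliver(4096); });  // simulate a chunk arriving
  bool ok = s.waitForBytes(/*currentOffset=*/0, /*numBytes=*/1024);
  network.join();
  return ok ? 0 : 1;
}
```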