| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
|---|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
| commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
| tree | f435a8308119effd964b339f76abb83a57c29483 /js/src/wasm/WasmBCFrame.h | |
| parent | Initial commit. (diff) | |
| download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz, firefox-26a029d407be480d791972afb5975cf62c9360a6.zip | |
Adding upstream version 124.0.1. (tag: upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/wasm/WasmBCFrame.h')
-rw-r--r-- | js/src/wasm/WasmBCFrame.h | 1400 |
1 file changed, 1400 insertions(+), 0 deletions(-)
diff --git a/js/src/wasm/WasmBCFrame.h b/js/src/wasm/WasmBCFrame.h
new file mode 100644
index 0000000000..0713761250
--- /dev/null
+++ b/js/src/wasm/WasmBCFrame.h
@@ -0,0 +1,1400 @@

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This is an INTERNAL header for Wasm baseline compiler: CPU stack frame,
// stack maps, and associated logic.

#ifndef wasm_wasm_baseline_frame_h
#define wasm_wasm_baseline_frame_h

#include "wasm/WasmBaselineCompile.h"  // For BaseLocalIter
#include "wasm/WasmBCDefs.h"
#include "wasm/WasmBCRegDefs.h"
#include "wasm/WasmBCStk.h"
#include "wasm/WasmConstants.h"  // For MaxFrameSize

// [SMDOC] Wasm baseline compiler's stack frame.
//
// For background, see "Wasm's ABIs" in WasmFrame.h; the following should
// never be in conflict with that.
//
// The stack frame has four parts ("below" means at lower addresses):
//
//  - the Frame element;
//  - the Local area, including the DebugFrame element and possibly a spilled
//    pointer to stack results, if any; allocated below the header with
//    various forms of alignment;
//  - the Dynamic area, comprising the temporary storage the compiler uses
//    for register spilling, allocated below the Local area;
//  - the Arguments area, comprising memory allocated for outgoing calls,
//    allocated below the Dynamic area.
//
//                 +==============================+
//                 |    Incoming stack arg        |
//                 |    ...                       |
// -------------   +==============================+
//                 |    Frame (fixed size)        |
// -------------   +==============================+ <-------------------- FP
//          ^      | DebugFrame (optional)        |    ^                ^^
//   localSize     | Register arg local           |    |                ||
//          |      | ...                          |    |      framePushed
//          |      | Register stack result ptr?   |    |                ||
//          |      | Non-arg local                |    |                ||
//          |      | ...                          |    |                ||
//          |      | (padding)                    |    |                ||
//          |      | Instance pointer             |    |                ||
//          |      +------------------------------+    |                ||
//          v      | (padding)                    |    v                ||
// -------------   +==============================+ currentStackHeight  ||
//          ^      |    Dynamic (variable size)   |    |                ||
//  dynamicSize    |    ...                       |    |                ||
//          v      |    ...                       |    v                ||
// -------------   |    (free space, sometimes)   | ---------           v|
//                 +==============================+ <----- SP not-during calls
//                 |    Arguments (sometimes)     |                      |
//                 |    ...                       |                      v
//                 +==============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer.  masm.framePushed() is
// always correct, and masm.getStackPointer() + masm.framePushed() always
// addresses the Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame and, if needed, the spilled value
// of the stack results area pointer) is laid out by BaseLocalIter and is
// allocated and deallocated by standard prologue and epilogue functions that
// manipulate the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame.  On
// some systems (such as ARM64), the Dynamic memory may be allocated in
// chunks because the SP needs a specific alignment, and in this case there
// will normally be some free space directly above the SP.  The stack height
// does not include the free space; it reflects the logically used space
// only.
//
// The Dynamic area is where space for stack results is allocated when
// calling functions that return results on the stack.  If a function has
// stack results, a pointer to the low address of the stack result area is
// passed as an additional argument, according to the usual ABI.  See
// ABIResultIter::HasStackResults.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.
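
// [Editor's illustration -- not part of the upstream file.]  A minimal,
// self-contained model of the addressing rule described above: since
// SP + masm.framePushed() always points at the Frame, a slot recorded as
// `offset` bytes below the Frame lives at SP + framePushed - offset.  The
// private stackOffset() and localOffset() helpers later in this file compute
// exactly this distance.
namespace bcframe_addressing_example {
constexpr unsigned spRelativeOffset(unsigned framePushed, unsigned offset) {
  return framePushed - offset;  // distance of the slot above SP, in bytes
}
// With 96 bytes of frame pushed, a slot 16 bytes below the Frame sits
// 80 bytes above SP.
static_assert(spRelativeOffset(96, 16) == 80, "slot addressing model");
}  // namespace bcframe_addressing_example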

namespace js {
namespace wasm {

using namespace js::jit;

// Abstraction of the height of the stack frame, to avoid type confusion.

class StackHeight {
  friend class BaseStackFrameAllocator;

  uint32_t height;

 public:
  explicit StackHeight(uint32_t h) : height(h) {}
  static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
  bool isValid() const { return height != UINT32_MAX; }
  bool operator==(StackHeight rhs) const {
    MOZ_ASSERT(isValid() && rhs.isValid());
    return height == rhs.height;
  }
  bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};

// Abstraction for where multi-value results go on the machine stack.

class StackResultsLoc {
  uint32_t bytes_;
  size_t count_;
  Maybe<uint32_t> height_;

 public:
  StackResultsLoc() : bytes_(0), count_(0) {}
  StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
      : bytes_(bytes), count_(count), height_(Some(height)) {
    MOZ_ASSERT(bytes != 0);
    MOZ_ASSERT(count != 0);
    MOZ_ASSERT(height != 0);
  }

  uint32_t bytes() const { return bytes_; }
  uint32_t count() const { return count_; }
  uint32_t height() const { return height_.value(); }

  bool hasStackResults() const { return bytes() != 0; }
  StackResults stackResults() const {
    return hasStackResults() ? StackResults::HasStackResults
                             : StackResults::NoStackResults;
  }
};

// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts).  See comments above for more.  Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is
// performed; BaseStackFrame then provides a pleasant interface for stack
// frame management.

class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte
  // alignment but items are 8 bytes), allocate stack memory in chunks, and
  // use a separate stack height variable to track the effective stack
  // pointer within the allocated area.  Effectively, there's a variable
  // amount of free space directly above the stack pointer.  See diagram
  // above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  //  - The Local area is always aligned according to WasmStackAlignment, ie,
  //    masm.framePushed() % WasmStackAlignment is zero after allocating
  //    locals.
  //
  //  - ChunkSize is always a multiple of WasmStackAlignment.
  //
  //  - Pushing and popping are always in units of ChunkSize (hence
  //    preserving alignment).
  //
  //  - The free space on the stack (masm.framePushed() -
  //    currentStackHeight_) is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack; the size of
  // this allocation is InitialChunk, and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;

  // The current logical height of the frame is
  //   currentStackHeight_ = localSize_ + dynamicSize
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is
  // usually larger than currentStackHeight_, notably at the beginning of
  // execution when we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif

  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment.  The Local area is then allocated in beginFunction(),
  // following the allocation of the Header.  See onFixedStackAllocated()
  // below.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Record the current stack height, after it has become stable in
  // beginFunction().  See also BaseStackFrame::onFixedStackAllocated().

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = localSize_;
#endif
  }

 public:
  // The fixed amount of memory, in bytes, allocated on the stack below the
  // Header for purposes such as locals and other fixed values.  Includes all
  // necessary alignment, and on ARM64 also the initial chunk for the working
  // stack memory.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    return localSize_ + InitialChunk;
#else
    return localSize_;
#endif
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
#endif
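
  // [Editor's illustration -- not part of the upstream file.]  Worked
  // examples of the rounding above, using hypothetical constants: a 64-bit
  // target (so ChunkSize == 8 * 8 == 64) and a fixedAllocSize() of 128.
  //
  //   height 100 -> frame 128   (within the fixed allocation; no growth)
  //   height 130 -> frame 192   (2 bytes over; rounds up to one whole chunk)
  //   height 192 -> frame 192   (exactly one chunk over; no extra growth)
  //
  // The arithmetic, checked mechanically:
  static_assert(128 + ((130 - 128 + 63) / 64) * 64 == 192,
                "growth is rounded up to a whole chunk");
  static_assert(128 + ((192 - 128 + 63) / 64) * 64 == 192,
                "chunk boundaries are preserved");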

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and
  // saving intermediate values.

  // Offset off of sp_ for the slot at stack area location `offset`.

  int32_t stackOffset(int32_t offset) {
    MOZ_ASSERT(offset > 0);
    return masm.framePushed() - offset;
  }

  uint32_t computeHeightWithStackResults(StackHeight stackBase,
                                         uint32_t stackResultBytes) {
    MOZ_ASSERT(stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    return stackBase.height + stackResultBytes;
  }

#ifdef RABALDR_CHUNKY_STACK
  void pushChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace < bytes) {
      uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize);
      MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
      masm.reserveStack(bytesToReserve);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we
    // drop values consumed by a call, and we may need to drop several
    // chunks.  But never drop the initial chunk.  Crucially, the amount we
    // drop is always an integral number of chunks.
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace >= ChunkSize) {
      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif
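
  // [Editor's illustration -- not part of the upstream file.]  A worked
  // instance of the pushChunkyBytes() rule above, with hypothetical numbers
  // and ChunkSize == 64: with framePushed == 192 and currentStackHeight_ ==
  // 180 there are 12 free bytes, so pushing 16 bytes reserves
  // AlignBytes(16 - 12, 64) == 64 more, leaving framePushed == 256, height
  // == 196, and 60 free bytes -- still less than one chunk, as
  // checkChunkyInvariants() requires.
  static_assert(192 - 180 == 12 && ((16 - 12 + 63) / 64) * 64 == 64 &&
                    256 - 196 == 60 && 60 < 64,
                "chunky push example");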

  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    return currentStackHeight_;
#else
    return masm.framePushed();
#endif
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  void checkChunkyInvariants() {
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // For a given stack height, return the appropriate size of the allocated
  // frame.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
    // A more complicated adjustment is needed.
    return framePushedForHeight(stackHeight.height);
#else
    // The allocated frame size equals the stack height.
    return stackHeight.height;
#endif
  }

 public:
  // The current height of the stack area, not necessarily zero-based, in a
  // type-safe way.

  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }

  // Set the frame height to a previously recorded value.

  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = amount.height;
    masm.setFramePushed(framePushedForHeight(amount));
    checkChunkyInvariants();
#else
    masm.setFramePushed(amount.height);
#endif
  }

  // The current height of the dynamic part of the stack area (ie, the
  // backing store for the evaluation stack), zero-based.

  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }

  // Before branching to an outer control label, pop the execution stack to
  // the level expected by that region, but do not update masm.framePushed()
  // as that will happen as compilation leaves the block.
  //
  // Note these operate directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight,
                            uint32_t stackResultBytes) {
    uint32_t framePushedHere = masm.framePushed();
    StackHeight heightThere =
        StackHeight(destStackHeight.height + stackResultBytes);
    uint32_t framePushedThere = framePushedForHeight(heightThere);
    if (framePushedHere > framePushedThere) {
      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    }
  }

  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    popStackBeforeBranch(destStackHeight,
                         ABIResultIter::MeasureStackBytes(type));
  }

  // Given that there are |stackParamSize| bytes on the dynamic stack
  // corresponding to the stack results, return the stack height once these
  // parameters are popped.

  StackHeight stackResultsBase(uint32_t stackParamSize) {
    return StackHeight(currentStackHeight() - stackParamSize);
  }

  // For most of WebAssembly, adjacent instructions have fallthrough control
  // flow between them, which allows us to simply thread the current stack
  // height through the compiler.  There are two exceptions to this rule:
  // when leaving a block via dead code, and when entering the "else" arm of
  // an "if".  In these cases, the stack height is the block entry height,
  // plus any stack values (results in the block exit case, parameters in the
  // else entry case).

  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    uint32_t height = destStackHeight.height;
    height += ABIResultIter::MeasureStackBytes(type);
    setStackHeight(StackHeight(height));
  }

  // Return offset of stack result.

  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
                             uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    return end - result.stackOffset();
  }
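
  // [Editor's illustration -- not part of the upstream file.]  With a
  // hypothetical block entry height of 160 and 24 bytes of stack results,
  // the result area occupies heights (160, 184].  A result whose ABI
  // stackOffset() is 8 (i.e. 8 bytes up from the low-address end of the
  // result area) is therefore located at height 184 - 8 == 176.
  static_assert(160 + 24 - 8 == 176, "stack result location example");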

 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the
  // freeing of the argument area and dropping values off the stack after a
  // call.  But they always amount to manipulating the real stack pointer by
  // some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments.  But we do update framePushed(), so
  // we can still index into the frame below the outgoing arguments area.

  // This is always equivalent to a masm.reserveStack() call.

  void allocArgArea(size_t argSize) {
    if (argSize) {
      masm.reserveStack(argSize);
    }
  }

  // This frees the argument area allocated by allocArgArea(), and `argSize`
  // must be equal to the `argSize` argument to allocArgArea().  In addition
  // we drop some values from the frame, corresponding to the values that
  // were consumed by the call.

  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
    // The method is called to re-initialize SP after the call.  Note that
    // this operation shall not be optimized away for argSize + dropSize == 0.
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, which is why the operation is split.
    masm.freeStackTo(masm.framePushed() - argSize);
    popChunkyBytes(dropSize);
#else
    masm.freeStackTo(masm.framePushed() - (argSize + dropSize));
#endif
  }
};

class BaseStackFrame final : public BaseStackFrameAllocator {
  MacroAssembler& masm;

  // The largest observed value of masm.framePushed(), ie, the size of the
  // stack frame.  Read this for its true value only when code generation is
  // finished.
  uint32_t maxFramePushed_;

  // Patch point where we check for stack overflow.
  CodeOffset stackAddOffset_;

  // Low byte offset of pointer to stack results, if any.
  Maybe<int32_t> stackResultsPtrOffset_;

  // The offset of the instance pointer.
  uint32_t instancePointerOffset_;

  // Low byte offset of local area for true locals (not parameters).
  uint32_t varLow_;

  // High byte offset + 1 of local area for true locals.
  uint32_t varHigh_;

  // The stack pointer, cached for brevity.
  RegisterOrSP sp_;

 public:
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        instancePointerOffset_(UINT32_MAX),
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}

  ///////////////////////////////////////////////////////////////////////////
  //
  // Stack management and overflow checking

  // This must be called once beginFunction has allocated space for the
  // Header (the Frame and DebugFrame) and the Local area, and will record
  // the current frame size for internal use by the stack abstractions.

  void onFixedStackAllocated() {
    maxFramePushed_ = masm.framePushed();
    BaseStackFrameAllocator::onFixedStackAllocated();
  }

  // We won't know until after we've generated code how big the frame will be
  // (we may need arbitrary spill slots and outgoing param slots), so emit a
  // patchable add that is patched in endFunction().
  //
  // Note the platform scratch register may be used by branchPtr(), so
  // generally tmp must be something else.

  void checkStack(Register tmp, BytecodeOffset trapOffset) {
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
    Label ok;
    masm.branchPtr(Assembler::Below,
                   Address(InstanceReg, wasm::Instance::offsetOfStackLimit()),
                   tmp, &ok);
    masm.wasmTrap(Trap::StackOverflow, trapOffset);
    masm.bind(&ok);
  }

  void patchCheckStack() {
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }

  // Very large frames are implausible, probably an attack.

  bool checkStackHeight() { return maxFramePushed_ <= MaxFrameSize; }
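
  // [Editor's illustration -- not part of the upstream file.]  The patched
  // check above, with hypothetical numbers: SP == 0x10000 and a stack limit
  // of 0x8000.  A worst-case frame of 0x4000 bytes leaves SP - 0x4000 ==
  // 0xC000, above the limit, so the branch falls through to &ok; a
  // worst-case frame of 0x9000 bytes would put SP - maxFramePushed_ at
  // 0x7000, below the limit, and execution traps with Trap::StackOverflow.
  static_assert(0x10000 - 0x4000 > 0x8000, "frame fits: no trap");
  static_assert(0x10000 - 0x9000 < 0x8000, "frame does not fit: trap");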

  ///////////////////////////////////////////////////////////////////////////
  //
  // Local area

  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // It assumes the stack is growing down and that locals are on the stack
    // at lower addresses than Frame, and is the offset from Frame of the
    // lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}

    bool isStackArgument() const { return offs < 0; }
  };

  // Profiling shows that the number of parameters and locals frequently
  // touches or exceeds 8.  So 16 seems like a reasonable starting point.
  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;

  // Initialize `localInfo` based on the types of `locals` and `args`.
  [[nodiscard]] bool setupLocals(const ValTypeVector& locals,
                                 const ArgTypeVector& args, bool debugEnabled,
                                 LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args, debugEnabled);
    for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
    }

    varLow_ = i.frameSize();
    for (; !i.done(); i++) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
    }
    varHigh_ = i.frameSize();

    // Reserve an additional stack slot for the instance pointer.
    const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
    const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
    instancePointerOffset_ = localSize;

    setLocalSize(AlignBytes(localSize, WasmStackAlignment));

    if (args.hasSyntheticStackResultPointerArg()) {
      stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
    }

    return true;
  }

  void zeroLocals(BaseRegAlloc* ra);

  Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) {
    if (local.isStackArgument()) {
      return Address(FramePointer,
                     stackArgumentOffsetFromFp(local) + additionalOffset);
    }
    return Address(sp_, localOffsetFromSp(local) + additionalOffset);
  }

  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src), dest);
  }

#ifndef JS_PUNBOX64
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(addressOfLocal(src), dest);
  }

  void loadLocalRef(const Local& src, RegRef dest) {
    masm.loadPtr(addressOfLocal(src), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(addressOfLocal(src), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(addressOfLocal(src), dest);
  }

#ifdef ENABLE_WASM_SIMD
  void loadLocalV128(const Local& src, RegV128 dest) {
    masm.loadUnalignedSimd128(addressOfLocal(src), dest);
  }
#endif

  void storeLocalI32(RegI32 src, const Local& dest) {
    masm.store32(src, addressOfLocal(dest));
  }

  void storeLocalI64(RegI64 src, const Local& dest) {
    masm.store64(src, addressOfLocal(dest));
  }

  void storeLocalRef(RegRef src, const Local& dest) {
    masm.storePtr(src, addressOfLocal(dest));
  }

  void storeLocalF64(RegF64 src, const Local& dest) {
    masm.storeDouble(src, addressOfLocal(dest));
  }

  void storeLocalF32(RegF32 src, const Local& dest) {
    masm.storeFloat32(src, addressOfLocal(dest));
  }

#ifdef ENABLE_WASM_SIMD
  void storeLocalV128(RegV128 src, const Local& dest) {
    masm.storeUnalignedSimd128(src, addressOfLocal(dest));
  }
#endif

  // Offset off of sp_ for `local`.
  int32_t localOffsetFromSp(const Local& local) {
    MOZ_ASSERT(!local.isStackArgument());
    return localOffset(local.offs);
  }

  // Offset off of frame pointer for `stack argument`.
  int32_t stackArgumentOffsetFromFp(const Local& local) {
    MOZ_ASSERT(local.isStackArgument());
    return -local.offs;
  }

  // The incoming stack result area pointer is for stack results of the
  // function being compiled.
  void loadIncomingStackResultAreaPtr(RegPtr reg) {
    const int32_t offset = stackResultsPtrOffset_.value();
    Address src = offset < 0 ? Address(FramePointer, -offset)
                             : Address(sp_, stackOffset(offset));
    masm.loadPtr(src, reg);
  }

  void storeIncomingStackResultAreaPtr(RegPtr reg) {
    // If we get here, that means the pointer to the stack results area was
    // passed in as a register, and therefore it will be spilled below the
    // frame, so the offset is a positive height.
    MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
    masm.storePtr(reg,
                  Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
  }

  void loadInstancePtr(Register dst) {
    // loadInstancePtr is sometimes used in contexts where SP is not in sync
    // with FP, e.g. just after a tail call returns, so load relative to FP.
    masm.loadPtr(Address(FramePointer, -instancePointerOffset_), dst);
  }

  void storeInstancePtr(Register instance) {
    masm.storePtr(instance, Address(sp_, stackOffset(instancePointerOffset_)));
  }

  int32_t getInstancePtrOffset() { return stackOffset(instancePointerOffset_); }

  // An outgoing stack result area pointer is for stack results of callees of
  // the function being compiled.
  void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
                                         RegPtr dest) {
    MOZ_ASSERT(results.height() <= masm.framePushed());
    uint32_t offsetFromSP = masm.framePushed() - results.height();
    masm.moveStackPtrTo(dest);
    if (offsetFromSP) {
      masm.addPtr(Imm32(offsetFromSP), dest);
    }
  }

 private:
  // Offset off of sp_ for a local with offset `offset` from Frame.
  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }

 public:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Dynamic area

  static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
  static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
  static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
  static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
#ifdef ENABLE_WASM_SIMD
  static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128;
#endif

  // Pushes the register `r` to the stack.  This pushes the full 64-bit width
  // on 64-bit systems, and 32 bits otherwise.
  uint32_t pushGPR(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfPtr);
    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

#ifdef ENABLE_WASM_SIMD
  uint32_t pushV128(RegV128 r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#  ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfV128);
#  else
    masm.adjustStack(-(int)StackSizeOfV128);
#  endif
    masm.storeUnalignedSimd128(r,
                               Address(sp_, stackOffset(currentStackHeight())));
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight());
    return currentStackHeight();
  }
#endif

  uint32_t pushDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }
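
  // [Editor's illustration -- not part of the upstream file.]  The push and
  // pop families pair up symmetrically.  This sketch is never called by the
  // compiler and is purely illustrative: it spills a GPR, lets the caller
  // clobber it, and restores it.  On chunky-stack targets the push may grow
  // masm.framePushed() by a whole chunk even though the logical height grows
  // by only one slot.
  template <typename Clobber>
  void exampleSpillAroundClobber(Register r, Clobber&& clobber) {
    DebugOnly<uint32_t> h = pushGPR(r);  // spill; returns the new height
    clobber();                           // r may be freely destroyed here
    popGPR(r);  // restore; height returns to h - StackSizeOfPtr
  }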

  // Pops the stack into the register `r`.  This pops the full 64-bit width
  // on 64-bit systems, and 32 bits otherwise.
  void popGPR(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  void popFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  void popDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }

#ifdef ENABLE_WASM_SIMD
  void popV128(RegV128 r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
    masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())),
                              r);
#  ifdef RABALDR_CHUNKY_STACK
    popChunkyBytes(StackSizeOfV128);
#  else
    masm.adjustStack((int)StackSizeOfV128);
#  endif
    MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight());
  }
#endif

  void popBytes(size_t bytes) {
    if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
      popChunkyBytes(bytes);
#else
      masm.freeStack(bytes);
#endif
    }
  }

  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  void loadStackRef(int32_t offset, RegRef dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }

#ifdef ENABLE_WASM_SIMD
  void loadStackV128(int32_t offset, RegV128 dest) {
    masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest);
  }
#endif

  uint32_t prepareStackResultArea(StackHeight stackBase,
                                  uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    if (currentStackHeight() < end) {
      uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
      pushChunkyBytes(bytes);
#else
      masm.reserveStack(bytes);
#endif
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
    return end;
  }

  void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= end);
    popBytes(currentStackHeight() - end);
  }

  // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
  void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight < srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    // shuffleStackResultsTowardFP is used when SP/framePushed is not tracked
    // by the compiler, e.g. after a possible return call -- so use
    // FramePointer instead of sp_.
    int32_t destOffset = int32_t(-destHeight + bytes);
    int32_t srcOffset = int32_t(-srcHeight + bytes);
    while (bytes >= sizeof(intptr_t)) {
      destOffset -= sizeof(intptr_t);
      srcOffset -= sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
      masm.loadPtr(Address(FramePointer, srcOffset), temp);
      masm.storePtr(temp, Address(FramePointer, destOffset));
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      destOffset -= sizeof(uint32_t);
      srcOffset -= sizeof(uint32_t);
      masm.load32(Address(FramePointer, srcOffset), temp);
      masm.store32(temp, Address(FramePointer, destOffset));
    }
  }

  // Unlike the overload that operates on raw heights, |srcHeight| and
  // |destHeight| are stack heights *not including* |bytes|.
  void shuffleStackResultsTowardFP(StackHeight srcHeight,
                                   StackHeight destHeight, uint32_t bytes,
                                   Register temp) {
    MOZ_ASSERT(srcHeight.isValid());
    MOZ_ASSERT(destHeight.isValid());
    uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
    uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
    MOZ_ASSERT(src <= currentStackHeight());
    MOZ_ASSERT(dest <= currentStackHeight());
    shuffleStackResultsTowardFP(src, dest, bytes, temp);
  }

  // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
  void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight > srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t destOffset = stackOffset(destHeight);
    uint32_t srcOffset = stackOffset(srcHeight);
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(sp_, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(sp_, destOffset));
    }
  }

  // Copy results from the top of the current stack frame to an area of
  // memory, and pop the stack accordingly.  `dest` is the address of the
  // low byte of that memory.
  void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
    MOZ_ASSERT(bytes <= currentStackHeight());
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t bytesToPop = bytes;
    uint32_t srcOffset = stackOffset(currentStackHeight());
    uint32_t destOffset = 0;
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(dest, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(dest, destOffset));
    }
    popBytes(bytesToPop);
  }

  void allocArgArea(size_t argSize) {
    if (argSize) {
      BaseStackFrameAllocator::allocArgArea(argSize);
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
  }

 private:
  void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) {
    masm.move32(Imm32(imm), temp);
    masm.store32(temp, Address(sp_, stackOffset(destHeight)));
  }

  void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) {
#ifdef JS_PUNBOX64
    masm.move64(Imm64(imm), Register64(temp));
    masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
#else
    union {
      int64_t i64;
      int32_t i32[2];
    } bits = {.i64 = imm};
    static_assert(sizeof(bits) == 8);
    store32BitsToStack(bits.i32[0], destHeight, temp);
    store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp);
#endif
  }

 public:
  void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight,
                                Register temp) {
#ifdef JS_PUNBOX64
    static_assert(StackSizeOfPtr == 8);
    store64BitsToStack(imm, destHeight, temp);
#else
    static_assert(StackSizeOfPtr == 4);
    store32BitsToStack(int32_t(imm), destHeight, temp);
#endif
  }

  void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight,
                                Register temp) {
    store64BitsToStack(imm, destHeight, temp);
  }

  void storeImmediateF32ToStack(float imm, uint32_t destHeight,
                                Register temp) {
    union {
      int32_t i32;
      float f32;
    } bits = {.f32 = imm};
    static_assert(sizeof(bits) == 4);
    // Do not store 4 bytes if StackSizeOfFloat == 8.  It's probably OK to do
    // so, but it costs little to store something predictable.
    if (StackSizeOfFloat == 4) {
      store32BitsToStack(bits.i32, destHeight, temp);
    } else {
      store64BitsToStack(uint32_t(bits.i32), destHeight, temp);
    }
  }

  void storeImmediateF64ToStack(double imm, uint32_t destHeight,
                                Register temp) {
    union {
      int64_t i64;
      double f64;
    } bits = {.f64 = imm};
    static_assert(sizeof(bits) == 8);
    store64BitsToStack(bits.i64, destHeight, temp);
  }

#ifdef ENABLE_WASM_SIMD
  void storeImmediateV128ToStack(V128 imm, uint32_t destHeight,
                                 Register temp) {
    union {
      int32_t i32[4];
      uint8_t bytes[16];
    } bits{};
    static_assert(sizeof(bits) == 16);
    memcpy(bits.bytes, imm.bytes, 16);
    for (unsigned i = 0; i < 4; i++) {
      store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp);
    }
  }
#endif
};
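
// [Editor's illustration -- not part of the upstream file.]  On 32-bit
// targets, store64BitsToStack() above splits the value: the low word
// (i32[0] on a little-endian target) is stored at `destHeight` and the high
// word at `destHeight - 4`.  Because a larger logical height is a lower
// machine address, the low word lands at the lower address -- the correct
// little-endian in-memory layout for the full 64-bit value.
namespace bcframe_split_example {
constexpr unsigned addressOf(unsigned framePushed, unsigned height) {
  return framePushed - height;  // SP-relative address of the slot at `height`
}
static_assert(addressOf(64, 24) < addressOf(64, 20),
              "low word (height 24) sits below high word (height 20)");
}  // namespace bcframe_split_example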

//////////////////////////////////////////////////////////////////////////////
//
// MachineStackTracker, used for stack-slot pointerness tracking.

// An expensive operation in stack-map creation is copying of the
// MachineStackTracker (MST) into the final StackMap.  This is done in
// StackMapGenerator::createStackMap.  Given that this is basically a
// bit-array copy, it is reasonable to ask whether the two classes could have
// a more similar representation, so that the copy could then be done with
// `memcpy`.
//
// Although in principle feasible, the following complications exist, and so
// for the moment, this has not been done.
//
// * StackMap is optimised for compact size (storage) since there will be
//   many, so it uses a true bitmap.  MST is intended to be fast and simple,
//   and only one exists at once (per compilation thread).  Doing this would
//   require MST to use a true bitmap, and hence ..
//
// * .. the copying can't be a straight memcpy, since StackMap has entries
//   for words not covered by MST.  Hence the copy would need to shift bits
//   in each byte left or right (statistically speaking, in 7 cases out of 8)
//   in order to ensure no "holes" in the resulting bitmap.
//
// * Furthermore the copying would need to logically invert the direction of
//   the stacks.  For MST, index zero in the vector corresponds to the
//   highest address in the stack.  For StackMap, bit index zero corresponds
//   to the lowest address in the stack.
//
// * Finally, StackMap is a variable-length structure whose size must be
//   known at creation time.  The size of an MST by contrast isn't known at
//   creation time -- it grows as the baseline compiler pushes stuff on its
//   value stack.  That's why vector entry 0 has to correspond to the highest
//   address.
//
// * Although not directly relevant, StackMaps are also created by the
//   via-Ion compilation routes, by translation from the pre-existing
//   "JS-era" LSafePoints (CreateStackMapFromLSafepoint).  So if we want to
//   mash StackMap around to suit baseline better, we also need to ensure it
//   doesn't break Ion somehow.

class MachineStackTracker {
  // Simulates the machine's stack, with one bool per word.  The booleans are
  // represented as `uint8_t`s so as to guarantee the element size is one
  // byte.  Index zero in this vector corresponds to the highest address in
  // the machine's stack.  The last entry corresponds to what SP currently
  // points at.  This all assumes a grow-down stack.
  //
  // numPtrs_ contains the number of "true" values in vec_, and is therefore
  // redundant.  But it serves as a constant-time way to detect the common
  // case where vec_ holds no "true" values.
  size_t numPtrs_;
  Vector<uint8_t, 64, SystemAllocPolicy> vec_;

 public:
  MachineStackTracker() : numPtrs_(0) {}

  ~MachineStackTracker() {
#ifdef DEBUG
    size_t n = 0;
    for (uint8_t b : vec_) {
      n += (b ? 1 : 0);
    }
    MOZ_ASSERT(n == numPtrs_);
#endif
  }

  // Clone this MachineStackTracker, writing the result at |dst|.
  [[nodiscard]] bool cloneTo(MachineStackTracker* dst);

  // Notionally push |n| non-pointers on the stack.
  [[nodiscard]] bool pushNonGCPointers(size_t n) {
    return vec_.appendN(uint8_t(false), n);
  }

  // Mark the stack slot |offsetFromSP| up from the bottom as holding a
  // pointer.
  void setGCPointer(size_t offsetFromSP) {
    // offsetFromSP == 0 denotes the most recently pushed item, == 1 the
    // second most recently pushed item, etc.
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0);
    vec_[offsetFromTop] = uint8_t(true);
  }
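
  // [Editor's illustration -- not part of the upstream file.]  The index
  // flip above, worked through for a hypothetical 8-slot stack: the vector
  // index is length - 1 - offsetFromSP, so the most recently pushed slot
  // (offsetFromSP == 0) is the last vector entry and the oldest slot
  // (offsetFromSP == 7) is entry 0.
  static_assert(8 - 1 - 0 == 7, "newest slot maps to the last entry");
  static_assert(8 - 1 - 7 == 0, "oldest slot maps to entry 0");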

  // Query the pointerness of the slot |offsetFromSP| up from the bottom.
  bool isGCPointer(size_t offsetFromSP) const {
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    return bool(vec_[offsetFromTop]);
  }

  // Return the number of words tracked by this MachineStackTracker.
  size_t length() const { return vec_.length(); }

  // Return the number of pointer-typed words tracked by this
  // MachineStackTracker.
  size_t numPtrs() const {
    MOZ_ASSERT(numPtrs_ <= length());
    return numPtrs_;
  }

  // Discard all contents, but (per mozilla::Vector::clear semantics) don't
  // free or reallocate any dynamic storage associated with |vec_|.
  void clear() {
    vec_.clear();
    numPtrs_ = 0;
  }

  // An iterator that produces indices of reftyped slots, starting at the
  // logical bottom of the (grow-down) stack.  Indices have the same meaning
  // as the arguments to `isGCPointer`.  That is, if this iterator produces a
  // value `i`, then it means that `isGCPointer(i) == true`; if the value `i`
  // is never produced then `isGCPointer(i) == false`.  The values are
  // produced in ascending order.
  //
  // Because most slots are non-reftyped, some effort has been put into
  // skipping over large groups of non-reftyped slots quickly.
  class Iter {
    // Both `bufU8_` and `bufU32_` are made to point to `vec_`s array of
    // `uint8_t`s, so we can scan (backwards) through it either in bytes or
    // 32-bit words.  Recall that the last element in `vec_` pertains to the
    // lowest-addressed word in the machine's grow-down stack, and we want to
    // iterate logically "up" this stack, so we need to iterate backwards
    // through `vec_`.
    //
    // This dual-pointer scheme assumes that the `vec_`s content array is at
    // least 32-bit aligned.
    const uint8_t* bufU8_;
    const uint32_t* bufU32_;
    // The number of elements in `bufU8_`.
    const size_t nElems_;
    // The index in `bufU8_` where the next search should start.
    size_t next_;

   public:
    explicit Iter(const MachineStackTracker& mst)
        : bufU8_((uint8_t*)mst.vec_.begin()),
          bufU32_((uint32_t*)mst.vec_.begin()),
          nElems_(mst.vec_.length()),
          next_(mst.vec_.length() - 1) {
      MOZ_ASSERT(uintptr_t(bufU8_) == uintptr_t(bufU32_));
      // Check minimum alignment constraint on the array.
      MOZ_ASSERT(0 == (uintptr_t(bufU8_) & 3));
    }

    ~Iter() { MOZ_ASSERT(uintptr_t(bufU8_) == uintptr_t(bufU32_)); }

    // It is important, for termination of the search loop in `get()`, that
    // this has the value obtained by subtracting 1 from size_t(0).
    static constexpr size_t FINISHED = ~size_t(0);
    static_assert(FINISHED == size_t(0) - 1);

    // Returns the next index `i` for which `isGCPointer(i) == true`.
    size_t get() {
      while (next_ != FINISHED) {
        if (bufU8_[next_]) {
          next_--;
          return nElems_ - 1 - (next_ + 1);
        }
        // Invariant: next_ != FINISHED (so it's still a valid index)
        //       and: bufU8_[next_] == 0
        //            (so we need to move backwards by at least 1)
        //
        // BEGIN optimization -- this could be removed without affecting
        // correctness.
        if ((next_ & 7) == 0) {
          // We're at the "bottom" of the current dual-4-element word.  Check
          // whether we can jump backwards by 8.  This saves a conditional
          // branch and a few cycles by ORing two adjacent 32-bit words
          // together, whilst not requiring 64-bit alignment of `bufU32_`.
          while (next_ >= 8 &&
                 (bufU32_[(next_ - 4) >> 2] | bufU32_[(next_ - 8) >> 2]) == 0) {
            next_ -= 8;
          }
        }
        // END optimization
        next_--;
      }
      return FINISHED;
    }
  };
};
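
// [Editor's illustration -- not part of the upstream file.]  The shape of a
// scan over the reftyped slots using MachineStackTracker::Iter; `visit` is a
// hypothetical callback.  Every index produced satisfies
// mst.isGCPointer(i), and indices arrive in ascending order.
template <typename Visitor>
inline void ExampleForEachGCPointer(const MachineStackTracker& mst,
                                    Visitor&& visit) {
  MachineStackTracker::Iter iter(mst);
  for (size_t i = iter.get(); i != MachineStackTracker::Iter::FINISHED;
       i = iter.get()) {
    visit(i);
  }
}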

//////////////////////////////////////////////////////////////////////////////
//
// StackMapGenerator, which carries all state needed to create stackmaps.

enum class HasDebugFrameWithLiveRefs { No, Maybe };

struct StackMapGenerator {
 private:
  // --- These are constant for the life of the function's compilation ---

  // For generating stackmaps, we'll need to know the offsets of registers
  // as saved by the trap exit stub.
  const RegisterOffsets& trapExitLayout_;
  const size_t trapExitLayoutNumWords_;

  // Completed stackmaps are added here.
  StackMaps* stackMaps_;

  // So as to be able to get the current offset when creating stackmaps.
  const MacroAssembler& masm_;

 public:
  // --- These are constant once we've completed beginFunction() ---

  // The number of bytes of arguments passed to this function in memory.
  size_t numStackArgBytes;

  MachineStackTracker machineStackTracker;  // tracks machine stack pointerness

  // This holds masm.framePushed at entry to the function's body.  It is a
  // Maybe because createStackMap needs to know whether or not we're still
  // in the prologue.  It makes a Nothing-to-Some transition just once per
  // function.
  Maybe<uint32_t> framePushedAtEntryToBody;

  // --- These can change at any point ---

  // This holds masm.framePushed as it would be for a function call
  // instruction, but excluding the stack area used to pass arguments in
  // memory.  That is, for an upcoming function call, this will hold
  //
  //   masm.framePushed() at the call instruction -
  //       StackArgAreaSizeAligned(argumentTypes)
  //
  // This value denotes the lowest-addressed stack word covered by the
  // current function's stackmap.  Words below this point form the
  // highest-addressed area of the callee's stackmap.  Note that all
  // alignment padding above the arguments-in-memory themselves belongs to
  // the callee's stackmap, as return calls will replace the function
  // arguments with a new set of arguments which may have different
  // alignment.
  //
  // When not inside a function call setup/teardown sequence, it is Nothing.
  // It can make Nothing-to/from-Some transitions arbitrarily as we progress
  // through the function body.
  Maybe<uint32_t> framePushedExcludingOutboundCallArgs;

  // The number of memory-resident, ref-typed entries on the containing
  // BaseCompiler::stk_.
  size_t memRefsOnStk;

  // This is a copy of machineStackTracker that is used only within
  // individual calls to createStackMap.  It is here only to avoid possible
  // heap allocation costs resulting from making it local to
  // createStackMap().
  MachineStackTracker augmentedMst;

  StackMapGenerator(StackMaps* stackMaps, const RegisterOffsets& trapExitLayout,
                    const size_t trapExitLayoutNumWords,
                    const MacroAssembler& masm)
      : trapExitLayout_(trapExitLayout),
        trapExitLayoutNumWords_(trapExitLayoutNumWords),
        stackMaps_(stackMaps),
        masm_(masm),
        numStackArgBytes(0),
        memRefsOnStk(0) {}

  // At the beginning of a function, we may have live roots in registers (as
  // arguments) at the point where we perform a stack overflow check.  This
  // method generates the "extra" stackmap entries to describe that, in the
  // case that the check fails and we wind up calling into the wasm exit
  // stub, as generated by GenerateTrapExit().
  //
  // The resulting map must correspond precisely with the stack layout
  // created for the integer registers as saved by (code generated by)
  // GenerateTrapExit().  To do that we use trapExitLayout_ and
  // trapExitLayoutNumWords_, which together comprise a description of the
  // layout and are created by GenerateTrapExitRegisterOffsets().
  [[nodiscard]] bool generateStackmapEntriesForTrapExit(
      const ArgTypeVector& args, ExitStubMapVector* extras);

  // Creates a stackmap associated with the instruction denoted by
  // |assemblerOffset|, incorporating pointers from the current operand
  // stack |stk|, incorporating possible extra pointers in |extra| at the
  // lower addressed end, and possibly with the associated frame having a
  // DebugFrame that must be traced, as indicated by |debugFrameWithLiveRefs|.
  [[nodiscard]] bool createStackMap(
      const char* who, const ExitStubMapVector& extras,
      uint32_t assemblerOffset,
      HasDebugFrameWithLiveRefs debugFrameWithLiveRefs, const StkVector& stk);
};

}  // namespace wasm
}  // namespace js

#endif  // wasm_wasm_baseline_frame_h