/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "wasm/WasmBCFrame.h"

#include "wasm/WasmBaselineCompile.h"  // For BaseLocalIter
#include "wasm/WasmBCClass.h"

#include "jit/MacroAssembler-inl.h"
#include "wasm/WasmBCClass-inl.h"
#include "wasm/WasmBCCodegen-inl.h"
#include "wasm/WasmBCRegDefs-inl.h"
#include "wasm/WasmBCRegMgmt-inl.h"
#include "wasm/WasmBCStkMgmt-inl.h"

namespace js {
namespace wasm {

//////////////////////////////////////////////////////////////////////////////
//
// BaseLocalIter methods.

BaseLocalIter::BaseLocalIter(const ValTypeVector& locals,
                             const ArgTypeVector& args, bool debugEnabled)
    : locals_(locals),
      args_(args),
      argsIter_(args_),
      index_(0),
      frameSize_(0),
      nextFrameSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
      frameOffset_(INT32_MAX),
      stackResultPointerOffset_(INT32_MAX),
      mirType_(MIRType::Undefined),
      done_(false) {
  MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length());
  settle();
}

int32_t BaseLocalIter::pushLocal(size_t nbytes) {
  MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
  nextFrameSize_ = AlignBytes(frameSize_, nbytes) + nbytes;
  return nextFrameSize_;  // Locals grow down so capture base address.
}

void BaseLocalIter::settle() {
  MOZ_ASSERT(!done_);
  frameSize_ = nextFrameSize_;

  if (!argsIter_.done()) {
    mirType_ = argsIter_.mirType();
    MIRType concreteType = mirType_;
    switch (mirType_) {
      case MIRType::StackResults:
        // The pointer to stack results is handled like any other argument:
        // either addressed in place if it is passed on the stack, or we spill
        // it in the frame if it's in a register.
        MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_));
        concreteType = MIRType::Pointer;
        [[fallthrough]];
      case MIRType::Int32:
      case MIRType::Int64:
      case MIRType::Double:
      case MIRType::Float32:
      case MIRType::WasmAnyRef:
#ifdef ENABLE_WASM_SIMD
      case MIRType::Simd128:
#endif
        if (argsIter_->argInRegister()) {
          frameOffset_ = pushLocal(MIRTypeToSize(concreteType));
        } else {
          frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
        }
        break;
      default:
        MOZ_CRASH("Argument type");
    }
    if (mirType_ == MIRType::StackResults) {
      stackResultPointerOffset_ = frameOffset();
      // Advance past the synthetic stack result pointer argument and fall
      // through to the next case.
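      // (The synthetic stack result pointer is the last argument we iterate
      // over here, which is what the MOZ_ASSERT(argsIter_.done()) just below
      // relies on.)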
      argsIter_++;
      frameSize_ = nextFrameSize_;
      MOZ_ASSERT(argsIter_.done());
    } else {
      return;
    }
  }

  if (index_ < locals_.length()) {
    switch (locals_[index_].kind()) {
      case ValType::I32:
      case ValType::I64:
      case ValType::F32:
      case ValType::F64:
#ifdef ENABLE_WASM_SIMD
      case ValType::V128:
#endif
      case ValType::Ref:
        mirType_ = locals_[index_].toMIRType();
        frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        break;
      default:
        MOZ_CRASH("Compiler bug: Unexpected local type");
    }
    return;
  }

  done_ = true;
}

void BaseLocalIter::operator++(int) {
  MOZ_ASSERT(!done_);
  index_++;
  if (!argsIter_.done()) {
    argsIter_++;
  }
  settle();
}

//////////////////////////////////////////////////////////////////////////////
//
// Stack map methods.

bool BaseCompiler::createStackMap(const char* who) {
  const ExitStubMapVector noExtras;
  return stackMapGenerator_.createStackMap(who, noExtras, masm.currentOffset(),
                                           HasDebugFrameWithLiveRefs::No, stk_);
}

bool BaseCompiler::createStackMap(const char* who, CodeOffset assemblerOffset) {
  const ExitStubMapVector noExtras;
  return stackMapGenerator_.createStackMap(who, noExtras,
                                           assemblerOffset.offset(),
                                           HasDebugFrameWithLiveRefs::No, stk_);
}

bool BaseCompiler::createStackMap(
    const char* who, HasDebugFrameWithLiveRefs debugFrameWithLiveRefs) {
  const ExitStubMapVector noExtras;
  return stackMapGenerator_.createStackMap(who, noExtras, masm.currentOffset(),
                                           debugFrameWithLiveRefs, stk_);
}

bool BaseCompiler::createStackMap(
    const char* who, const ExitStubMapVector& extras, uint32_t assemblerOffset,
    HasDebugFrameWithLiveRefs debugFrameWithLiveRefs) {
  return stackMapGenerator_.createStackMap(who, extras, assemblerOffset,
                                           debugFrameWithLiveRefs, stk_);
}

bool MachineStackTracker::cloneTo(MachineStackTracker* dst) {
  MOZ_ASSERT(dst->vec_.empty());
  if (!dst->vec_.appendAll(vec_)) {
    return false;
  }
  dst->numPtrs_ = numPtrs_;
  return true;
}

bool StackMapGenerator::generateStackmapEntriesForTrapExit(
    const ArgTypeVector& args, ExitStubMapVector* extras) {
  return GenerateStackmapEntriesForTrapExit(args, trapExitLayout_,
                                            trapExitLayoutNumWords_, extras);
}

bool StackMapGenerator::createStackMap(
    const char* who, const ExitStubMapVector& extras, uint32_t assemblerOffset,
    HasDebugFrameWithLiveRefs debugFrameWithLiveRefs, const StkVector& stk) {
  size_t countedPointers = machineStackTracker.numPtrs() + memRefsOnStk;

#ifndef DEBUG
  // An important optimization. If there are obviously no pointers, as
  // we expect in the majority of cases, exit quickly.
  if (countedPointers == 0 &&
      debugFrameWithLiveRefs == HasDebugFrameWithLiveRefs::No) {
    // We can skip creating the map if there are no |true| elements in
    // |extras|.
    bool extrasHasRef = false;
    for (bool b : extras) {
      if (b) {
        extrasHasRef = true;
        break;
      }
    }
    if (!extrasHasRef) {
      return true;
    }
  }
#else
  // In the debug case, create the stackmap regardless, and cross-check
  // the pointer-counting below. We expect the final map to have
  // |countedPointers| in total. This doesn't include those in the
  // DebugFrame, but they do not appear in the map's bitmap. Note that
  // |countedPointers| is debug-only from this point onwards.
  for (bool b : extras) {
    countedPointers += (b ? 1 : 0);
  }
#endif

  // Start with the frame-setup map, and add operand-stack information to
  // that. augmentedMst holds live data only within individual calls to
  // createStackMap.
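  //
  // In outline, the code below: (1) clones machineStackTracker into
  // augmentedMst, (2) extends it to cover the part of the frame pushed by
  // the function body, excluding any outbound call args, (3) marks the
  // frame offsets of MemRef operand-stack entries, and (4) copies |extras|
  // and augmentedMst into a freshly allocated StackMap.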
  augmentedMst.clear();
  if (!machineStackTracker.cloneTo(&augmentedMst)) {
    return false;
  }

  // At this point, augmentedMst only contains entries covering the
  // incoming argument area (if any) and for the area allocated by this
  // function's prologue. We now need to calculate how far the machine's
  // stack pointer is below where it was at the start of the body. But we
  // must take care not to include any words pushed as arguments to an
  // upcoming function call, since those words belong to the stackmap of
  // the callee, not to the stackmap of this function. Any alignment padding
  // for the args also belongs to the callee.
  //
  // The only padding belonging to the stackmap of this function is that
  // required to align the upcoming frame. This is accounted for where
  // framePushedExcludingOutboundCallArgs is set, in startCallArgs(), and
  // consists of just one component:
  //
  //   * call->frameAlignAdjustment

  Maybe<uint32_t> framePushedExcludingArgs;
  if (framePushedAtEntryToBody.isNothing()) {
    // Still in the prologue. framePushedExcludingArgs remains Nothing.
    MOZ_ASSERT(framePushedExcludingOutboundCallArgs.isNothing());
  } else {
    // In the body.
    MOZ_ASSERT(masm_.framePushed() >= framePushedAtEntryToBody.value());
    if (framePushedExcludingOutboundCallArgs.isSome()) {
      // In the body, and we've potentially pushed some args onto the stack.
      // We must ignore them when sizing the stackmap.
      MOZ_ASSERT(masm_.framePushed() >=
                 framePushedExcludingOutboundCallArgs.value());
      MOZ_ASSERT(framePushedExcludingOutboundCallArgs.value() >=
                 framePushedAtEntryToBody.value());
      framePushedExcludingArgs =
          Some(framePushedExcludingOutboundCallArgs.value());
    } else {
      // In the body, but not with call args on the stack. The stackmap
      // must be sized so as to extend all the way "down" to
      // masm_.framePushed().
      framePushedExcludingArgs = Some(masm_.framePushed());
    }
  }

  if (framePushedExcludingArgs.isSome()) {
    uint32_t bodyPushedBytes =
        framePushedExcludingArgs.value() - framePushedAtEntryToBody.value();
    MOZ_ASSERT(0 == bodyPushedBytes % sizeof(void*));
    if (!augmentedMst.pushNonGCPointers(bodyPushedBytes / sizeof(void*))) {
      return false;
    }
  }

  // Scan the operand stack, marking pointers in the just-added new
  // section.
  MOZ_ASSERT_IF(framePushedAtEntryToBody.isNothing(), stk.empty());
  MOZ_ASSERT_IF(framePushedExcludingArgs.isNothing(), stk.empty());

  for (const Stk& v : stk) {
#ifndef DEBUG
    // We don't track roots in registers, per rationale below, so if this
    // doesn't hold, something is seriously wrong, and we're likely to get a
    // GC-related crash.
    MOZ_RELEASE_ASSERT(v.kind() != Stk::RegisterRef);
    if (v.kind() != Stk::MemRef) {
      continue;
    }
#else
    // Take the opportunity to check everything we reasonably can about
    // operand stack elements.
    switch (v.kind()) {
      case Stk::MemI32:
      case Stk::MemI64:
      case Stk::MemF32:
      case Stk::MemF64:
      case Stk::ConstI32:
      case Stk::ConstI64:
      case Stk::ConstF32:
      case Stk::ConstF64:
#  ifdef ENABLE_WASM_SIMD
      case Stk::MemV128:
      case Stk::ConstV128:
#  endif
        // All of these have uninteresting type.
        continue;
      case Stk::LocalI32:
      case Stk::LocalI64:
      case Stk::LocalF32:
      case Stk::LocalF64:
#  ifdef ENABLE_WASM_SIMD
      case Stk::LocalV128:
#  endif
        // These also have uninteresting type. Check that they live in the
        // section of stack set up by beginFunction(). The unguarded use of
        // |value()| here is safe due to the assertion above this loop.
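        // (That is, if stk is non-empty the MOZ_ASSERT_IFs above guarantee
        // framePushedAtEntryToBody is Some, here and in the LocalRef case
        // below.)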
        MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value());
        continue;
      case Stk::RegisterI32:
      case Stk::RegisterI64:
      case Stk::RegisterF32:
      case Stk::RegisterF64:
#  ifdef ENABLE_WASM_SIMD
      case Stk::RegisterV128:
#  endif
        // These also have uninteresting type, but more to the point: all
        // registers holding live values should have been flushed to the
        // machine stack immediately prior to the instruction to which this
        // stackmap pertains. So these can't happen.
        MOZ_CRASH("createStackMap: operand stack has Register-non-Ref");
      case Stk::MemRef:
        // This is the only case we care about. We'll handle it after the
        // switch.
        break;
      case Stk::LocalRef:
        // We need the stackmap to mention this pointer, but it should
        // already be in the machineStackTracker section created by
        // beginFunction().
        MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value());
        continue;
      case Stk::ConstRef:
        // This can currently only be a null pointer.
        MOZ_ASSERT(v.refval() == 0);
        continue;
      case Stk::RegisterRef:
        // This can't happen, per rationale above.
        MOZ_CRASH("createStackMap: operand stack contains RegisterRef");
      default:
        MOZ_CRASH("createStackMap: unknown operand stack element");
    }
#endif

    // v.offs() holds masm.framePushed() at the point immediately after it
    // was pushed on the stack. Since it's still on the stack,
    // masm.framePushed() can't be less.
    MOZ_ASSERT(v.offs() <= framePushedExcludingArgs.value());

    uint32_t offsFromMapLowest = framePushedExcludingArgs.value() - v.offs();
    MOZ_ASSERT(0 == offsFromMapLowest % sizeof(void*));
    augmentedMst.setGCPointer(offsFromMapLowest / sizeof(void*));
  }

  MOZ_ASSERT(numStackArgBytes % sizeof(void*) == 0);
  const size_t numStackArgWords = numStackArgBytes / sizeof(void*);

  const size_t numStackArgPaddingBytes =
      AlignStackArgAreaSize(numStackArgBytes) - numStackArgBytes;
  const size_t numStackArgPaddingWords =
      numStackArgPaddingBytes / sizeof(void*);

  // Create the final StackMap. The initial map is zeroed out, so there's
  // no need to write zero bits in it.
  const uint32_t extraWords = extras.length();
  const uint32_t augmentedMstWords = augmentedMst.length();
  const uint32_t numMappedWords =
      numStackArgPaddingWords + extraWords + augmentedMstWords;
  StackMap* stackMap = StackMap::create(numMappedWords);
  if (!stackMap) {
    return false;
  }

  {
    // First the exit stub extra words, if any.
    uint32_t i = 0;
    for (bool b : extras) {
      if (b) {
        stackMap->set(i, StackMap::Kind::AnyRef);
      }
      i++;
    }
  }

  {
    // Followed by the "main" part of the map.
    //
    // This is really just a bit-array copy, so it is reasonable to ask
    // whether the representation of MachineStackTracker could be made more
    // similar to that of StackMap, so that the copy could be done with
    // `memcpy`. Unfortunately it's not so simple; see comment on `class
    // MachineStackTracker` for details.
    MachineStackTracker::Iter iter(augmentedMst);
    while (true) {
      size_t i = iter.get();
      if (i == MachineStackTracker::Iter::FINISHED) {
        break;
      }
      stackMap->set(extraWords + i, StackMap::Kind::AnyRef);
    }
  }

  stackMap->setExitStubWords(extraWords);

  // Record in the map how far down from the highest address the Frame* is.
  // Take the opportunity to check that we haven't marked any part of the
  // Frame itself as a pointer.
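  //
  // For instance (illustrative numbers only, not a property of any
  // particular target): with two words of stack args, no stack-arg
  // alignment padding, and a two-word Frame, the offset recorded below is
  // 0 + 2 + 2 == 4 words.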
  stackMap->setFrameOffsetFromTop(numStackArgPaddingWords + numStackArgWords +
                                  sizeof(Frame) / sizeof(void*));
#ifdef DEBUG
  for (uint32_t i = 0; i < sizeof(Frame) / sizeof(void*); i++) {
    MOZ_ASSERT(stackMap->get(stackMap->header.numMappedWords -
                             stackMap->header.frameOffsetFromTop + i) ==
               StackMap::Kind::POD);
  }
#endif

  // Note the presence of a DebugFrame with live pointers, if any.
  if (debugFrameWithLiveRefs != HasDebugFrameWithLiveRefs::No) {
    stackMap->setHasDebugFrameWithLiveRefs();
  }

  // Add the completed map to the running collection thereof.
  if (!stackMaps_->add((uint8_t*)(uintptr_t)assemblerOffset, stackMap)) {
    stackMap->destroy();
    return false;
  }

#ifdef DEBUG
  {
    // Crosscheck the map pointer counting.
    uint32_t nw = stackMap->header.numMappedWords;
    uint32_t np = 0;
    for (uint32_t i = 0; i < nw; i++) {
      if (stackMap->get(i) == StackMap::Kind::AnyRef) {
        np += 1;
      }
    }
    MOZ_ASSERT(size_t(np) == countedPointers);
  }
#endif

  return true;
}

//////////////////////////////////////////////////////////////////////////////
//
// Stack frame methods.

void BaseStackFrame::zeroLocals(BaseRegAlloc* ra) {
  MOZ_ASSERT(varLow_ != UINT32_MAX);

  if (varLow_ == varHigh_) {
    return;
  }

  static const uint32_t wordSize = sizeof(void*);

  // The adjustments to 'low' by the size of the item being stored compensate
  // for the fact that locals offsets are the offsets from Frame to the bytes
  // directly "above" the locals in the locals area. See comment at Local.

  // On 64-bit systems we may have 32-bit alignment for the local area as it
  // may be preceded by parameters and prologue/debug data.
  uint32_t low = varLow_;
  if (low % wordSize) {
    masm.store32(Imm32(0), Address(sp_, localOffset(low + 4)));
    low += 4;
  }
  MOZ_ASSERT(low % wordSize == 0);

  const uint32_t high = AlignBytes(varHigh_, wordSize);

  // An UNROLL_LIMIT of 16 is chosen so that we only need an 8-bit signed
  // immediate to represent the offset in the store instructions in the loop
  // on x64.
  const uint32_t UNROLL_LIMIT = 16;
  const uint32_t initWords = (high - low) / wordSize;
  const uint32_t tailWords = initWords % UNROLL_LIMIT;
  const uint32_t loopHigh = high - (tailWords * wordSize);

  // With only one word to initialize, just store an immediate zero.
  if (initWords == 1) {
    masm.storePtr(ImmWord(0), Address(sp_, localOffset(low + wordSize)));
    return;
  }

  // For other cases, it's best to have a zero in a register.
  //
  // One can do more here with SIMD registers (store 16 bytes at a time) or
  // with instructions like STRD on ARM (store 8 bytes at a time), but that's
  // for another day.
  RegI32 zero = ra->needI32();
  masm.mov(ImmWord(0), zero);

  // For the general case we want to have a loop body of UNROLL_LIMIT stores
  // and then a tail of less than UNROLL_LIMIT stores. When initWords is less
  // than 2*UNROLL_LIMIT the loop trip count is at most 1 and there is no
  // benefit to having the pointer calculations and the compare-and-branch.
  // So we completely unroll when we have initWords < 2 * UNROLL_LIMIT. (In
  // this case we'll end up using 32-bit offsets on x64 for up to half of the
  // stores, though.)

  // Fully-unrolled case.
  if (initWords < 2 * UNROLL_LIMIT) {
    for (uint32_t i = low; i < high; i += wordSize) {
      masm.storePtr(zero, Address(sp_, localOffset(i + wordSize)));
    }
    ra->freeI32(zero);
    return;
  }

  // Unrolled loop with a tail. Stores will use negative offsets. That's OK
  // for x86 and ARM, at least.
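  //
  // Worked example (illustrative numbers only): with initWords == 40 and
  // UNROLL_LIMIT == 16 we get tailWords == 8, so the loop below runs twice
  // (32 stores) and the tail issues the remaining 8 stores.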
  // Compute pointer to the highest-addressed slot on the frame.
  RegI32 p = ra->needI32();
  masm.computeEffectiveAddress(Address(sp_, localOffset(low + wordSize)), p);

  // Compute pointer to the lowest-addressed slot on the frame that will be
  // initialized by the loop body.
  RegI32 lim = ra->needI32();
  masm.computeEffectiveAddress(Address(sp_, localOffset(loopHigh + wordSize)),
                               lim);

  // The loop body. Eventually we'll have p == lim and exit the loop.
  Label again;
  masm.bind(&again);
  for (uint32_t i = 0; i < UNROLL_LIMIT; ++i) {
    masm.storePtr(zero, Address(p, -(wordSize * i)));
  }
  masm.subPtr(Imm32(UNROLL_LIMIT * wordSize), p);
  masm.branchPtr(Assembler::LessThan, lim, p, &again);

  // The tail.
  for (uint32_t i = 0; i < tailWords; ++i) {
    masm.storePtr(zero, Address(p, -(wordSize * i)));
  }

  ra->freeI32(p);
  ra->freeI32(lim);
  ra->freeI32(zero);
}

}  // namespace wasm
}  // namespace js