/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "jit/x86-shared/CodeGenerator-x86-shared.h" #include "mozilla/DebugOnly.h" #include "mozilla/MathAlgorithms.h" #include "jit/CodeGenerator.h" #include "jit/InlineScriptTree.h" #include "jit/JitRuntime.h" #include "jit/RangeAnalysis.h" #include "jit/ReciprocalMulConstants.h" #include "js/ScalarType.h" // js::Scalar::Type #include "util/DifferentialTesting.h" #include "jit/MacroAssembler-inl.h" #include "jit/shared/CodeGenerator-shared-inl.h" using namespace js; using namespace js::jit; using mozilla::Abs; using mozilla::DebugOnly; using mozilla::FloorLog2; using mozilla::NegativeInfinity; using JS::GenericNaN; namespace js { namespace jit { CodeGeneratorX86Shared::CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm) : CodeGeneratorShared(gen, graph, masm) {} #ifdef JS_PUNBOX64 Operand CodeGeneratorX86Shared::ToOperandOrRegister64( const LInt64Allocation input) { return ToOperand(input.value()); } #else Register64 CodeGeneratorX86Shared::ToOperandOrRegister64( const LInt64Allocation input) { return ToRegister64(input); } #endif void OutOfLineBailout::accept(CodeGeneratorX86Shared* codegen) { codegen->visitOutOfLineBailout(this); } void CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond, MBasicBlock* mirTrue, MBasicBlock* mirFalse, Assembler::NaNCond ifNaN) { if (ifNaN == Assembler::NaN_IsFalse) { jumpToBlock(mirFalse, Assembler::Parity); } else if (ifNaN == Assembler::NaN_IsTrue) { jumpToBlock(mirTrue, Assembler::Parity); } if (isNextBlock(mirFalse->lir())) { jumpToBlock(mirTrue, cond); } else { jumpToBlock(mirFalse, Assembler::InvertCondition(cond)); jumpToBlock(mirTrue); } } void CodeGenerator::visitDouble(LDouble* ins) { const LDefinition* out = ins->getDef(0); masm.loadConstantDouble(ins->value(), ToFloatRegister(out)); } void CodeGenerator::visitFloat32(LFloat32* ins) { const LDefinition* out = ins->getDef(0); masm.loadConstantFloat32(ins->value(), ToFloatRegister(out)); } void CodeGenerator::visitTestIAndBranch(LTestIAndBranch* test) { Register input = ToRegister(test->input()); masm.test32(input, input); emitBranch(Assembler::NonZero, test->ifTrue(), test->ifFalse()); } void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) { const LAllocation* opd = test->input(); // vucomisd flags: // Z P C // --------- // NaN 1 1 1 // > 0 0 0 // < 0 0 1 // = 1 0 0 // // NaN is falsey, so comparing against 0 and then using the Z flag is // enough to determine which branch to take. ScratchDoubleScope scratch(masm); masm.zeroDouble(scratch); masm.vucomisd(scratch, ToFloatRegister(opd)); emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); } void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) { const LAllocation* opd = test->input(); // vucomiss flags are the same as doubles; see comment above { ScratchFloat32Scope scratch(masm); masm.zeroFloat32(scratch); masm.vucomiss(scratch, ToFloatRegister(opd)); } emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); } void CodeGeneratorX86Shared::emitCompare(MCompare::CompareType type, const LAllocation* left, const LAllocation* right) { #ifdef JS_CODEGEN_X64 if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol || type == MCompare::Compare_UIntPtr || type == MCompare::Compare_RefOrNull) { if (right->isConstant()) { MOZ_ASSERT(type == MCompare::Compare_UIntPtr); masm.cmpPtr(ToRegister(left), Imm32(ToInt32(right))); } else { masm.cmpPtr(ToRegister(left), ToOperand(right)); } return; } #endif if (right->isConstant()) { masm.cmp32(ToRegister(left), Imm32(ToInt32(right))); } else { masm.cmp32(ToRegister(left), ToOperand(right)); } } void CodeGenerator::visitCompare(LCompare* comp) { MCompare* mir = comp->mir(); emitCompare(mir->compareType(), comp->left(), comp->right()); masm.emitSet(JSOpToCondition(mir->compareType(), comp->jsop()), ToRegister(comp->output())); } void CodeGenerator::visitCompareAndBranch(LCompareAndBranch* comp) { MCompare* mir = comp->cmpMir(); emitCompare(mir->compareType(), comp->left(), comp->right()); Assembler::Condition cond = JSOpToCondition(mir->compareType(), comp->jsop()); emitBranch(cond, comp->ifTrue(), comp->ifFalse()); } void CodeGenerator::visitCompareD(LCompareD* comp) { FloatRegister lhs = ToFloatRegister(comp->left()); FloatRegister rhs = ToFloatRegister(comp->right()); Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); if (comp->mir()->operandsAreNeverNaN()) { nanCond = Assembler::NaN_HandledByCond; } masm.compareDouble(cond, lhs, rhs); masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond); } void CodeGenerator::visitCompareF(LCompareF* comp) { FloatRegister lhs = ToFloatRegister(comp->left()); FloatRegister rhs = ToFloatRegister(comp->right()); Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); if (comp->mir()->operandsAreNeverNaN()) { nanCond = Assembler::NaN_HandledByCond; } masm.compareFloat(cond, lhs, rhs); masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond); } void CodeGenerator::visitNotI(LNotI* ins) { masm.cmp32(ToRegister(ins->input()), Imm32(0)); masm.emitSet(Assembler::Equal, ToRegister(ins->output())); } void CodeGenerator::visitNotD(LNotD* ins) { FloatRegister opd = ToFloatRegister(ins->input()); // Not returns true if the input is a NaN. We don't have to worry about // it if we know the input is never NaN though. Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; if (ins->mir()->operandIsNeverNaN()) { nanCond = Assembler::NaN_HandledByCond; } ScratchDoubleScope scratch(masm); masm.zeroDouble(scratch); masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch); masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); } void CodeGenerator::visitNotF(LNotF* ins) { FloatRegister opd = ToFloatRegister(ins->input()); // Not returns true if the input is a NaN. We don't have to worry about // it if we know the input is never NaN though. Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; if (ins->mir()->operandIsNeverNaN()) { nanCond = Assembler::NaN_HandledByCond; } ScratchFloat32Scope scratch(masm); masm.zeroFloat32(scratch); masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch); masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); } void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) { FloatRegister lhs = ToFloatRegister(comp->left()); FloatRegister rhs = ToFloatRegister(comp->right()); Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop()); Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); if (comp->cmpMir()->operandsAreNeverNaN()) { nanCond = Assembler::NaN_HandledByCond; } masm.compareDouble(cond, lhs, rhs); emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond); } void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) { FloatRegister lhs = ToFloatRegister(comp->left()); FloatRegister rhs = ToFloatRegister(comp->right()); Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop()); Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); if (comp->cmpMir()->operandsAreNeverNaN()) { nanCond = Assembler::NaN_HandledByCond; } masm.compareFloat(cond, lhs, rhs); emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond); } void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) { const MWasmStackArg* mir = ins->mir(); Address dst(StackPointer, mir->spOffset()); if (ins->arg()->isConstant()) { masm.storePtr(ImmWord(ToInt32(ins->arg())), dst); } else if (ins->arg()->isGeneralReg()) { masm.storePtr(ToRegister(ins->arg()), dst); } else { switch (mir->input()->type()) { case MIRType::Double: masm.storeDouble(ToFloatRegister(ins->arg()), dst); return; case MIRType::Float32: masm.storeFloat32(ToFloatRegister(ins->arg()), dst); return; #ifdef ENABLE_WASM_SIMD case MIRType::Simd128: masm.storeUnalignedSimd128(ToFloatRegister(ins->arg()), dst); return; #endif default: break; } MOZ_CRASH("unexpected mir type in WasmStackArg"); } } void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) { const MWasmStackArg* mir = ins->mir(); Address dst(StackPointer, mir->spOffset()); if (IsConstant(ins->arg())) { masm.store64(Imm64(ToInt64(ins->arg())), dst); } else { masm.store64(ToRegister64(ins->arg()), dst); } } void CodeGenerator::visitWasmSelect(LWasmSelect* ins) { MIRType mirType = ins->mir()->type(); Register cond = ToRegister(ins->condExpr()); Operand falseExpr = ToOperand(ins->falseExpr()); masm.test32(cond, cond); if (mirType == MIRType::Int32 || mirType == MIRType::RefOrNull) { Register out = ToRegister(ins->output()); MOZ_ASSERT(ToRegister(ins->trueExpr()) == out, "true expr input is reused for output"); if (mirType == MIRType::Int32) { masm.cmovz32(falseExpr, out); } else { masm.cmovzPtr(falseExpr, out); } return; } FloatRegister out = ToFloatRegister(ins->output()); MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out, "true expr input is reused for output"); Label done; masm.j(Assembler::NonZero, &done); if (mirType == MIRType::Float32) { if (falseExpr.kind() == Operand::FPREG) { masm.moveFloat32(ToFloatRegister(ins->falseExpr()), out); } else { masm.loadFloat32(falseExpr, out); } } else if (mirType == MIRType::Double) { if (falseExpr.kind() == Operand::FPREG) { masm.moveDouble(ToFloatRegister(ins->falseExpr()), out); } else { masm.loadDouble(falseExpr, out); } } else if (mirType == MIRType::Simd128) { if (falseExpr.kind() == Operand::FPREG) { masm.moveSimd128(ToFloatRegister(ins->falseExpr()), out); } else { masm.loadUnalignedSimd128(falseExpr, out); } } else { MOZ_CRASH("unhandled type in visitWasmSelect!"); } masm.bind(&done); } void CodeGenerator::visitWasmReinterpret(LWasmReinterpret* lir) { MOZ_ASSERT(gen->compilingWasm()); MWasmReinterpret* ins = lir->mir(); MIRType to = ins->type(); #ifdef DEBUG MIRType from = ins->input()->type(); #endif switch (to) { case MIRType::Int32: MOZ_ASSERT(from == MIRType::Float32); masm.vmovd(ToFloatRegister(lir->input()), ToRegister(lir->output())); break; case MIRType::Float32: MOZ_ASSERT(from == MIRType::Int32); masm.vmovd(ToRegister(lir->input()), ToFloatRegister(lir->output())); break; case MIRType::Double: case MIRType::Int64: MOZ_CRASH("not handled by this LIR opcode"); default: MOZ_CRASH("unexpected WasmReinterpret"); } } void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) { const MAsmJSLoadHeap* mir = ins->mir(); MOZ_ASSERT(mir->access().offset() == 0); const LAllocation* ptr = ins->ptr(); const LAllocation* boundsCheckLimit = ins->boundsCheckLimit(); AnyRegister out = ToAnyRegister(ins->output()); Scalar::Type accessType = mir->accessType(); OutOfLineLoadTypedArrayOutOfBounds* ool = nullptr; if (mir->needsBoundsCheck()) { ool = new (alloc()) OutOfLineLoadTypedArrayOutOfBounds(out, accessType); addOutOfLineCode(ool, mir); masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ToRegister(ptr), ToRegister(boundsCheckLimit), ool->entry()); } Operand srcAddr = toMemoryAccessOperand(ins, 0); masm.wasmLoad(mir->access(), srcAddr, out); if (ool) { masm.bind(ool->rejoin()); } } void CodeGeneratorX86Shared::visitOutOfLineLoadTypedArrayOutOfBounds( OutOfLineLoadTypedArrayOutOfBounds* ool) { switch (ool->viewType()) { case Scalar::Int64: case Scalar::BigInt64: case Scalar::BigUint64: case Scalar::Simd128: case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected array type"); case Scalar::Float32: masm.loadConstantFloat32(float(GenericNaN()), ool->dest().fpu()); break; case Scalar::Float64: masm.loadConstantDouble(GenericNaN(), ool->dest().fpu()); break; case Scalar::Int8: case Scalar::Uint8: case Scalar::Int16: case Scalar::Uint16: case Scalar::Int32: case Scalar::Uint32: case Scalar::Uint8Clamped: Register destReg = ool->dest().gpr(); masm.mov(ImmWord(0), destReg); break; } masm.jmp(ool->rejoin()); } void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) { const MAsmJSStoreHeap* mir = ins->mir(); const LAllocation* ptr = ins->ptr(); const LAllocation* value = ins->value(); const LAllocation* boundsCheckLimit = ins->boundsCheckLimit(); Scalar::Type accessType = mir->accessType(); canonicalizeIfDeterministic(accessType, value); Label rejoin; if (mir->needsBoundsCheck()) { masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ToRegister(ptr), ToRegister(boundsCheckLimit), &rejoin); } Operand dstAddr = toMemoryAccessOperand(ins, 0); masm.wasmStore(mir->access(), ToAnyRegister(value), dstAddr); if (rejoin.used()) { masm.bind(&rejoin); } } void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) { MWasmAddOffset* mir = lir->mir(); Register base = ToRegister(lir->base()); Register out = ToRegister(lir->output()); if (base != out) { masm.move32(base, out); } masm.add32(Imm32(mir->offset()), out); OutOfLineAbortingWasmTrap* ool = new (alloc()) OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds); addOutOfLineCode(ool, mir); masm.j(Assembler::CarrySet, ool->entry()); } void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) { MWasmAddOffset* mir = lir->mir(); Register64 base = ToRegister64(lir->base()); Register64 out = ToOutRegister64(lir); if (base != out) { masm.move64(base, out); } masm.add64(Imm64(mir->offset()), out); OutOfLineAbortingWasmTrap* ool = new (alloc()) OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds); addOutOfLineCode(ool, mir); masm.j(Assembler::CarrySet, ool->entry()); } void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) { FloatRegister input = ToFloatRegister(lir->input()); Register output = ToRegister(lir->output()); MWasmTruncateToInt32* mir = lir->mir(); MIRType inputType = mir->input()->type(); MOZ_ASSERT(inputType == MIRType::Double || inputType == MIRType::Float32); auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output); addOutOfLineCode(ool, mir); Label* oolEntry = ool->entry(); if (mir->isUnsigned()) { if (inputType == MIRType::Double) { masm.wasmTruncateDoubleToUInt32(input, output, mir->isSaturating(), oolEntry); } else if (inputType == MIRType::Float32) { masm.wasmTruncateFloat32ToUInt32(input, output, mir->isSaturating(), oolEntry); } else { MOZ_CRASH("unexpected type"); } if (mir->isSaturating()) { masm.bind(ool->rejoin()); } return; } if (inputType == MIRType::Double) { masm.wasmTruncateDoubleToInt32(input, output, mir->isSaturating(), oolEntry); } else if (inputType == MIRType::Float32) { masm.wasmTruncateFloat32ToInt32(input, output, mir->isSaturating(), oolEntry); } else { MOZ_CRASH("unexpected type"); } masm.bind(ool->rejoin()); } bool CodeGeneratorX86Shared::generateOutOfLineCode() { if (!CodeGeneratorShared::generateOutOfLineCode()) { return false; } if (deoptLabel_.used()) { // All non-table-based bailouts will go here. masm.bind(&deoptLabel_); // Push the frame size, so the handler can recover the IonScript. masm.push(Imm32(frameSize())); TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler(); masm.jump(handler); } return !masm.oom(); } class BailoutJump { Assembler::Condition cond_; public: explicit BailoutJump(Assembler::Condition cond) : cond_(cond) {} #ifdef JS_CODEGEN_X86 void operator()(MacroAssembler& masm, uint8_t* code) const { masm.j(cond_, ImmPtr(code), RelocationKind::HARDCODED); } #endif void operator()(MacroAssembler& masm, Label* label) const { masm.j(cond_, label); } }; class BailoutLabel { Label* label_; public: explicit BailoutLabel(Label* label) : label_(label) {} #ifdef JS_CODEGEN_X86 void operator()(MacroAssembler& masm, uint8_t* code) const { masm.retarget(label_, ImmPtr(code), RelocationKind::HARDCODED); } #endif void operator()(MacroAssembler& masm, Label* label) const { masm.retarget(label_, label); } }; template void CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot) { encode(snapshot); // All bailout code is associated with the bytecodeSite of the block we are // bailing out from. InlineScriptTree* tree = snapshot->mir()->block()->trackedTree(); OutOfLineBailout* ool = new (alloc()) OutOfLineBailout(snapshot); addOutOfLineCode(ool, new (alloc()) BytecodeSite(tree, tree->script()->code())); binder(masm, ool->entry()); } void CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition, LSnapshot* snapshot) { bailout(BailoutJump(condition), snapshot); } void CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition, LSnapshot* snapshot) { MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) == Assembler::NaN_HandledByCond); bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot); } void CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot) { MOZ_ASSERT_IF(!masm.oom(), label->used() && !label->bound()); bailout(BailoutLabel(label), snapshot); } void CodeGeneratorX86Shared::bailout(LSnapshot* snapshot) { Label label; masm.jump(&label); bailoutFrom(&label, snapshot); } void CodeGeneratorX86Shared::visitOutOfLineBailout(OutOfLineBailout* ool) { masm.push(Imm32(ool->snapshot()->snapshotOffset())); masm.jmp(&deoptLabel_); } void CodeGenerator::visitMinMaxD(LMinMaxD* ins) { FloatRegister first = ToFloatRegister(ins->first()); FloatRegister second = ToFloatRegister(ins->second()); #ifdef DEBUG FloatRegister output = ToFloatRegister(ins->output()); MOZ_ASSERT(first == output); #endif bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN(); if (ins->mir()->isMax()) { masm.maxDouble(second, first, handleNaN); } else { masm.minDouble(second, first, handleNaN); } } void CodeGenerator::visitMinMaxF(LMinMaxF* ins) { FloatRegister first = ToFloatRegister(ins->first()); FloatRegister second = ToFloatRegister(ins->second()); #ifdef DEBUG FloatRegister output = ToFloatRegister(ins->output()); MOZ_ASSERT(first == output); #endif bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN(); if (ins->mir()->isMax()) { masm.maxFloat32(second, first, handleNaN); } else { masm.minFloat32(second, first, handleNaN); } } void CodeGenerator::visitClzI(LClzI* ins) { Register input = ToRegister(ins->input()); Register output = ToRegister(ins->output()); bool knownNotZero = ins->mir()->operandIsNeverZero(); masm.clz32(input, output, knownNotZero); } void CodeGenerator::visitCtzI(LCtzI* ins) { Register input = ToRegister(ins->input()); Register output = ToRegister(ins->output()); bool knownNotZero = ins->mir()->operandIsNeverZero(); masm.ctz32(input, output, knownNotZero); } void CodeGenerator::visitPopcntI(LPopcntI* ins) { Register input = ToRegister(ins->input()); Register output = ToRegister(ins->output()); Register temp = ins->temp0()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp0()); masm.popcnt32(input, output, temp); } void CodeGenerator::visitPowHalfD(LPowHalfD* ins) { FloatRegister input = ToFloatRegister(ins->input()); FloatRegister output = ToFloatRegister(ins->output()); ScratchDoubleScope scratch(masm); Label done, sqrt; if (!ins->mir()->operandIsNeverNegativeInfinity()) { // Branch if not -Infinity. masm.loadConstantDouble(NegativeInfinity(), scratch); Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered; if (ins->mir()->operandIsNeverNaN()) { cond = Assembler::DoubleNotEqual; } masm.branchDouble(cond, input, scratch, &sqrt); // Math.pow(-Infinity, 0.5) == Infinity. masm.zeroDouble(output); masm.subDouble(scratch, output); masm.jump(&done); masm.bind(&sqrt); } if (!ins->mir()->operandIsNeverNegativeZero()) { // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5). // Adding 0 converts any -0 to 0. masm.zeroDouble(scratch); masm.addDouble(input, scratch); masm.vsqrtsd(scratch, output, output); } else { masm.vsqrtsd(input, output, output); } masm.bind(&done); } class OutOfLineUndoALUOperation : public OutOfLineCodeBase { LInstruction* ins_; public: explicit OutOfLineUndoALUOperation(LInstruction* ins) : ins_(ins) {} virtual void accept(CodeGeneratorX86Shared* codegen) override { codegen->visitOutOfLineUndoALUOperation(this); } LInstruction* ins() const { return ins_; } }; void CodeGenerator::visitAddI(LAddI* ins) { if (ins->rhs()->isConstant()) { masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); } else { masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); } if (ins->snapshot()) { if (ins->recoversInput()) { OutOfLineUndoALUOperation* ool = new (alloc()) OutOfLineUndoALUOperation(ins); addOutOfLineCode(ool, ins->mir()); masm.j(Assembler::Overflow, ool->entry()); } else { bailoutIf(Assembler::Overflow, ins->snapshot()); } } } void CodeGenerator::visitAddI64(LAddI64* lir) { const LInt64Allocation lhs = lir->getInt64Operand(LAddI64::Lhs); const LInt64Allocation rhs = lir->getInt64Operand(LAddI64::Rhs); MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); if (IsConstant(rhs)) { masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); return; } masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } void CodeGenerator::visitSubI(LSubI* ins) { if (ins->rhs()->isConstant()) { masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); } else { masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); } if (ins->snapshot()) { if (ins->recoversInput()) { OutOfLineUndoALUOperation* ool = new (alloc()) OutOfLineUndoALUOperation(ins); addOutOfLineCode(ool, ins->mir()); masm.j(Assembler::Overflow, ool->entry()); } else { bailoutIf(Assembler::Overflow, ins->snapshot()); } } } void CodeGenerator::visitSubI64(LSubI64* lir) { const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs); const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs); MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); if (IsConstant(rhs)) { masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); return; } masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } void CodeGeneratorX86Shared::visitOutOfLineUndoALUOperation( OutOfLineUndoALUOperation* ool) { LInstruction* ins = ool->ins(); Register reg = ToRegister(ins->getDef(0)); DebugOnly lhs = ins->getOperand(0); LAllocation* rhs = ins->getOperand(1); MOZ_ASSERT(reg == ToRegister(lhs)); MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs)); // Undo the effect of the ALU operation, which was performed on the output // register and overflowed. Writing to the output register clobbered an // input reg, and the original value of the input needs to be recovered // to satisfy the constraint imposed by any RECOVERED_INPUT operands to // the bailout snapshot. if (rhs->isConstant()) { Imm32 constant(ToInt32(rhs)); if (ins->isAddI()) { masm.subl(constant, reg); } else { masm.addl(constant, reg); } } else { if (ins->isAddI()) { masm.subl(ToOperand(rhs), reg); } else { masm.addl(ToOperand(rhs), reg); } } bailout(ool->ins()->snapshot()); } class MulNegativeZeroCheck : public OutOfLineCodeBase { LMulI* ins_; public: explicit MulNegativeZeroCheck(LMulI* ins) : ins_(ins) {} virtual void accept(CodeGeneratorX86Shared* codegen) override { codegen->visitMulNegativeZeroCheck(this); } LMulI* ins() const { return ins_; } }; void CodeGenerator::visitMulI(LMulI* ins) { const LAllocation* lhs = ins->lhs(); const LAllocation* rhs = ins->rhs(); MMul* mul = ins->mir(); MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow()); if (rhs->isConstant()) { // Bailout on -0.0 int32_t constant = ToInt32(rhs); if (mul->canBeNegativeZero() && constant <= 0) { Assembler::Condition bailoutCond = (constant == 0) ? Assembler::Signed : Assembler::Equal; masm.test32(ToRegister(lhs), ToRegister(lhs)); bailoutIf(bailoutCond, ins->snapshot()); } switch (constant) { case -1: masm.negl(ToOperand(lhs)); break; case 0: masm.xorl(ToOperand(lhs), ToRegister(lhs)); return; // escape overflow check; case 1: // nop return; // escape overflow check; case 2: masm.addl(ToOperand(lhs), ToRegister(lhs)); break; default: if (!mul->canOverflow() && constant > 0) { // Use shift if cannot overflow and constant is power of 2 int32_t shift = FloorLog2(constant); if ((1 << shift) == constant) { masm.shll(Imm32(shift), ToRegister(lhs)); return; } } masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs)); } // Bailout on overflow if (mul->canOverflow()) { bailoutIf(Assembler::Overflow, ins->snapshot()); } } else { masm.imull(ToOperand(rhs), ToRegister(lhs)); // Bailout on overflow if (mul->canOverflow()) { bailoutIf(Assembler::Overflow, ins->snapshot()); } if (mul->canBeNegativeZero()) { // Jump to an OOL path if the result is 0. MulNegativeZeroCheck* ool = new (alloc()) MulNegativeZeroCheck(ins); addOutOfLineCode(ool, mul); masm.test32(ToRegister(lhs), ToRegister(lhs)); masm.j(Assembler::Zero, ool->entry()); masm.bind(ool->rejoin()); } } } void CodeGenerator::visitMulI64(LMulI64* lir) { const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs); const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs); MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir)); if (IsConstant(rhs)) { int64_t constant = ToInt64(rhs); switch (constant) { case -1: masm.neg64(ToRegister64(lhs)); return; case 0: masm.xor64(ToRegister64(lhs), ToRegister64(lhs)); return; case 1: // nop return; case 2: masm.add64(ToRegister64(lhs), ToRegister64(lhs)); return; default: if (constant > 0) { // Use shift if constant is power of 2. int32_t shift = mozilla::FloorLog2(constant); if (int64_t(1) << shift == constant) { masm.lshift64(Imm32(shift), ToRegister64(lhs)); return; } } Register temp = ToTempRegisterOrInvalid(lir->temp()); masm.mul64(Imm64(constant), ToRegister64(lhs), temp); } } else { Register temp = ToTempRegisterOrInvalid(lir->temp()); masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp); } } class ReturnZero : public OutOfLineCodeBase { Register reg_; public: explicit ReturnZero(Register reg) : reg_(reg) {} virtual void accept(CodeGeneratorX86Shared* codegen) override { codegen->visitReturnZero(this); } Register reg() const { return reg_; } }; void CodeGeneratorX86Shared::visitReturnZero(ReturnZero* ool) { masm.mov(ImmWord(0), ool->reg()); masm.jmp(ool->rejoin()); } void CodeGenerator::visitUDivOrMod(LUDivOrMod* ins) { Register lhs = ToRegister(ins->lhs()); Register rhs = ToRegister(ins->rhs()); Register output = ToRegister(ins->output()); MOZ_ASSERT_IF(lhs != rhs, rhs != eax); MOZ_ASSERT(rhs != edx); MOZ_ASSERT_IF(output == eax, ToRegister(ins->remainder()) == edx); ReturnZero* ool = nullptr; // Put the lhs in eax. if (lhs != eax) { masm.mov(lhs, eax); } // Prevent divide by zero. if (ins->canBeDivideByZero()) { masm.test32(rhs, rhs); if (ins->mir()->isTruncated()) { if (ins->trapOnError()) { Label nonZero; masm.j(Assembler::NonZero, &nonZero); masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->bytecodeOffset()); masm.bind(&nonZero); } else { ool = new (alloc()) ReturnZero(output); masm.j(Assembler::Zero, ool->entry()); } } else { bailoutIf(Assembler::Zero, ins->snapshot()); } } // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit. masm.mov(ImmWord(0), edx); masm.udiv(rhs); // If the remainder is > 0, bailout since this must be a double. if (ins->mir()->isDiv() && !ins->mir()->toDiv()->canTruncateRemainder()) { Register remainder = ToRegister(ins->remainder()); masm.test32(remainder, remainder); bailoutIf(Assembler::NonZero, ins->snapshot()); } // Unsigned div or mod can return a value that's not a signed int32. // If our users aren't expecting that, bail. if (!ins->mir()->isTruncated()) { masm.test32(output, output); bailoutIf(Assembler::Signed, ins->snapshot()); } if (ool) { addOutOfLineCode(ool, ins->mir()); masm.bind(ool->rejoin()); } } void CodeGenerator::visitUDivOrModConstant(LUDivOrModConstant* ins) { Register lhs = ToRegister(ins->numerator()); Register output = ToRegister(ins->output()); uint32_t d = ins->denominator(); // This emits the division answer into edx or the modulus answer into eax. MOZ_ASSERT(output == eax || output == edx); MOZ_ASSERT(lhs != eax && lhs != edx); bool isDiv = (output == edx); if (d == 0) { if (ins->mir()->isTruncated()) { if (ins->trapOnError()) { masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->bytecodeOffset()); } else { masm.xorl(output, output); } } else { bailout(ins->snapshot()); } return; } // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI). MOZ_ASSERT((d & (d - 1)) != 0); auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d); // We first compute (M * n) >> 32, where M = rmc.multiplier. masm.movl(Imm32(rmc.multiplier), eax); masm.umull(lhs); if (rmc.multiplier > UINT32_MAX) { // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d, // contradicting the proof of correctness in computeDivisionConstants. MOZ_ASSERT(rmc.shiftAmount > 0); MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33)); // We actually computed edx = ((uint32_t(M) * n) >> 32) instead. Since // (M * n) >> (32 + shift) is the same as (edx + n) >> shift, we can // correct for the overflow. This case is a bit trickier than the signed // case, though, as the (edx + n) addition itself can overflow; however, // note that (edx + n) >> shift == (((n - edx) >> 1) + edx) >> (shift - 1), // which is overflow-free. See Hacker's Delight, section 10-8 for details. // Compute (n - edx) >> 1 into eax. masm.movl(lhs, eax); masm.subl(edx, eax); masm.shrl(Imm32(1), eax); // Finish the computation. masm.addl(eax, edx); masm.shrl(Imm32(rmc.shiftAmount - 1), edx); } else { masm.shrl(Imm32(rmc.shiftAmount), edx); } // We now have the truncated division value in edx. If we're // computing a modulus or checking whether the division resulted // in an integer, we need to multiply the obtained value by d and // finish the computation/check. if (!isDiv) { masm.imull(Imm32(d), edx, edx); masm.movl(lhs, eax); masm.subl(edx, eax); // The final result of the modulus op, just computed above by the // sub instruction, can be a number in the range [2^31, 2^32). If // this is the case and the modulus is not truncated, we must bail // out. if (!ins->mir()->isTruncated()) { bailoutIf(Assembler::Signed, ins->snapshot()); } } else if (!ins->mir()->isTruncated()) { masm.imull(Imm32(d), edx, eax); masm.cmpl(lhs, eax); bailoutIf(Assembler::NotEqual, ins->snapshot()); } } void CodeGeneratorX86Shared::visitMulNegativeZeroCheck( MulNegativeZeroCheck* ool) { LMulI* ins = ool->ins(); Register result = ToRegister(ins->output()); Operand lhsCopy = ToOperand(ins->lhsCopy()); Operand rhs = ToOperand(ins->rhs()); MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG, lhsCopy.reg() != result.code()); // Result is -0 if lhs or rhs is negative. masm.movl(lhsCopy, result); masm.orl(rhs, result); bailoutIf(Assembler::Signed, ins->snapshot()); masm.mov(ImmWord(0), result); masm.jmp(ool->rejoin()); } void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) { Register lhs = ToRegister(ins->numerator()); DebugOnly output = ToRegister(ins->output()); int32_t shift = ins->shift(); bool negativeDivisor = ins->negativeDivisor(); MDiv* mir = ins->mir(); // We use defineReuseInput so these should always be the same, which is // convenient since all of our instructions here are two-address. MOZ_ASSERT(lhs == output); if (!mir->isTruncated() && negativeDivisor) { // 0 divided by a negative number must return a double. masm.test32(lhs, lhs); bailoutIf(Assembler::Zero, ins->snapshot()); } if (shift) { if (!mir->isTruncated()) { // If the remainder is != 0, bailout since this must be a double. masm.test32(lhs, Imm32(UINT32_MAX >> (32 - shift))); bailoutIf(Assembler::NonZero, ins->snapshot()); } if (mir->isUnsigned()) { masm.shrl(Imm32(shift), lhs); } else { // Adjust the value so that shifting produces a correctly // rounded result when the numerator is negative. See 10-1 // "Signed Division by a Known Power of 2" in Henry // S. Warren, Jr.'s Hacker's Delight. if (mir->canBeNegativeDividend() && mir->isTruncated()) { // Note: There is no need to execute this code, which handles how to // round the signed integer division towards 0, if we previously bailed // due to a non-zero remainder. Register lhsCopy = ToRegister(ins->numeratorCopy()); MOZ_ASSERT(lhsCopy != lhs); if (shift > 1) { // Copy the sign bit of the numerator. (= (2^32 - 1) or 0) masm.sarl(Imm32(31), lhs); } // Divide by 2^(32 - shift) // i.e. (= (2^32 - 1) / 2^(32 - shift) or 0) // i.e. (= (2^shift - 1) or 0) masm.shrl(Imm32(32 - shift), lhs); // If signed, make any 1 bit below the shifted bits to bubble up, such // that once shifted the value would be rounded towards 0. masm.addl(lhsCopy, lhs); } masm.sarl(Imm32(shift), lhs); if (negativeDivisor) { masm.negl(lhs); } } return; } if (negativeDivisor) { // INT32_MIN / -1 overflows. masm.negl(lhs); if (!mir->isTruncated()) { bailoutIf(Assembler::Overflow, ins->snapshot()); } else if (mir->trapOnError()) { Label ok; masm.j(Assembler::NoOverflow, &ok); masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset()); masm.bind(&ok); } } else if (mir->isUnsigned() && !mir->isTruncated()) { // Unsigned division by 1 can overflow if output is not // truncated. masm.test32(lhs, lhs); bailoutIf(Assembler::Signed, ins->snapshot()); } } void CodeGenerator::visitDivOrModConstantI(LDivOrModConstantI* ins) { Register lhs = ToRegister(ins->numerator()); Register output = ToRegister(ins->output()); int32_t d = ins->denominator(); // This emits the division answer into edx or the modulus answer into eax. MOZ_ASSERT(output == eax || output == edx); MOZ_ASSERT(lhs != eax && lhs != edx); bool isDiv = (output == edx); // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI // and LModPowTwoI). MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0); // We will first divide by Abs(d), and negate the answer if d is negative. // If desired, this can be avoided by generalizing computeDivisionConstants. auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(Abs(d)); // We first compute (M * n) >> 32, where M = rmc.multiplier. masm.movl(Imm32(rmc.multiplier), eax); masm.imull(lhs); if (rmc.multiplier > INT32_MAX) { MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32)); // We actually computed edx = ((int32_t(M) * n) >> 32) instead. Since // (M * n) >> 32 is the same as (edx + n), we can correct for the overflow. // (edx + n) can't overflow, as n and edx have opposite signs because // int32_t(M) is negative. masm.addl(lhs, edx); } // (M * n) >> (32 + shift) is the truncated division answer if n is // non-negative, as proved in the comments of computeDivisionConstants. We // must add 1 later if n is negative to get the right answer in all cases. masm.sarl(Imm32(rmc.shiftAmount), edx); // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be // computed with just a sign-extending shift of 31 bits. if (ins->canBeNegativeDividend()) { masm.movl(lhs, eax); masm.sarl(Imm32(31), eax); masm.subl(eax, edx); } // After this, edx contains the correct truncated division result. if (d < 0) { masm.negl(edx); } if (!isDiv) { masm.imull(Imm32(-d), edx, eax); masm.addl(lhs, eax); } if (!ins->mir()->isTruncated()) { if (isDiv) { // This is a division op. Multiply the obtained value by d to check if // the correct answer is an integer. This cannot overflow, since |d| > 1. masm.imull(Imm32(d), edx, eax); masm.cmp32(lhs, eax); bailoutIf(Assembler::NotEqual, ins->snapshot()); // If lhs is zero and the divisor is negative, the answer should have // been -0. if (d < 0) { masm.test32(lhs, lhs); bailoutIf(Assembler::Zero, ins->snapshot()); } } else if (ins->canBeNegativeDividend()) { // This is a mod op. If the computed value is zero and lhs // is negative, the answer should have been -0. Label done; masm.cmp32(lhs, Imm32(0)); masm.j(Assembler::GreaterThanOrEqual, &done); masm.test32(eax, eax); bailoutIf(Assembler::Zero, ins->snapshot()); masm.bind(&done); } } } void CodeGenerator::visitDivI(LDivI* ins) { Register remainder = ToRegister(ins->remainder()); Register lhs = ToRegister(ins->lhs()); Register rhs = ToRegister(ins->rhs()); Register output = ToRegister(ins->output()); MDiv* mir = ins->mir(); MOZ_ASSERT_IF(lhs != rhs, rhs != eax); MOZ_ASSERT(rhs != edx); MOZ_ASSERT(remainder == edx); MOZ_ASSERT(output == eax); Label done; ReturnZero* ool = nullptr; // Put the lhs in eax, for either the negative overflow case or the regular // divide case. if (lhs != eax) { masm.mov(lhs, eax); } // Handle divide by zero. if (mir->canBeDivideByZero()) { masm.test32(rhs, rhs); if (mir->trapOnError()) { Label nonZero; masm.j(Assembler::NonZero, &nonZero); masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset()); masm.bind(&nonZero); } else if (mir->canTruncateInfinities()) { // Truncated division by zero is zero (Infinity|0 == 0) if (!ool) { ool = new (alloc()) ReturnZero(output); } masm.j(Assembler::Zero, ool->entry()); } else { MOZ_ASSERT(mir->fallible()); bailoutIf(Assembler::Zero, ins->snapshot()); } } // Handle an integer overflow exception from -2147483648 / -1. if (mir->canBeNegativeOverflow()) { Label notOverflow; masm.cmp32(lhs, Imm32(INT32_MIN)); masm.j(Assembler::NotEqual, ¬Overflow); masm.cmp32(rhs, Imm32(-1)); if (mir->trapOnError()) { masm.j(Assembler::NotEqual, ¬Overflow); masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset()); } else if (mir->canTruncateOverflow()) { // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the // output register (lhs == eax). masm.j(Assembler::Equal, &done); } else { MOZ_ASSERT(mir->fallible()); bailoutIf(Assembler::Equal, ins->snapshot()); } masm.bind(¬Overflow); } // Handle negative 0. if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) { Label nonzero; masm.test32(lhs, lhs); masm.j(Assembler::NonZero, &nonzero); masm.cmp32(rhs, Imm32(0)); bailoutIf(Assembler::LessThan, ins->snapshot()); masm.bind(&nonzero); } // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit. if (lhs != eax) { masm.mov(lhs, eax); } masm.cdq(); masm.idiv(rhs); if (!mir->canTruncateRemainder()) { // If the remainder is > 0, bailout since this must be a double. masm.test32(remainder, remainder); bailoutIf(Assembler::NonZero, ins->snapshot()); } masm.bind(&done); if (ool) { addOutOfLineCode(ool, mir); masm.bind(ool->rejoin()); } } void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) { Register lhs = ToRegister(ins->getOperand(0)); int32_t shift = ins->shift(); Label negative; if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) { // Switch based on sign of the lhs. // Positive numbers are just a bitmask masm.branchTest32(Assembler::Signed, lhs, lhs, &negative); } masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs); if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) { Label done; masm.jump(&done); // Negative numbers need a negate, bitmask, negate masm.bind(&negative); // Unlike in the visitModI case, we are not computing the mod by means of a // division. Therefore, the divisor = -1 case isn't problematic (the andl // always returns 0, which is what we expect). // // The negl instruction overflows if lhs == INT32_MIN, but this is also not // a problem: shift is at most 31, and so the andl also always returns 0. masm.negl(lhs); masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs); masm.negl(lhs); // Since a%b has the same sign as b, and a is negative in this branch, // an answer of 0 means the correct result is actually -0. Bail out. if (!ins->mir()->isTruncated()) { bailoutIf(Assembler::Zero, ins->snapshot()); } masm.bind(&done); } } class ModOverflowCheck : public OutOfLineCodeBase { Label done_; LModI* ins_; Register rhs_; public: explicit ModOverflowCheck(LModI* ins, Register rhs) : ins_(ins), rhs_(rhs) {} virtual void accept(CodeGeneratorX86Shared* codegen) override { codegen->visitModOverflowCheck(this); } Label* done() { return &done_; } LModI* ins() const { return ins_; } Register rhs() const { return rhs_; } }; void CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool) { masm.cmp32(ool->rhs(), Imm32(-1)); if (ool->ins()->mir()->isTruncated()) { masm.j(Assembler::NotEqual, ool->rejoin()); masm.mov(ImmWord(0), edx); masm.jmp(ool->done()); } else { bailoutIf(Assembler::Equal, ool->ins()->snapshot()); masm.jmp(ool->rejoin()); } } void CodeGenerator::visitModI(LModI* ins) { Register remainder = ToRegister(ins->remainder()); Register lhs = ToRegister(ins->lhs()); Register rhs = ToRegister(ins->rhs()); // Required to use idiv. MOZ_ASSERT_IF(lhs != rhs, rhs != eax); MOZ_ASSERT(rhs != edx); MOZ_ASSERT(remainder == edx); MOZ_ASSERT(ToRegister(ins->getTemp(0)) == eax); Label done; ReturnZero* ool = nullptr; ModOverflowCheck* overflow = nullptr; // Set up eax in preparation for doing a div. if (lhs != eax) { masm.mov(lhs, eax); } MMod* mir = ins->mir(); // Prevent divide by zero. if (mir->canBeDivideByZero()) { masm.test32(rhs, rhs); if (mir->isTruncated()) { if (mir->trapOnError()) { Label nonZero; masm.j(Assembler::NonZero, &nonZero); masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset()); masm.bind(&nonZero); } else { if (!ool) { ool = new (alloc()) ReturnZero(edx); } masm.j(Assembler::Zero, ool->entry()); } } else { bailoutIf(Assembler::Zero, ins->snapshot()); } } Label negative; // Switch based on sign of the lhs. if (mir->canBeNegativeDividend()) { masm.branchTest32(Assembler::Signed, lhs, lhs, &negative); } // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive. { // Check if rhs is a power-of-two. if (mir->canBePowerOfTwoDivisor()) { MOZ_ASSERT(rhs != remainder); // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if // y is any negative number other than INT32_MIN, both y and // y-1 will have the sign bit set so these are never optimized // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX // and because lhs >= 0 at this point, lhs & INT32_MAX returns // the correct value. Label notPowerOfTwo; masm.mov(rhs, remainder); masm.subl(Imm32(1), remainder); masm.branchTest32(Assembler::NonZero, remainder, rhs, ¬PowerOfTwo); { masm.andl(lhs, remainder); masm.jmp(&done); } masm.bind(¬PowerOfTwo); } // Since lhs >= 0, the sign-extension will be 0 masm.mov(ImmWord(0), edx); masm.idiv(rhs); } // Otherwise, we have to beware of two special cases: if (mir->canBeNegativeDividend()) { masm.jump(&done); masm.bind(&negative); // Prevent an integer overflow exception from -2147483648 % -1 masm.cmp32(lhs, Imm32(INT32_MIN)); overflow = new (alloc()) ModOverflowCheck(ins, rhs); masm.j(Assembler::Equal, overflow->entry()); masm.bind(overflow->rejoin()); masm.cdq(); masm.idiv(rhs); if (!mir->isTruncated()) { // A remainder of 0 means that the rval must be -0, which is a double. masm.test32(remainder, remainder); bailoutIf(Assembler::Zero, ins->snapshot()); } } masm.bind(&done); if (overflow) { addOutOfLineCode(overflow, mir); masm.bind(overflow->done()); } if (ool) { addOutOfLineCode(ool, mir); masm.bind(ool->rejoin()); } } void CodeGenerator::visitBitNotI(LBitNotI* ins) { const LAllocation* input = ins->getOperand(0); MOZ_ASSERT(!input->isConstant()); masm.notl(ToOperand(input)); } void CodeGenerator::visitBitOpI(LBitOpI* ins) { const LAllocation* lhs = ins->getOperand(0); const LAllocation* rhs = ins->getOperand(1); switch (ins->bitop()) { case JSOp::BitOr: if (rhs->isConstant()) { masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs)); } else { masm.orl(ToOperand(rhs), ToRegister(lhs)); } break; case JSOp::BitXor: if (rhs->isConstant()) { masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs)); } else { masm.xorl(ToOperand(rhs), ToRegister(lhs)); } break; case JSOp::BitAnd: if (rhs->isConstant()) { masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs)); } else { masm.andl(ToOperand(rhs), ToRegister(lhs)); } break; default: MOZ_CRASH("unexpected binary opcode"); } } void CodeGenerator::visitBitOpI64(LBitOpI64* lir) { const LInt64Allocation lhs = lir->getInt64Operand(LBitOpI64::Lhs); const LInt64Allocation rhs = lir->getInt64Operand(LBitOpI64::Rhs); MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); switch (lir->bitop()) { case JSOp::BitOr: if (IsConstant(rhs)) { masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); } else { masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } break; case JSOp::BitXor: if (IsConstant(rhs)) { masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); } else { masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } break; case JSOp::BitAnd: if (IsConstant(rhs)) { masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); } else { masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } break; default: MOZ_CRASH("unexpected binary opcode"); } } void CodeGenerator::visitShiftI(LShiftI* ins) { Register lhs = ToRegister(ins->lhs()); const LAllocation* rhs = ins->rhs(); if (rhs->isConstant()) { int32_t shift = ToInt32(rhs) & 0x1F; switch (ins->bitop()) { case JSOp::Lsh: if (shift) { masm.lshift32(Imm32(shift), lhs); } break; case JSOp::Rsh: if (shift) { masm.rshift32Arithmetic(Imm32(shift), lhs); } break; case JSOp::Ursh: if (shift) { masm.rshift32(Imm32(shift), lhs); } else if (ins->mir()->toUrsh()->fallible()) { // x >>> 0 can overflow. masm.test32(lhs, lhs); bailoutIf(Assembler::Signed, ins->snapshot()); } break; default: MOZ_CRASH("Unexpected shift op"); } } else { Register shift = ToRegister(rhs); switch (ins->bitop()) { case JSOp::Lsh: masm.lshift32(shift, lhs); break; case JSOp::Rsh: masm.rshift32Arithmetic(shift, lhs); break; case JSOp::Ursh: masm.rshift32(shift, lhs); if (ins->mir()->toUrsh()->fallible()) { // x >>> 0 can overflow. masm.test32(lhs, lhs); bailoutIf(Assembler::Signed, ins->snapshot()); } break; default: MOZ_CRASH("Unexpected shift op"); } } } void CodeGenerator::visitShiftI64(LShiftI64* lir) { const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs); LAllocation* rhs = lir->getOperand(LShiftI64::Rhs); MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); if (rhs->isConstant()) { int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F); switch (lir->bitop()) { case JSOp::Lsh: if (shift) { masm.lshift64(Imm32(shift), ToRegister64(lhs)); } break; case JSOp::Rsh: if (shift) { masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs)); } break; case JSOp::Ursh: if (shift) { masm.rshift64(Imm32(shift), ToRegister64(lhs)); } break; default: MOZ_CRASH("Unexpected shift op"); } return; } Register shift = ToRegister(rhs); #ifdef JS_CODEGEN_X86 MOZ_ASSERT(shift == ecx); #endif switch (lir->bitop()) { case JSOp::Lsh: masm.lshift64(shift, ToRegister64(lhs)); break; case JSOp::Rsh: masm.rshift64Arithmetic(shift, ToRegister64(lhs)); break; case JSOp::Ursh: masm.rshift64(shift, ToRegister64(lhs)); break; default: MOZ_CRASH("Unexpected shift op"); } } void CodeGenerator::visitUrshD(LUrshD* ins) { Register lhs = ToRegister(ins->lhs()); MOZ_ASSERT(ToRegister(ins->temp()) == lhs); const LAllocation* rhs = ins->rhs(); FloatRegister out = ToFloatRegister(ins->output()); if (rhs->isConstant()) { int32_t shift = ToInt32(rhs) & 0x1F; if (shift) { masm.shrl(Imm32(shift), lhs); } } else { Register shift = ToRegister(rhs); masm.rshift32(shift, lhs); } masm.convertUInt32ToDouble(lhs, out); } Operand CodeGeneratorX86Shared::ToOperand(const LAllocation& a) { if (a.isGeneralReg()) { return Operand(a.toGeneralReg()->reg()); } if (a.isFloatReg()) { return Operand(a.toFloatReg()->reg()); } return Operand(ToAddress(a)); } Operand CodeGeneratorX86Shared::ToOperand(const LAllocation* a) { return ToOperand(*a); } Operand CodeGeneratorX86Shared::ToOperand(const LDefinition* def) { return ToOperand(def->output()); } MoveOperand CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const { if (a.isGeneralReg()) { return MoveOperand(ToRegister(a)); } if (a.isFloatReg()) { return MoveOperand(ToFloatRegister(a)); } MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::EFFECTIVE_ADDRESS : MoveOperand::MEMORY; return MoveOperand(ToAddress(a), kind); } class OutOfLineTableSwitch : public OutOfLineCodeBase { MTableSwitch* mir_; CodeLabel jumpLabel_; void accept(CodeGeneratorX86Shared* codegen) override { codegen->visitOutOfLineTableSwitch(this); } public: explicit OutOfLineTableSwitch(MTableSwitch* mir) : mir_(mir) {} MTableSwitch* mir() const { return mir_; } CodeLabel* jumpLabel() { return &jumpLabel_; } }; void CodeGeneratorX86Shared::visitOutOfLineTableSwitch( OutOfLineTableSwitch* ool) { MTableSwitch* mir = ool->mir(); masm.haltingAlign(sizeof(void*)); masm.bind(ool->jumpLabel()); masm.addCodeLabel(*ool->jumpLabel()); for (size_t i = 0; i < mir->numCases(); i++) { LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir(); Label* caseheader = caseblock->label(); uint32_t caseoffset = caseheader->offset(); // The entries of the jump table need to be absolute addresses and thus // must be patched after codegen is finished. CodeLabel cl; masm.writeCodePointer(&cl); cl.target()->bind(caseoffset); masm.addCodeLabel(cl); } } void CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base) { Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label(); // Lower value with low value if (mir->low() != 0) { masm.subl(Imm32(mir->low()), index); } // Jump to default case if input is out of range int32_t cases = mir->numCases(); masm.cmp32(index, Imm32(cases)); masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase); // To fill in the CodeLabels for the case entries, we need to first // generate the case entries (we don't yet know their offsets in the // instruction stream). OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(mir); addOutOfLineCode(ool, mir); // Compute the position where a pointer to the right case stands. masm.mov(ool->jumpLabel(), base); BaseIndex pointer(base, index, ScalePointer); // Jump to the right case masm.branchToComputedAddress(pointer); } void CodeGenerator::visitMathD(LMathD* math) { FloatRegister lhs = ToFloatRegister(math->lhs()); Operand rhs = ToOperand(math->rhs()); FloatRegister output = ToFloatRegister(math->output()); switch (math->jsop()) { case JSOp::Add: masm.vaddsd(rhs, lhs, output); break; case JSOp::Sub: masm.vsubsd(rhs, lhs, output); break; case JSOp::Mul: masm.vmulsd(rhs, lhs, output); break; case JSOp::Div: masm.vdivsd(rhs, lhs, output); break; default: MOZ_CRASH("unexpected opcode"); } } void CodeGenerator::visitMathF(LMathF* math) { FloatRegister lhs = ToFloatRegister(math->lhs()); Operand rhs = ToOperand(math->rhs()); FloatRegister output = ToFloatRegister(math->output()); switch (math->jsop()) { case JSOp::Add: masm.vaddss(rhs, lhs, output); break; case JSOp::Sub: masm.vsubss(rhs, lhs, output); break; case JSOp::Mul: masm.vmulss(rhs, lhs, output); break; case JSOp::Div: masm.vdivss(rhs, lhs, output); break; default: MOZ_CRASH("unexpected opcode"); } } void CodeGenerator::visitNearbyInt(LNearbyInt* lir) { FloatRegister input = ToFloatRegister(lir->input()); FloatRegister output = ToFloatRegister(lir->output()); RoundingMode roundingMode = lir->mir()->roundingMode(); masm.nearbyIntDouble(roundingMode, input, output); } void CodeGenerator::visitNearbyIntF(LNearbyIntF* lir) { FloatRegister input = ToFloatRegister(lir->input()); FloatRegister output = ToFloatRegister(lir->output()); RoundingMode roundingMode = lir->mir()->roundingMode(); masm.nearbyIntFloat32(roundingMode, input, output); } void CodeGenerator::visitEffectiveAddress(LEffectiveAddress* ins) { const MEffectiveAddress* mir = ins->mir(); Register base = ToRegister(ins->base()); Register index = ToRegister(ins->index()); Register output = ToRegister(ins->output()); masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output); } void CodeGeneratorX86Shared::generateInvalidateEpilogue() { // Ensure that there is enough space in the buffer for the OsiPoint // patching to occur. Otherwise, we could overwrite the invalidation // epilogue. for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) { masm.nop(); } masm.bind(&invalidate_); // Push the Ion script onto the stack (when we determine what that pointer // is). invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1))); // Jump to the invalidator which will replace the current frame. TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk(); masm.jump(thunk); } void CodeGenerator::visitNegI(LNegI* ins) { Register input = ToRegister(ins->input()); MOZ_ASSERT(input == ToRegister(ins->output())); masm.neg32(input); } void CodeGenerator::visitNegI64(LNegI64* ins) { Register64 input = ToRegister64(ins->getInt64Operand(0)); MOZ_ASSERT(input == ToOutRegister64(ins)); masm.neg64(input); } void CodeGenerator::visitNegD(LNegD* ins) { FloatRegister input = ToFloatRegister(ins->input()); MOZ_ASSERT(input == ToFloatRegister(ins->output())); masm.negateDouble(input); } void CodeGenerator::visitNegF(LNegF* ins) { FloatRegister input = ToFloatRegister(ins->input()); MOZ_ASSERT(input == ToFloatRegister(ins->output())); masm.negateFloat(input); } void CodeGenerator::visitCompareExchangeTypedArrayElement( LCompareExchangeTypedArrayElement* lir) { Register elements = ToRegister(lir->elements()); AnyRegister output = ToAnyRegister(lir->output()); Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); Register oldval = ToRegister(lir->oldval()); Register newval = ToRegister(lir->newval()); Scalar::Type arrayType = lir->mir()->arrayType(); if (lir->index()->isConstant()) { Address dest = ToAddress(elements, lir->index(), arrayType); masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval, newval, temp, output); } else { BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromScalarType(arrayType)); masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval, newval, temp, output); } } void CodeGenerator::visitAtomicExchangeTypedArrayElement( LAtomicExchangeTypedArrayElement* lir) { Register elements = ToRegister(lir->elements()); AnyRegister output = ToAnyRegister(lir->output()); Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); Register value = ToRegister(lir->value()); Scalar::Type arrayType = lir->mir()->arrayType(); if (lir->index()->isConstant()) { Address dest = ToAddress(elements, lir->index(), arrayType); masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp, output); } else { BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromScalarType(arrayType)); masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp, output); } } template static inline void AtomicBinopToTypedArray(MacroAssembler& masm, AtomicOp op, Scalar::Type arrayType, const LAllocation* value, const T& mem, Register temp1, Register temp2, AnyRegister output) { if (value->isConstant()) { masm.atomicFetchOpJS(arrayType, Synchronization::Full(), op, Imm32(ToInt32(value)), mem, temp1, temp2, output); } else { masm.atomicFetchOpJS(arrayType, Synchronization::Full(), op, ToRegister(value), mem, temp1, temp2, output); } } void CodeGenerator::visitAtomicTypedArrayElementBinop( LAtomicTypedArrayElementBinop* lir) { MOZ_ASSERT(!lir->mir()->isForEffect()); AnyRegister output = ToAnyRegister(lir->output()); Register elements = ToRegister(lir->elements()); Register temp1 = lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1()); Register temp2 = lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2()); const LAllocation* value = lir->value(); Scalar::Type arrayType = lir->mir()->arrayType(); if (lir->index()->isConstant()) { Address mem = ToAddress(elements, lir->index(), arrayType); AtomicBinopToTypedArray(masm, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output); } else { BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromScalarType(arrayType)); AtomicBinopToTypedArray(masm, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output); } } template static inline void AtomicBinopToTypedArray(MacroAssembler& masm, Scalar::Type arrayType, AtomicOp op, const LAllocation* value, const T& mem) { if (value->isConstant()) { masm.atomicEffectOpJS(arrayType, Synchronization::Full(), op, Imm32(ToInt32(value)), mem, InvalidReg); } else { masm.atomicEffectOpJS(arrayType, Synchronization::Full(), op, ToRegister(value), mem, InvalidReg); } } void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect( LAtomicTypedArrayElementBinopForEffect* lir) { MOZ_ASSERT(lir->mir()->isForEffect()); Register elements = ToRegister(lir->elements()); const LAllocation* value = lir->value(); Scalar::Type arrayType = lir->mir()->arrayType(); if (lir->index()->isConstant()) { Address mem = ToAddress(elements, lir->index(), arrayType); AtomicBinopToTypedArray(masm, arrayType, lir->mir()->operation(), value, mem); } else { BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromScalarType(arrayType)); AtomicBinopToTypedArray(masm, arrayType, lir->mir()->operation(), value, mem); } } void CodeGenerator::visitMemoryBarrier(LMemoryBarrier* ins) { if (ins->type() & MembarStoreLoad) { masm.storeLoadFence(); } } void CodeGeneratorX86Shared::visitOutOfLineWasmTruncateCheck( OutOfLineWasmTruncateCheck* ool) { FloatRegister input = ool->input(); Register output = ool->output(); Register64 output64 = ool->output64(); MIRType fromType = ool->fromType(); MIRType toType = ool->toType(); Label* oolRejoin = ool->rejoin(); TruncFlags flags = ool->flags(); wasm::BytecodeOffset off = ool->bytecodeOffset(); if (fromType == MIRType::Float32) { if (toType == MIRType::Int32) { masm.oolWasmTruncateCheckF32ToI32(input, output, flags, off, oolRejoin); } else if (toType == MIRType::Int64) { masm.oolWasmTruncateCheckF32ToI64(input, output64, flags, off, oolRejoin); } else { MOZ_CRASH("unexpected type"); } } else if (fromType == MIRType::Double) { if (toType == MIRType::Int32) { masm.oolWasmTruncateCheckF64ToI32(input, output, flags, off, oolRejoin); } else if (toType == MIRType::Int64) { masm.oolWasmTruncateCheckF64ToI64(input, output64, flags, off, oolRejoin); } else { MOZ_CRASH("unexpected type"); } } else { MOZ_CRASH("unexpected type"); } } void CodeGeneratorX86Shared::canonicalizeIfDeterministic( Scalar::Type type, const LAllocation* value) { #ifdef DEBUG if (!js::SupportDifferentialTesting()) { return; } switch (type) { case Scalar::Float32: { FloatRegister in = ToFloatRegister(value); masm.canonicalizeFloatIfDeterministic(in); break; } case Scalar::Float64: { FloatRegister in = ToFloatRegister(value); masm.canonicalizeDoubleIfDeterministic(in); break; } default: { // Other types don't need canonicalization. break; } } #endif // DEBUG } template Operand CodeGeneratorX86Shared::toMemoryAccessOperand(T* lir, int32_t disp) { const LAllocation* ptr = lir->ptr(); #ifdef JS_CODEGEN_X86 const LAllocation* memoryBase = lir->memoryBase(); Operand destAddr = ptr->isBogus() ? Operand(ToRegister(memoryBase), disp) : Operand(ToRegister(memoryBase), ToRegister(ptr), TimesOne, disp); #else Operand destAddr = ptr->isBogus() ? Operand(HeapReg, disp) : Operand(HeapReg, ToRegister(ptr), TimesOne, disp); #endif return destAddr; } void CodeGenerator::visitCopySignF(LCopySignF* lir) { FloatRegister lhs = ToFloatRegister(lir->getOperand(0)); FloatRegister rhs = ToFloatRegister(lir->getOperand(1)); FloatRegister out = ToFloatRegister(lir->output()); if (lhs == rhs) { if (lhs != out) { masm.moveFloat32(lhs, out); } return; } masm.copySignFloat32(lhs, rhs, out); } void CodeGenerator::visitCopySignD(LCopySignD* lir) { FloatRegister lhs = ToFloatRegister(lir->getOperand(0)); FloatRegister rhs = ToFloatRegister(lir->getOperand(1)); FloatRegister out = ToFloatRegister(lir->output()); if (lhs == rhs) { if (lhs != out) { masm.moveDouble(lhs, out); } return; } masm.copySignDouble(lhs, rhs, out); } void CodeGenerator::visitRotateI64(LRotateI64* lir) { MRotate* mir = lir->mir(); LAllocation* count = lir->count(); Register64 input = ToRegister64(lir->input()); Register64 output = ToOutRegister64(lir); Register temp = ToTempRegisterOrInvalid(lir->temp()); MOZ_ASSERT(input == output); if (count->isConstant()) { int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F); if (!c) { return; } if (mir->isLeftRotate()) { masm.rotateLeft64(Imm32(c), input, output, temp); } else { masm.rotateRight64(Imm32(c), input, output, temp); } } else { if (mir->isLeftRotate()) { masm.rotateLeft64(ToRegister(count), input, output, temp); } else { masm.rotateRight64(ToRegister(count), input, output, temp); } } } void CodeGenerator::visitPopcntI64(LPopcntI64* lir) { Register64 input = ToRegister64(lir->getInt64Operand(0)); Register64 output = ToOutRegister64(lir); Register temp = InvalidReg; if (!AssemblerX86Shared::HasPOPCNT()) { temp = ToRegister(lir->getTemp(0)); } masm.popcnt64(input, output, temp); } void CodeGenerator::visitSimd128(LSimd128* ins) { #ifdef ENABLE_WASM_SIMD const LDefinition* out = ins->getDef(0); masm.loadConstantSimd128(ins->simd128(), ToFloatRegister(out)); #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) { #ifdef ENABLE_WASM_SIMD switch (ins->simdOp()) { case wasm::SimdOp::V128Bitselect: { FloatRegister lhsDest = ToFloatRegister(ins->v0()); FloatRegister rhs = ToFloatRegister(ins->v1()); FloatRegister control = ToFloatRegister(ins->v2()); FloatRegister temp = ToFloatRegister(ins->temp()); masm.bitwiseSelectSimd128(control, lhsDest, rhs, lhsDest, temp); break; } case wasm::SimdOp::F32x4RelaxedFma: masm.fmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2())); break; case wasm::SimdOp::F32x4RelaxedFnma: masm.fnmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2())); break; case wasm::SimdOp::F64x2RelaxedFma: masm.fmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2())); break; case wasm::SimdOp::F64x2RelaxedFnma: masm.fnmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2())); break; case wasm::SimdOp::I8x16RelaxedLaneSelect: case wasm::SimdOp::I16x8RelaxedLaneSelect: case wasm::SimdOp::I32x4RelaxedLaneSelect: case wasm::SimdOp::I64x2RelaxedLaneSelect: { FloatRegister lhs = ToFloatRegister(ins->v0()); FloatRegister rhs = ToFloatRegister(ins->v1()); FloatRegister mask = ToFloatRegister(ins->v2()); FloatRegister dest = ToFloatRegister(ins->output()); masm.laneSelectSimd128(mask, lhs, rhs, dest); break; } case wasm::SimdOp::I32x4DotI8x16I7x16AddS: masm.dotInt8x16Int7x16ThenAdd(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2())); break; case wasm::SimdOp::F32x4RelaxedDotBF16x8AddF32x4: masm.dotBFloat16x8ThenAdd( ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2()), ToFloatRegister(ins->temp())); break; default: MOZ_CRASH("NYI"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister lhs = ToFloatRegister(ins->lhsDest()); FloatRegister rhs = ToFloatRegister(ins->rhs()); FloatRegister temp1 = ToTempFloatRegisterOrInvalid(ins->getTemp(0)); FloatRegister temp2 = ToTempFloatRegisterOrInvalid(ins->getTemp(1)); FloatRegister dest = ToFloatRegister(ins->output()); switch (ins->simdOp()) { case wasm::SimdOp::V128And: masm.bitwiseAndSimd128(lhs, rhs, dest); break; case wasm::SimdOp::V128Or: masm.bitwiseOrSimd128(lhs, rhs, dest); break; case wasm::SimdOp::V128Xor: masm.bitwiseXorSimd128(lhs, rhs, dest); break; case wasm::SimdOp::V128AndNot: // x86/x64 specific: The CPU provides ~A & B. The operands were swapped // during lowering, and we'll compute A & ~B here as desired. masm.bitwiseNotAndSimd128(lhs, rhs, dest); break; case wasm::SimdOp::I8x16AvgrU: masm.unsignedAverageInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8AvgrU: masm.unsignedAverageInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I8x16Add: masm.addInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16AddSatS: masm.addSatInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16AddSatU: masm.unsignedAddSatInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16Sub: masm.subInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16SubSatS: masm.subSatInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16SubSatU: masm.unsignedSubSatInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16MinS: masm.minInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16MinU: masm.unsignedMinInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16MaxS: masm.maxInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16MaxU: masm.unsignedMaxInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8Add: masm.addInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8AddSatS: masm.addSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8AddSatU: masm.unsignedAddSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8Sub: masm.subInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8SubSatS: masm.subSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8SubSatU: masm.unsignedSubSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8Mul: masm.mulInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8MinS: masm.minInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8MinU: masm.unsignedMinInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8MaxS: masm.maxInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8MaxU: masm.unsignedMaxInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4Add: masm.addInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I32x4Sub: masm.subInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I32x4Mul: masm.mulInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I32x4MinS: masm.minInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I32x4MinU: masm.unsignedMinInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I32x4MaxS: masm.maxInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I32x4MaxU: masm.unsignedMaxInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I64x2Add: masm.addInt64x2(lhs, rhs, dest); break; case wasm::SimdOp::I64x2Sub: masm.subInt64x2(lhs, rhs, dest); break; case wasm::SimdOp::I64x2Mul: masm.mulInt64x2(lhs, rhs, dest, temp1); break; case wasm::SimdOp::F32x4Add: masm.addFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F32x4Sub: masm.subFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F32x4Mul: masm.mulFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F32x4Div: masm.divFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F32x4Min: masm.minFloat32x4(lhs, rhs, dest, temp1, temp2); break; case wasm::SimdOp::F32x4Max: masm.maxFloat32x4(lhs, rhs, dest, temp1, temp2); break; case wasm::SimdOp::F64x2Add: masm.addFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::F64x2Sub: masm.subFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::F64x2Mul: masm.mulFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::F64x2Div: masm.divFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::F64x2Min: masm.minFloat64x2(lhs, rhs, dest, temp1, temp2); break; case wasm::SimdOp::F64x2Max: masm.maxFloat64x2(lhs, rhs, dest, temp1, temp2); break; case wasm::SimdOp::I8x16Swizzle: masm.swizzleInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16RelaxedSwizzle: masm.swizzleInt8x16Relaxed(lhs, rhs, dest); break; case wasm::SimdOp::I8x16NarrowI16x8S: masm.narrowInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I8x16NarrowI16x8U: masm.unsignedNarrowInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8NarrowI32x4S: masm.narrowInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I16x8NarrowI32x4U: masm.unsignedNarrowInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I8x16Eq: masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::I8x16Ne: masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::I8x16LtS: masm.compareInt8x16(Assembler::LessThan, lhs, rhs, dest); break; case wasm::SimdOp::I8x16GtS: masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest); break; case wasm::SimdOp::I8x16LeS: masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I8x16GeS: masm.compareInt8x16(Assembler::GreaterThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I8x16LtU: masm.compareInt8x16(Assembler::Below, lhs, rhs, dest); break; case wasm::SimdOp::I8x16GtU: masm.compareInt8x16(Assembler::Above, lhs, rhs, dest); break; case wasm::SimdOp::I8x16LeU: masm.compareInt8x16(Assembler::BelowOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I8x16GeU: masm.compareInt8x16(Assembler::AboveOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I16x8Eq: masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::I16x8Ne: masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::I16x8LtS: masm.compareInt16x8(Assembler::LessThan, lhs, rhs, dest); break; case wasm::SimdOp::I16x8GtS: masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest); break; case wasm::SimdOp::I16x8LeS: masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I16x8GeS: masm.compareInt16x8(Assembler::GreaterThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I16x8LtU: masm.compareInt16x8(Assembler::Below, lhs, rhs, dest); break; case wasm::SimdOp::I16x8GtU: masm.compareInt16x8(Assembler::Above, lhs, rhs, dest); break; case wasm::SimdOp::I16x8LeU: masm.compareInt16x8(Assembler::BelowOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I16x8GeU: masm.compareInt16x8(Assembler::AboveOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I32x4Eq: masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::I32x4Ne: masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::I32x4LtS: masm.compareInt32x4(Assembler::LessThan, lhs, rhs, dest); break; case wasm::SimdOp::I32x4GtS: masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest); break; case wasm::SimdOp::I32x4LeS: masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I32x4GeS: masm.compareInt32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I32x4LtU: masm.compareInt32x4(Assembler::Below, lhs, rhs, dest); break; case wasm::SimdOp::I32x4GtU: masm.compareInt32x4(Assembler::Above, lhs, rhs, dest); break; case wasm::SimdOp::I32x4LeU: masm.compareInt32x4(Assembler::BelowOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I32x4GeU: masm.compareInt32x4(Assembler::AboveOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I64x2Eq: masm.compareForEqualityInt64x2(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::I64x2Ne: masm.compareForEqualityInt64x2(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::I64x2LtS: masm.compareForOrderingInt64x2(Assembler::LessThan, lhs, rhs, dest, temp1, temp2); break; case wasm::SimdOp::I64x2GtS: masm.compareForOrderingInt64x2(Assembler::GreaterThan, lhs, rhs, dest, temp1, temp2); break; case wasm::SimdOp::I64x2LeS: masm.compareForOrderingInt64x2(Assembler::LessThanOrEqual, lhs, rhs, dest, temp1, temp2); break; case wasm::SimdOp::I64x2GeS: masm.compareForOrderingInt64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest, temp1, temp2); break; case wasm::SimdOp::F32x4Eq: masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::F32x4Ne: masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::F32x4Lt: masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest); break; case wasm::SimdOp::F32x4Le: masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::F64x2Eq: masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::F64x2Ne: masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::F64x2Lt: masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest); break; case wasm::SimdOp::F64x2Le: masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::F32x4PMax: // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. masm.pseudoMaxFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F32x4PMin: // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. masm.pseudoMinFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F64x2PMax: // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. masm.pseudoMaxFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::F64x2PMin: // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. masm.pseudoMinFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::I32x4DotI16x8S: masm.widenDotInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8ExtmulLowI8x16S: masm.extMulLowInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8ExtmulHighI8x16S: masm.extMulHighInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8ExtmulLowI8x16U: masm.unsignedExtMulLowInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8ExtmulHighI8x16U: masm.unsignedExtMulHighInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I32x4ExtmulLowI16x8S: masm.extMulLowInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4ExtmulHighI16x8S: masm.extMulHighInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4ExtmulLowI16x8U: masm.unsignedExtMulLowInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4ExtmulHighI16x8U: masm.unsignedExtMulHighInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I64x2ExtmulLowI32x4S: masm.extMulLowInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I64x2ExtmulHighI32x4S: masm.extMulHighInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I64x2ExtmulLowI32x4U: masm.unsignedExtMulLowInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I64x2ExtmulHighI32x4U: masm.unsignedExtMulHighInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I16x8Q15MulrSatS: masm.q15MulrSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::F32x4RelaxedMin: masm.minFloat32x4Relaxed(lhs, rhs, dest); break; case wasm::SimdOp::F32x4RelaxedMax: masm.maxFloat32x4Relaxed(lhs, rhs, dest); break; case wasm::SimdOp::F64x2RelaxedMin: masm.minFloat64x2Relaxed(lhs, rhs, dest); break; case wasm::SimdOp::F64x2RelaxedMax: masm.maxFloat64x2Relaxed(lhs, rhs, dest); break; case wasm::SimdOp::I16x8RelaxedQ15MulrS: masm.q15MulrInt16x8Relaxed(lhs, rhs, dest); break; case wasm::SimdOp::I16x8DotI8x16I7x16S: masm.dotInt8x16Int7x16(lhs, rhs, dest); break; case wasm::SimdOp::MozPMADDUBSW: masm.vpmaddubsw(rhs, lhs, dest); break; default: MOZ_CRASH("Binary SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmBinarySimd128WithConstant( LWasmBinarySimd128WithConstant* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister lhs = ToFloatRegister(ins->lhsDest()); const SimdConstant& rhs = ins->rhs(); FloatRegister dest = ToFloatRegister(ins->output()); FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->getTemp(0)); switch (ins->simdOp()) { case wasm::SimdOp::I8x16Add: masm.addInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8Add: masm.addInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4Add: masm.addInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I64x2Add: masm.addInt64x2(lhs, rhs, dest); break; case wasm::SimdOp::I8x16Sub: masm.subInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8Sub: masm.subInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4Sub: masm.subInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I64x2Sub: masm.subInt64x2(lhs, rhs, dest); break; case wasm::SimdOp::I16x8Mul: masm.mulInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4Mul: masm.mulInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I8x16AddSatS: masm.addSatInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16AddSatU: masm.unsignedAddSatInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8AddSatS: masm.addSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8AddSatU: masm.unsignedAddSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I8x16SubSatS: masm.subSatInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16SubSatU: masm.unsignedSubSatInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8SubSatS: masm.subSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8SubSatU: masm.unsignedSubSatInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I8x16MinS: masm.minInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16MinU: masm.unsignedMinInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8MinS: masm.minInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8MinU: masm.unsignedMinInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4MinS: masm.minInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I32x4MinU: masm.unsignedMinInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I8x16MaxS: masm.maxInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I8x16MaxU: masm.unsignedMaxInt8x16(lhs, rhs, dest); break; case wasm::SimdOp::I16x8MaxS: masm.maxInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8MaxU: masm.unsignedMaxInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I32x4MaxS: masm.maxInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I32x4MaxU: masm.unsignedMaxInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::V128And: masm.bitwiseAndSimd128(lhs, rhs, dest); break; case wasm::SimdOp::V128Or: masm.bitwiseOrSimd128(lhs, rhs, dest); break; case wasm::SimdOp::V128Xor: masm.bitwiseXorSimd128(lhs, rhs, dest); break; case wasm::SimdOp::I8x16Eq: masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::I8x16Ne: masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::I8x16GtS: masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest); break; case wasm::SimdOp::I8x16LeS: masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I16x8Eq: masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::I16x8Ne: masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::I16x8GtS: masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest); break; case wasm::SimdOp::I16x8LeS: masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I32x4Eq: masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::I32x4Ne: masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::I32x4GtS: masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest); break; case wasm::SimdOp::I32x4LeS: masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I64x2Mul: masm.mulInt64x2(lhs, rhs, dest, temp); break; case wasm::SimdOp::F32x4Eq: masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::F32x4Ne: masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::F32x4Lt: masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest); break; case wasm::SimdOp::F32x4Le: masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::F64x2Eq: masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest); break; case wasm::SimdOp::F64x2Ne: masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest); break; case wasm::SimdOp::F64x2Lt: masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest); break; case wasm::SimdOp::F64x2Le: masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest); break; case wasm::SimdOp::I32x4DotI16x8S: masm.widenDotInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::F32x4Add: masm.addFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F64x2Add: masm.addFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::F32x4Sub: masm.subFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F64x2Sub: masm.subFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::F32x4Div: masm.divFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F64x2Div: masm.divFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::F32x4Mul: masm.mulFloat32x4(lhs, rhs, dest); break; case wasm::SimdOp::F64x2Mul: masm.mulFloat64x2(lhs, rhs, dest); break; case wasm::SimdOp::I8x16NarrowI16x8S: masm.narrowInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I8x16NarrowI16x8U: masm.unsignedNarrowInt16x8(lhs, rhs, dest); break; case wasm::SimdOp::I16x8NarrowI32x4S: masm.narrowInt32x4(lhs, rhs, dest); break; case wasm::SimdOp::I16x8NarrowI32x4U: masm.unsignedNarrowInt32x4(lhs, rhs, dest); break; default: MOZ_CRASH("Binary SimdOp with constant not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmVariableShiftSimd128( LWasmVariableShiftSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister lhsDest = ToFloatRegister(ins->lhsDest()); Register rhs = ToRegister(ins->rhs()); FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->getTemp(0)); MOZ_ASSERT(ToFloatRegister(ins->output()) == lhsDest); switch (ins->simdOp()) { case wasm::SimdOp::I8x16Shl: masm.leftShiftInt8x16(rhs, lhsDest, temp); break; case wasm::SimdOp::I8x16ShrS: masm.rightShiftInt8x16(rhs, lhsDest, temp); break; case wasm::SimdOp::I8x16ShrU: masm.unsignedRightShiftInt8x16(rhs, lhsDest, temp); break; case wasm::SimdOp::I16x8Shl: masm.leftShiftInt16x8(rhs, lhsDest); break; case wasm::SimdOp::I16x8ShrS: masm.rightShiftInt16x8(rhs, lhsDest); break; case wasm::SimdOp::I16x8ShrU: masm.unsignedRightShiftInt16x8(rhs, lhsDest); break; case wasm::SimdOp::I32x4Shl: masm.leftShiftInt32x4(rhs, lhsDest); break; case wasm::SimdOp::I32x4ShrS: masm.rightShiftInt32x4(rhs, lhsDest); break; case wasm::SimdOp::I32x4ShrU: masm.unsignedRightShiftInt32x4(rhs, lhsDest); break; case wasm::SimdOp::I64x2Shl: masm.leftShiftInt64x2(rhs, lhsDest); break; case wasm::SimdOp::I64x2ShrS: masm.rightShiftInt64x2(rhs, lhsDest, temp); break; case wasm::SimdOp::I64x2ShrU: masm.unsignedRightShiftInt64x2(rhs, lhsDest); break; default: MOZ_CRASH("Shift SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmConstantShiftSimd128( LWasmConstantShiftSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister src = ToFloatRegister(ins->src()); FloatRegister dest = ToFloatRegister(ins->output()); int32_t shift = ins->shift(); if (shift == 0) { masm.moveSimd128(src, dest); return; } switch (ins->simdOp()) { case wasm::SimdOp::I8x16Shl: masm.leftShiftInt8x16(Imm32(shift), src, dest); break; case wasm::SimdOp::I8x16ShrS: masm.rightShiftInt8x16(Imm32(shift), src, dest); break; case wasm::SimdOp::I8x16ShrU: masm.unsignedRightShiftInt8x16(Imm32(shift), src, dest); break; case wasm::SimdOp::I16x8Shl: masm.leftShiftInt16x8(Imm32(shift), src, dest); break; case wasm::SimdOp::I16x8ShrS: masm.rightShiftInt16x8(Imm32(shift), src, dest); break; case wasm::SimdOp::I16x8ShrU: masm.unsignedRightShiftInt16x8(Imm32(shift), src, dest); break; case wasm::SimdOp::I32x4Shl: masm.leftShiftInt32x4(Imm32(shift), src, dest); break; case wasm::SimdOp::I32x4ShrS: masm.rightShiftInt32x4(Imm32(shift), src, dest); break; case wasm::SimdOp::I32x4ShrU: masm.unsignedRightShiftInt32x4(Imm32(shift), src, dest); break; case wasm::SimdOp::I64x2Shl: masm.leftShiftInt64x2(Imm32(shift), src, dest); break; case wasm::SimdOp::I64x2ShrS: masm.rightShiftInt64x2(Imm32(shift), src, dest); break; case wasm::SimdOp::I64x2ShrU: masm.unsignedRightShiftInt64x2(Imm32(shift), src, dest); break; default: MOZ_CRASH("Shift SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmSignReplicationSimd128( LWasmSignReplicationSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister src = ToFloatRegister(ins->src()); FloatRegister dest = ToFloatRegister(ins->output()); switch (ins->simdOp()) { case wasm::SimdOp::I8x16ShrS: masm.signReplicationInt8x16(src, dest); break; case wasm::SimdOp::I16x8ShrS: masm.signReplicationInt16x8(src, dest); break; case wasm::SimdOp::I32x4ShrS: masm.signReplicationInt32x4(src, dest); break; case wasm::SimdOp::I64x2ShrS: masm.signReplicationInt64x2(src, dest); break; default: MOZ_CRASH("Shift SimdOp unsupported sign replication optimization"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister lhsDest = ToFloatRegister(ins->lhsDest()); FloatRegister rhs = ToFloatRegister(ins->rhs()); SimdConstant control = ins->control(); FloatRegister output = ToFloatRegister(ins->output()); switch (ins->op()) { case SimdShuffleOp::BLEND_8x16: { masm.blendInt8x16(reinterpret_cast(control.asInt8x16()), lhsDest, rhs, output, ToFloatRegister(ins->temp())); break; } case SimdShuffleOp::BLEND_16x8: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.blendInt16x8(reinterpret_cast(control.asInt16x8()), lhsDest, rhs, output); break; } case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: { MOZ_ASSERT(ins->temp()->isBogusTemp()); int8_t count = 16 - control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.concatAndRightShiftSimd128(lhsDest, rhs, output, count); break; } case SimdShuffleOp::INTERLEAVE_HIGH_8x16: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveHighInt8x16(lhsDest, rhs, output); break; } case SimdShuffleOp::INTERLEAVE_HIGH_16x8: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveHighInt16x8(lhsDest, rhs, output); break; } case SimdShuffleOp::INTERLEAVE_HIGH_32x4: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveHighInt32x4(lhsDest, rhs, output); break; } case SimdShuffleOp::INTERLEAVE_HIGH_64x2: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveHighInt64x2(lhsDest, rhs, output); break; } case SimdShuffleOp::INTERLEAVE_LOW_8x16: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveLowInt8x16(lhsDest, rhs, output); break; } case SimdShuffleOp::INTERLEAVE_LOW_16x8: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveLowInt16x8(lhsDest, rhs, output); break; } case SimdShuffleOp::INTERLEAVE_LOW_32x4: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveLowInt32x4(lhsDest, rhs, output); break; } case SimdShuffleOp::INTERLEAVE_LOW_64x2: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveLowInt64x2(lhsDest, rhs, output); break; } case SimdShuffleOp::SHUFFLE_BLEND_8x16: { masm.shuffleInt8x16(reinterpret_cast(control.asInt8x16()), lhsDest, rhs, output); break; } default: { MOZ_CRASH("Unsupported SIMD shuffle operation"); } } #else MOZ_CRASH("No SIMD"); #endif } #ifdef ENABLE_WASM_SIMD enum PermuteX64I16x8Action : uint16_t { UNAVAILABLE = 0, SWAP_QWORDS = 1, // Swap qwords first PERM_LOW = 2, // Permute low qword by control_[0..3] PERM_HIGH = 4 // Permute high qword by control_[4..7] }; // Skip lanes that equal v starting at i, returning the index just beyond the // last of those. There is no requirement that the initial lanes[i] == v. template static int ScanConstant(const T* lanes, int v, int i) { int len = int(16 / sizeof(T)); MOZ_ASSERT(i <= len); while (i < len && lanes[i] == v) { i++; } return i; } // Apply a transformation to each lane value. template static void MapLanes(T* result, const T* input, int (*f)(int)) { int len = int(16 / sizeof(T)); for (int i = 0; i < len; i++) { result[i] = f(input[i]); } } // Recognize part of an identity permutation starting at start, with // the first value of the permutation expected to be bias. template static bool IsIdentity(const T* lanes, int start, int len, int bias) { if (lanes[start] != bias) { return false; } for (int i = start + 1; i < start + len; i++) { if (lanes[i] != lanes[i - 1] + 1) { return false; } } return true; } // We can permute by words if the mask is reducible to a word mask, but the x64 // lowering is only efficient if we can permute the high and low quadwords // separately, possibly after swapping quadwords. static PermuteX64I16x8Action CalculateX64Permute16x8(SimdConstant* control) { const SimdConstant::I16x8& lanes = control->asInt16x8(); SimdConstant::I16x8 mapped; MapLanes(mapped, lanes, [](int x) -> int { return x < 4 ? 0 : 1; }); int i = ScanConstant(mapped, mapped[0], 0); if (i != 4) { return PermuteX64I16x8Action::UNAVAILABLE; } i = ScanConstant(mapped, mapped[4], 4); if (i != 8) { return PermuteX64I16x8Action::UNAVAILABLE; } // Now compute the operation bits. `mapped` holds the adjusted lane mask. memcpy(mapped, lanes, sizeof(mapped)); uint16_t op = 0; if (mapped[0] > mapped[4]) { op |= PermuteX64I16x8Action::SWAP_QWORDS; } for (auto& m : mapped) { m &= 3; } if (!IsIdentity(mapped, 0, 4, 0)) { op |= PermuteX64I16x8Action::PERM_LOW; } if (!IsIdentity(mapped, 4, 4, 0)) { op |= PermuteX64I16x8Action::PERM_HIGH; } MOZ_ASSERT(op != PermuteX64I16x8Action::UNAVAILABLE); *control = SimdConstant::CreateX8(mapped); return (PermuteX64I16x8Action)op; } #endif void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister src = ToFloatRegister(ins->src()); FloatRegister dest = ToFloatRegister(ins->output()); SimdConstant control = ins->control(); switch (ins->op()) { // For broadcast, would MOVDDUP be better than PSHUFD for the last step? case SimdPermuteOp::BROADCAST_8x16: { const SimdConstant::I8x16& mask = control.asInt8x16(); int8_t source = mask[0]; if (source == 0 && Assembler::HasAVX2()) { masm.vbroadcastb(Operand(src), dest); break; } MOZ_ASSERT_IF(!Assembler::HasAVX(), src == dest); if (source < 8) { masm.interleaveLowInt8x16(src, src, dest); } else { masm.interleaveHighInt8x16(src, src, dest); source -= 8; } uint16_t v = uint16_t(source & 3); uint16_t wordMask[4] = {v, v, v, v}; if (source < 4) { masm.permuteLowInt16x8(wordMask, dest, dest); uint32_t dwordMask[4] = {0, 0, 0, 0}; masm.permuteInt32x4(dwordMask, dest, dest); } else { masm.permuteHighInt16x8(wordMask, dest, dest); uint32_t dwordMask[4] = {2, 2, 2, 2}; masm.permuteInt32x4(dwordMask, dest, dest); } break; } case SimdPermuteOp::BROADCAST_16x8: { const SimdConstant::I16x8& mask = control.asInt16x8(); int16_t source = mask[0]; if (source == 0 && Assembler::HasAVX2()) { masm.vbroadcastw(Operand(src), dest); break; } uint16_t v = uint16_t(source & 3); uint16_t wordMask[4] = {v, v, v, v}; if (source < 4) { masm.permuteLowInt16x8(wordMask, src, dest); uint32_t dwordMask[4] = {0, 0, 0, 0}; masm.permuteInt32x4(dwordMask, dest, dest); } else { masm.permuteHighInt16x8(wordMask, src, dest); uint32_t dwordMask[4] = {2, 2, 2, 2}; masm.permuteInt32x4(dwordMask, dest, dest); } break; } case SimdPermuteOp::MOVE: { masm.moveSimd128(src, dest); break; } case SimdPermuteOp::PERMUTE_8x16: { const SimdConstant::I8x16& mask = control.asInt8x16(); # ifdef DEBUG DebugOnly i; for (i = 0; i < 16 && mask[i] == i; i++) { } MOZ_ASSERT(i < 16, "Should have been a MOVE operation"); # endif masm.permuteInt8x16(reinterpret_cast(mask), src, dest); break; } case SimdPermuteOp::PERMUTE_16x8: { # ifdef DEBUG const SimdConstant::I16x8& mask = control.asInt16x8(); DebugOnly i; for (i = 0; i < 8 && mask[i] == i; i++) { } MOZ_ASSERT(i < 8, "Should have been a MOVE operation"); # endif PermuteX64I16x8Action op = CalculateX64Permute16x8(&control); if (op != PermuteX64I16x8Action::UNAVAILABLE) { const SimdConstant::I16x8& mask = control.asInt16x8(); if (op & PermuteX64I16x8Action::SWAP_QWORDS) { uint32_t dwordMask[4] = {2, 3, 0, 1}; masm.permuteInt32x4(dwordMask, src, dest); src = dest; } if (op & PermuteX64I16x8Action::PERM_LOW) { masm.permuteLowInt16x8(reinterpret_cast(mask) + 0, src, dest); src = dest; } if (op & PermuteX64I16x8Action::PERM_HIGH) { masm.permuteHighInt16x8(reinterpret_cast(mask) + 4, src, dest); src = dest; } } else { const SimdConstant::I16x8& wmask = control.asInt16x8(); uint8_t mask[16]; for (unsigned i = 0; i < 16; i += 2) { mask[i] = wmask[i / 2] * 2; mask[i + 1] = wmask[i / 2] * 2 + 1; } masm.permuteInt8x16(mask, src, dest); } break; } case SimdPermuteOp::PERMUTE_32x4: { const SimdConstant::I32x4& mask = control.asInt32x4(); if (Assembler::HasAVX2() && mask[0] == 0 && mask[1] == 0 && mask[2] == 0 && mask[3] == 0) { masm.vbroadcastd(Operand(src), dest); break; } # ifdef DEBUG DebugOnly i; for (i = 0; i < 4 && mask[i] == i; i++) { } MOZ_ASSERT(i < 4, "Should have been a MOVE operation"); # endif masm.permuteInt32x4(reinterpret_cast(mask), src, dest); break; } case SimdPermuteOp::ROTATE_RIGHT_8x16: { MOZ_ASSERT_IF(!Assembler::HasAVX(), src == dest); int8_t count = control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.concatAndRightShiftSimd128(src, src, dest, count); break; } case SimdPermuteOp::SHIFT_LEFT_8x16: { int8_t count = control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.leftShiftSimd128(Imm32(count), src, dest); break; } case SimdPermuteOp::SHIFT_RIGHT_8x16: { int8_t count = control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.rightShiftSimd128(Imm32(count), src, dest); break; } case SimdPermuteOp::REVERSE_16x8: masm.reverseInt16x8(src, dest); break; case SimdPermuteOp::REVERSE_32x4: masm.reverseInt32x4(src, dest); break; case SimdPermuteOp::REVERSE_64x2: masm.reverseInt64x2(src, dest); break; default: { MOZ_CRASH("Unsupported SIMD permutation operation"); } } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister lhs = ToFloatRegister(ins->lhsDest()); FloatRegister dest = ToFloatRegister(ins->output()); const LAllocation* rhs = ins->rhs(); uint32_t laneIndex = ins->laneIndex(); switch (ins->simdOp()) { case wasm::SimdOp::I8x16ReplaceLane: masm.replaceLaneInt8x16(laneIndex, lhs, ToRegister(rhs), dest); break; case wasm::SimdOp::I16x8ReplaceLane: masm.replaceLaneInt16x8(laneIndex, lhs, ToRegister(rhs), dest); break; case wasm::SimdOp::I32x4ReplaceLane: masm.replaceLaneInt32x4(laneIndex, lhs, ToRegister(rhs), dest); break; case wasm::SimdOp::F32x4ReplaceLane: masm.replaceLaneFloat32x4(laneIndex, lhs, ToFloatRegister(rhs), dest); break; case wasm::SimdOp::F64x2ReplaceLane: masm.replaceLaneFloat64x2(laneIndex, lhs, ToFloatRegister(rhs), dest); break; default: MOZ_CRASH("ReplaceLane SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmReplaceInt64LaneSimd128( LWasmReplaceInt64LaneSimd128* ins) { #ifdef ENABLE_WASM_SIMD MOZ_RELEASE_ASSERT(ins->simdOp() == wasm::SimdOp::I64x2ReplaceLane); masm.replaceLaneInt64x2(ins->laneIndex(), ToFloatRegister(ins->lhs()), ToRegister64(ins->rhs()), ToFloatRegister(ins->output())); #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister dest = ToFloatRegister(ins->output()); switch (ins->simdOp()) { case wasm::SimdOp::I8x16Splat: masm.splatX16(ToRegister(ins->src()), dest); break; case wasm::SimdOp::I16x8Splat: masm.splatX8(ToRegister(ins->src()), dest); break; case wasm::SimdOp::I32x4Splat: masm.splatX4(ToRegister(ins->src()), dest); break; case wasm::SimdOp::F32x4Splat: masm.splatX4(ToFloatRegister(ins->src()), dest); break; case wasm::SimdOp::F64x2Splat: masm.splatX2(ToFloatRegister(ins->src()), dest); break; default: MOZ_CRASH("ScalarToSimd128 SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) { #ifdef ENABLE_WASM_SIMD Register64 src = ToRegister64(ins->src()); FloatRegister dest = ToFloatRegister(ins->output()); switch (ins->simdOp()) { case wasm::SimdOp::I64x2Splat: masm.splatX2(src, dest); break; case wasm::SimdOp::V128Load8x8S: masm.moveGPR64ToDouble(src, dest); masm.widenLowInt8x16(dest, dest); break; case wasm::SimdOp::V128Load8x8U: masm.moveGPR64ToDouble(src, dest); masm.unsignedWidenLowInt8x16(dest, dest); break; case wasm::SimdOp::V128Load16x4S: masm.moveGPR64ToDouble(src, dest); masm.widenLowInt16x8(dest, dest); break; case wasm::SimdOp::V128Load16x4U: masm.moveGPR64ToDouble(src, dest); masm.unsignedWidenLowInt16x8(dest, dest); break; case wasm::SimdOp::V128Load32x2S: masm.moveGPR64ToDouble(src, dest); masm.widenLowInt32x4(dest, dest); break; case wasm::SimdOp::V128Load32x2U: masm.moveGPR64ToDouble(src, dest); masm.unsignedWidenLowInt32x4(dest, dest); break; default: MOZ_CRASH("Int64ToSimd128 SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister src = ToFloatRegister(ins->src()); FloatRegister dest = ToFloatRegister(ins->output()); switch (ins->simdOp()) { case wasm::SimdOp::I8x16Neg: masm.negInt8x16(src, dest); break; case wasm::SimdOp::I16x8Neg: masm.negInt16x8(src, dest); break; case wasm::SimdOp::I16x8ExtendLowI8x16S: masm.widenLowInt8x16(src, dest); break; case wasm::SimdOp::I16x8ExtendHighI8x16S: masm.widenHighInt8x16(src, dest); break; case wasm::SimdOp::I16x8ExtendLowI8x16U: masm.unsignedWidenLowInt8x16(src, dest); break; case wasm::SimdOp::I16x8ExtendHighI8x16U: masm.unsignedWidenHighInt8x16(src, dest); break; case wasm::SimdOp::I32x4Neg: masm.negInt32x4(src, dest); break; case wasm::SimdOp::I32x4ExtendLowI16x8S: masm.widenLowInt16x8(src, dest); break; case wasm::SimdOp::I32x4ExtendHighI16x8S: masm.widenHighInt16x8(src, dest); break; case wasm::SimdOp::I32x4ExtendLowI16x8U: masm.unsignedWidenLowInt16x8(src, dest); break; case wasm::SimdOp::I32x4ExtendHighI16x8U: masm.unsignedWidenHighInt16x8(src, dest); break; case wasm::SimdOp::I32x4TruncSatF32x4S: masm.truncSatFloat32x4ToInt32x4(src, dest); break; case wasm::SimdOp::I32x4TruncSatF32x4U: masm.unsignedTruncSatFloat32x4ToInt32x4(src, dest, ToFloatRegister(ins->temp())); break; case wasm::SimdOp::I64x2Neg: masm.negInt64x2(src, dest); break; case wasm::SimdOp::I64x2ExtendLowI32x4S: masm.widenLowInt32x4(src, dest); break; case wasm::SimdOp::I64x2ExtendHighI32x4S: masm.widenHighInt32x4(src, dest); break; case wasm::SimdOp::I64x2ExtendLowI32x4U: masm.unsignedWidenLowInt32x4(src, dest); break; case wasm::SimdOp::I64x2ExtendHighI32x4U: masm.unsignedWidenHighInt32x4(src, dest); break; case wasm::SimdOp::F32x4Abs: masm.absFloat32x4(src, dest); break; case wasm::SimdOp::F32x4Neg: masm.negFloat32x4(src, dest); break; case wasm::SimdOp::F32x4Sqrt: masm.sqrtFloat32x4(src, dest); break; case wasm::SimdOp::F32x4ConvertI32x4S: masm.convertInt32x4ToFloat32x4(src, dest); break; case wasm::SimdOp::F32x4ConvertI32x4U: masm.unsignedConvertInt32x4ToFloat32x4(src, dest); break; case wasm::SimdOp::F64x2Abs: masm.absFloat64x2(src, dest); break; case wasm::SimdOp::F64x2Neg: masm.negFloat64x2(src, dest); break; case wasm::SimdOp::F64x2Sqrt: masm.sqrtFloat64x2(src, dest); break; case wasm::SimdOp::V128Not: masm.bitwiseNotSimd128(src, dest); break; case wasm::SimdOp::I8x16Popcnt: masm.popcntInt8x16(src, dest, ToFloatRegister(ins->temp())); break; case wasm::SimdOp::I8x16Abs: masm.absInt8x16(src, dest); break; case wasm::SimdOp::I16x8Abs: masm.absInt16x8(src, dest); break; case wasm::SimdOp::I32x4Abs: masm.absInt32x4(src, dest); break; case wasm::SimdOp::I64x2Abs: masm.absInt64x2(src, dest); break; case wasm::SimdOp::F32x4Ceil: masm.ceilFloat32x4(src, dest); break; case wasm::SimdOp::F32x4Floor: masm.floorFloat32x4(src, dest); break; case wasm::SimdOp::F32x4Trunc: masm.truncFloat32x4(src, dest); break; case wasm::SimdOp::F32x4Nearest: masm.nearestFloat32x4(src, dest); break; case wasm::SimdOp::F64x2Ceil: masm.ceilFloat64x2(src, dest); break; case wasm::SimdOp::F64x2Floor: masm.floorFloat64x2(src, dest); break; case wasm::SimdOp::F64x2Trunc: masm.truncFloat64x2(src, dest); break; case wasm::SimdOp::F64x2Nearest: masm.nearestFloat64x2(src, dest); break; case wasm::SimdOp::F32x4DemoteF64x2Zero: masm.convertFloat64x2ToFloat32x4(src, dest); break; case wasm::SimdOp::F64x2PromoteLowF32x4: masm.convertFloat32x4ToFloat64x2(src, dest); break; case wasm::SimdOp::F64x2ConvertLowI32x4S: masm.convertInt32x4ToFloat64x2(src, dest); break; case wasm::SimdOp::F64x2ConvertLowI32x4U: masm.unsignedConvertInt32x4ToFloat64x2(src, dest); break; case wasm::SimdOp::I32x4TruncSatF64x2SZero: masm.truncSatFloat64x2ToInt32x4(src, dest, ToFloatRegister(ins->temp())); break; case wasm::SimdOp::I32x4TruncSatF64x2UZero: masm.unsignedTruncSatFloat64x2ToInt32x4(src, dest, ToFloatRegister(ins->temp())); break; case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S: masm.extAddPairwiseInt8x16(src, dest); break; case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U: masm.unsignedExtAddPairwiseInt8x16(src, dest); break; case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S: masm.extAddPairwiseInt16x8(src, dest); break; case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U: masm.unsignedExtAddPairwiseInt16x8(src, dest); break; case wasm::SimdOp::I32x4RelaxedTruncSSatF32x4: masm.truncSatFloat32x4ToInt32x4Relaxed(src, dest); break; case wasm::SimdOp::I32x4RelaxedTruncUSatF32x4: masm.unsignedTruncSatFloat32x4ToInt32x4Relaxed(src, dest); break; case wasm::SimdOp::I32x4RelaxedTruncSatF64x2SZero: masm.truncSatFloat64x2ToInt32x4Relaxed(src, dest); break; case wasm::SimdOp::I32x4RelaxedTruncSatF64x2UZero: masm.unsignedTruncSatFloat64x2ToInt32x4Relaxed(src, dest); break; default: MOZ_CRASH("Unary SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister src = ToFloatRegister(ins->src()); const LDefinition* dest = ins->output(); uint32_t imm = ins->imm(); switch (ins->simdOp()) { case wasm::SimdOp::V128AnyTrue: masm.anyTrueSimd128(src, ToRegister(dest)); break; case wasm::SimdOp::I8x16AllTrue: masm.allTrueInt8x16(src, ToRegister(dest)); break; case wasm::SimdOp::I16x8AllTrue: masm.allTrueInt16x8(src, ToRegister(dest)); break; case wasm::SimdOp::I32x4AllTrue: masm.allTrueInt32x4(src, ToRegister(dest)); break; case wasm::SimdOp::I64x2AllTrue: masm.allTrueInt64x2(src, ToRegister(dest)); break; case wasm::SimdOp::I8x16Bitmask: masm.bitmaskInt8x16(src, ToRegister(dest)); break; case wasm::SimdOp::I16x8Bitmask: masm.bitmaskInt16x8(src, ToRegister(dest)); break; case wasm::SimdOp::I32x4Bitmask: masm.bitmaskInt32x4(src, ToRegister(dest)); break; case wasm::SimdOp::I64x2Bitmask: masm.bitmaskInt64x2(src, ToRegister(dest)); break; case wasm::SimdOp::I8x16ExtractLaneS: masm.extractLaneInt8x16(imm, src, ToRegister(dest)); break; case wasm::SimdOp::I8x16ExtractLaneU: masm.unsignedExtractLaneInt8x16(imm, src, ToRegister(dest)); break; case wasm::SimdOp::I16x8ExtractLaneS: masm.extractLaneInt16x8(imm, src, ToRegister(dest)); break; case wasm::SimdOp::I16x8ExtractLaneU: masm.unsignedExtractLaneInt16x8(imm, src, ToRegister(dest)); break; case wasm::SimdOp::I32x4ExtractLane: masm.extractLaneInt32x4(imm, src, ToRegister(dest)); break; case wasm::SimdOp::F32x4ExtractLane: masm.extractLaneFloat32x4(imm, src, ToFloatRegister(dest)); break; case wasm::SimdOp::F64x2ExtractLane: masm.extractLaneFloat64x2(imm, src, ToFloatRegister(dest)); break; default: MOZ_CRASH("Reduce SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmReduceAndBranchSimd128( LWasmReduceAndBranchSimd128* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister src = ToFloatRegister(ins->src()); switch (ins->simdOp()) { case wasm::SimdOp::V128AnyTrue: // Set the zero flag if all of the lanes are zero, and branch on that. masm.vptest(src, src); emitBranch(Assembler::NotEqual, ins->ifTrue(), ins->ifFalse()); break; case wasm::SimdOp::I8x16AllTrue: case wasm::SimdOp::I16x8AllTrue: case wasm::SimdOp::I32x4AllTrue: case wasm::SimdOp::I64x2AllTrue: { // Compare all lanes to zero, set the zero flag if none of the lanes are // zero, and branch on that. ScratchSimd128Scope tmp(masm); masm.vpxor(tmp, tmp, tmp); switch (ins->simdOp()) { case wasm::SimdOp::I8x16AllTrue: masm.vpcmpeqb(Operand(src), tmp, tmp); break; case wasm::SimdOp::I16x8AllTrue: masm.vpcmpeqw(Operand(src), tmp, tmp); break; case wasm::SimdOp::I32x4AllTrue: masm.vpcmpeqd(Operand(src), tmp, tmp); break; case wasm::SimdOp::I64x2AllTrue: masm.vpcmpeqq(Operand(src), tmp, tmp); break; default: MOZ_CRASH(); } masm.vptest(tmp, tmp); emitBranch(Assembler::Equal, ins->ifTrue(), ins->ifFalse()); break; } case wasm::SimdOp::I16x8Bitmask: { masm.bitwiseTestSimd128(SimdConstant::SplatX8(0x8000), src); emitBranch(Assembler::NotEqual, ins->ifTrue(), ins->ifFalse()); break; } default: MOZ_CRASH("Reduce-and-branch SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmReduceSimd128ToInt64( LWasmReduceSimd128ToInt64* ins) { #ifdef ENABLE_WASM_SIMD FloatRegister src = ToFloatRegister(ins->src()); Register64 dest = ToOutRegister64(ins); uint32_t imm = ins->imm(); switch (ins->simdOp()) { case wasm::SimdOp::I64x2ExtractLane: masm.extractLaneInt64x2(imm, src, dest); break; default: MOZ_CRASH("Reduce SimdOp not implemented"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) { #ifdef ENABLE_WASM_SIMD const MWasmLoadLaneSimd128* mir = ins->mir(); const wasm::MemoryAccessDesc& access = mir->access(); uint32_t offset = access.offset(); MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); const LAllocation* value = ins->src(); Operand srcAddr = toMemoryAccessOperand(ins, offset); masm.append(access, masm.size()); switch (ins->laneSize()) { case 1: { masm.vpinsrb(ins->laneIndex(), srcAddr, ToFloatRegister(value), ToFloatRegister(value)); break; } case 2: { masm.vpinsrw(ins->laneIndex(), srcAddr, ToFloatRegister(value), ToFloatRegister(value)); break; } case 4: { masm.vinsertps(ins->laneIndex() << 4, srcAddr, ToFloatRegister(value), ToFloatRegister(value)); break; } case 8: { if (ins->laneIndex() == 0) { masm.vmovlps(srcAddr, ToFloatRegister(value), ToFloatRegister(value)); } else { masm.vmovhps(srcAddr, ToFloatRegister(value), ToFloatRegister(value)); } break; } default: MOZ_CRASH("Unsupported load lane size"); } #else MOZ_CRASH("No SIMD"); #endif } void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) { #ifdef ENABLE_WASM_SIMD const MWasmStoreLaneSimd128* mir = ins->mir(); const wasm::MemoryAccessDesc& access = mir->access(); uint32_t offset = access.offset(); MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); const LAllocation* src = ins->src(); Operand destAddr = toMemoryAccessOperand(ins, offset); masm.append(access, masm.size()); switch (ins->laneSize()) { case 1: { masm.vpextrb(ins->laneIndex(), ToFloatRegister(src), destAddr); break; } case 2: { masm.vpextrw(ins->laneIndex(), ToFloatRegister(src), destAddr); break; } case 4: { unsigned lane = ins->laneIndex(); if (lane == 0) { masm.vmovss(ToFloatRegister(src), destAddr); } else { masm.vextractps(lane, ToFloatRegister(src), destAddr); } break; } case 8: { if (ins->laneIndex() == 0) { masm.vmovlps(ToFloatRegister(src), destAddr); } else { masm.vmovhps(ToFloatRegister(src), destAddr); } break; } default: MOZ_CRASH("Unsupported store lane size"); } #else MOZ_CRASH("No SIMD"); #endif } } // namespace jit } // namespace js