Diffstat (limited to 'deps/v8/src/compiler/backend')
40 files changed, 2102 insertions, 943 deletions
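The diff below bundles several related backend changes: ARM/ARM64 SIMD shifts switch from immediate shift counts to variable (register) shift amounts, F32x4Div plus the F64x2/I64x2 instruction families are added, the kArchStackPointer opcode is dropped in favour of kArchStackPointerGreaterThan, and eager/lazy deoptimization bookkeeping is consolidated into DeoptimizationExit. The recurring shift pattern in the code generators duplicates the scalar shift count into a temporary SIMD register and then issues a signed shift-left (vshl/Sshl), negating the duplicated count for right shifts. The sketch below is an editorial illustration of that semantics only; the names I32x4, SignedShiftLeft and I32x4ShrS are invented for the example and are not V8 code.

```cpp
// Stand-alone sketch (not V8 code): models how the new lowering expresses
// I32x4ShrS using only a "shift left by signed per-lane amount" primitive,
// i.e. the semantics of ARM vshl / NEON Sshl. The right shift is obtained by
// duplicating the scalar shift count into every lane and negating it.
#include <array>
#include <cstdint>
#include <iostream>

using I32x4 = std::array<int32_t, 4>;

// Lane-wise "shift left by signed amount": positive -> left, negative -> right.
// (In this demo only negative amounts are used, so no left-shift overflow.)
I32x4 SignedShiftLeft(const I32x4& v, const I32x4& amounts) {
  I32x4 r{};
  for (int i = 0; i < 4; ++i) {
    int32_t a = amounts[i];
    r[i] = a >= 0 ? v[i] << a : v[i] >> -a;  // arithmetic right shift
  }
  return r;
}

I32x4 I32x4ShrS(const I32x4& v, int32_t shift) {
  I32x4 dup{shift, shift, shift, shift};  // vdup Neon32, tmp, shift
  for (auto& lane : dup) lane = -lane;    // vneg Neon32, tmp, tmp
  return SignedShiftLeft(v, dup);         // vshl NeonS32, dst, src, tmp
}

int main() {
  I32x4 v{-8, 16, -32, 64};
  for (int32_t lane : I32x4ShrS(v, 2)) std::cout << lane << ' ';
  std::cout << '\n';  // prints: -2 4 -8 16
}
```

On the actual hardware, vshl/Sshl with a negative per-lane count performs the right shift, which is why the generated code sequences in the diff emit a vdup followed by a vneg before the shift.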
diff --git a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc index 88a9c52a33..65a569d755 100644 --- a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc +++ b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc @@ -909,10 +909,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; unwinding_info_writer_.MarkBlockWillExit(); break; @@ -921,10 +920,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( AssembleReturn(instr->InputAt(0)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; - case kArchStackPointer: - __ mov(i.OutputRegister(), sp); - DCHECK_EQ(LeaveCC, i.OutputSBit()); - break; case kArchFramePointer: __ mov(i.OutputRegister(), fp); DCHECK_EQ(LeaveCC, i.OutputSBit()); @@ -936,6 +931,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), fp); } break; + case kArchStackPointerGreaterThan: { + constexpr size_t kValueIndex = 0; + DCHECK(instr->InputAt(kValueIndex)->IsRegister()); + __ cmp(sp, i.InputRegister(kValueIndex)); + break; + } case kArchTruncateDoubleToI: __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(), i.InputDoubleRegister(0), DetermineStubCallMode()); @@ -1838,6 +1839,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kArmF32x4Div: { + QwNeonRegister dst = i.OutputSimd128Register(); + QwNeonRegister src1 = i.InputSimd128Register(0); + QwNeonRegister src2 = i.InputSimd128Register(1); + DCHECK_EQ(dst, q0); + DCHECK_EQ(src1, q0); + DCHECK_EQ(src2, q1); +#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane) + __ vdiv(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0), S_FROM_Q(src2, 0)); + __ vdiv(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1), S_FROM_Q(src2, 1)); + __ vdiv(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2), S_FROM_Q(src2, 2)); + __ vdiv(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3), S_FROM_Q(src2, 3)); +#undef S_FROM_Q + break; + } case kArmF32x4Min: { __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); @@ -1902,13 +1918,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI32x4Shl: { + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon32, tmp, i.InputRegister(1)); __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt5(1)); + tmp); break; } case kArmI32x4ShrS: { - __ vshr(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt5(1)); + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon32, tmp, i.InputRegister(1)); + __ vneg(Neon32, tmp, tmp); + __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), + tmp); break; } case kArmI32x4Add: { @@ -1976,8 +1997,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI32x4ShrU: { - __ vshr(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt5(1)); + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon32, tmp, i.InputRegister(1)); + __ vneg(Neon32, tmp, tmp); + __ vshl(NeonU32, i.OutputSimd128Register(), 
i.InputSimd128Register(0), + tmp); break; } case kArmI32x4MinU: { @@ -2029,13 +2053,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI16x8Shl: { + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon16, tmp, i.InputRegister(1)); __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt4(1)); + tmp); break; } case kArmI16x8ShrS: { - __ vshr(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt4(1)); + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon16, tmp, i.InputRegister(1)); + __ vneg(Neon16, tmp, tmp); + __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), + tmp); break; } case kArmI16x8SConvertI32x4: @@ -2112,8 +2141,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI16x8ShrU: { - __ vshr(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt4(1)); + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon16, tmp, i.InputRegister(1)); + __ vneg(Neon16, tmp, tmp); + __ vshl(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), + tmp); break; } case kArmI16x8UConvertI32x4: @@ -2168,13 +2200,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI8x16Shl: { + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon8, tmp, i.InputRegister(1)); __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt3(1)); + tmp); break; } case kArmI8x16ShrS: { - __ vshr(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt3(1)); + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon8, tmp, i.InputRegister(1)); + __ vneg(Neon8, tmp, tmp); + __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), + tmp); break; } case kArmI8x16SConvertI16x8: @@ -2237,8 +2274,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmI8x16ShrU: { - __ vshr(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt3(1)); + QwNeonRegister tmp = i.TempSimd128Register(0); + __ vdup(Neon8, tmp, i.InputRegister(1)); + __ vneg(Neon8, tmp, tmp); + __ vshl(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), + tmp); break; } case kArmI8x16UConvertI16x8: @@ -3192,6 +3232,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); } +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { ArmOperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h index 165ca39f9d..3551e26aea 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h +++ b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h @@ -141,6 +141,7 @@ namespace compiler { V(ArmF32x4AddHoriz) \ V(ArmF32x4Sub) \ V(ArmF32x4Mul) \ + V(ArmF32x4Div) \ V(ArmF32x4Min) \ V(ArmF32x4Max) \ V(ArmF32x4Eq) \ diff --git a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc index 41d7b4055f..1d7cf61dfe 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc +++ b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc @@ -121,6 +121,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmF32x4AddHoriz: case 
kArmF32x4Sub: case kArmF32x4Mul: + case kArmF32x4Div: case kArmF32x4Min: case kArmF32x4Max: case kArmF32x4Eq: diff --git a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc index 06aba4491a..ce74faa4a6 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc +++ b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc @@ -74,17 +74,6 @@ class ArmOperandGenerator : public OperandGenerator { } return false; } - - // Use the stack pointer if the node is LoadStackPointer, otherwise assign a - // register. - InstructionOperand UseRegisterOrStackPointer(Node* node) { - if (node->opcode() == IrOpcode::kLoadStackPointer) { - return LocationOperand(LocationOperand::EXPLICIT, - LocationOperand::REGISTER, - MachineRepresentation::kWord32, sp.code()); - } - return UseRegister(node); - } }; namespace { @@ -102,6 +91,15 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { g.UseRegister(node->InputAt(1))); } +void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + ArmOperandGenerator g(selector); + InstructionOperand temps[] = {g.TempSimd128Register()}; + selector->Emit(opcode, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1)), arraysize(temps), temps); +} + void VisitRRRShuffle(InstructionSelector* selector, ArchOpcode opcode, Node* node) { ArmOperandGenerator g(selector); @@ -509,7 +507,8 @@ void InstructionSelector::VisitStore(Node* node) { WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind(); MachineRepresentation rep = store_rep.representation(); - if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedPointer(rep)); AddressingMode addressing_mode; InstructionOperand inputs[3]; @@ -887,6 +886,15 @@ void InstructionSelector::VisitWord32Xor(Node* node) { VisitBinop(this, node, kArmEor, kArmEor); } +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + ArmOperandGenerator g(this); + EmitWithContinuation(opcode, g.UseRegister(value), cont); +} + namespace { template <typename TryMatchShift> @@ -1686,17 +1694,17 @@ void VisitWordCompare(InstructionSelector* selector, Node* node, if (TryMatchImmediateOrShift(selector, &opcode, m.right().node(), &input_count, &inputs[1])) { - inputs[0] = g.UseRegisterOrStackPointer(m.left().node()); + inputs[0] = g.UseRegister(m.left().node()); input_count++; } else if (TryMatchImmediateOrShift(selector, &opcode, m.left().node(), &input_count, &inputs[1])) { if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute(); - inputs[0] = g.UseRegisterOrStackPointer(m.right().node()); + inputs[0] = g.UseRegister(m.right().node()); input_count++; } else { opcode |= AddressingModeField::encode(kMode_Operand2_R); - inputs[input_count++] = g.UseRegisterOrStackPointer(m.left().node()); - inputs[input_count++] = g.UseRegisterOrStackPointer(m.right().node()); + inputs[input_count++] = g.UseRegister(m.left().node()); + inputs[input_count++] = g.UseRegister(m.right().node()); } if (has_result) { @@ -1848,6 +1856,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, return VisitShift(this, value, TryMatchLSR, cont); case IrOpcode::kWord32Ror: return VisitShift(this, value, TryMatchROR, cont); + case 
IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2488,7 +2499,7 @@ SIMD_UNOP_LIST(SIMD_VISIT_UNOP) #define SIMD_VISIT_SHIFT_OP(Name) \ void InstructionSelector::Visit##Name(Node* node) { \ - VisitRRI(this, kArm##Name, node); \ + VisitSimdShiftRRR(this, kArm##Name, node); \ } SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP) #undef SIMD_VISIT_SHIFT_OP @@ -2502,6 +2513,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP) #undef SIMD_VISIT_BINOP #undef SIMD_BINOP_LIST +void InstructionSelector::VisitF32x4Div(Node* node) { + ArmOperandGenerator g(this); + // Use fixed registers in the lower 8 Q-registers so we can directly access + // mapped registers S0-S31. + Emit(kArmF32x4Div, g.DefineAsFixed(node, q0), + g.UseFixed(node->InputAt(0), q0), g.UseFixed(node->InputAt(1), q1)); +} + void InstructionSelector::VisitS128Select(Node* node) { ArmOperandGenerator g(this); Emit(kArmS128Select, g.DefineSameAsFirst(node), diff --git a/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc b/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc index c71a63cc3d..66ca7f6cf0 100644 --- a/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc @@ -820,20 +820,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // don't emit code for nops. break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); - if (result != kSuccess) return result; - unwinding_info_writer_.MarkBlockWillExit(); + __ B(exit->label()); break; } case kArchRet: AssembleReturn(instr->InputAt(0)); break; - case kArchStackPointer: - __ mov(i.OutputRegister(), sp); - break; case kArchFramePointer: __ mov(i.OutputRegister(), fp); break; @@ -844,6 +838,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), fp); } break; + case kArchStackPointerGreaterThan: { + constexpr size_t kValueIndex = 0; + DCHECK(instr->InputAt(kValueIndex)->IsRegister()); + __ Cmp(sp, i.InputRegister(kValueIndex)); + break; + } case kArchTruncateDoubleToI: __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(), i.InputDoubleRegister(0), DetermineStubCallMode()); @@ -1598,12 +1598,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ DecompressAnyTagged(i.OutputRegister(), i.InputRegister(0)); break; } - case kArm64CompressSigned: // Fall through. - case kArm64CompressPointer: // Fall through. 
- case kArm64CompressAny: { - __ Uxtw(i.OutputRegister(), i.InputRegister(0)); - break; - } case kArm64LdrS: __ Ldr(i.OutputDoubleRegister().S(), i.MemoryOperand()); break; @@ -1780,6 +1774,50 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1).V##FORMAT()); \ break; + case kArm64F64x2Splat: { + __ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0); + break; + } + case kArm64F64x2ExtractLane: { + __ Mov(i.OutputSimd128Register().D(), i.InputSimd128Register(0).V2D(), + i.InputInt8(1)); + break; + } + case kArm64F64x2ReplaceLane: { + VRegister dst = i.OutputSimd128Register().V2D(), + src1 = i.InputSimd128Register(0).V2D(); + if (!dst.is(src1)) { + __ Mov(dst, src1); + } + __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V2D(), 0); + break; + } + SIMD_UNOP_CASE(kArm64F64x2Abs, Fabs, 2D); + SIMD_UNOP_CASE(kArm64F64x2Neg, Fneg, 2D); + SIMD_BINOP_CASE(kArm64F64x2Add, Fadd, 2D); + SIMD_BINOP_CASE(kArm64F64x2Sub, Fsub, 2D); + SIMD_BINOP_CASE(kArm64F64x2Mul, Fmul, 2D); + SIMD_BINOP_CASE(kArm64F64x2Div, Fdiv, 2D); + SIMD_BINOP_CASE(kArm64F64x2Min, Fmin, 2D); + SIMD_BINOP_CASE(kArm64F64x2Max, Fmax, 2D); + SIMD_BINOP_CASE(kArm64F64x2Eq, Fcmeq, 2D); + case kArm64F64x2Ne: { + VRegister dst = i.OutputSimd128Register().V2D(); + __ Fcmeq(dst, i.InputSimd128Register(0).V2D(), + i.InputSimd128Register(1).V2D()); + __ Mvn(dst, dst); + break; + } + case kArm64F64x2Lt: { + __ Fcmgt(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(), + i.InputSimd128Register(0).V2D()); + break; + } + case kArm64F64x2Le: { + __ Fcmge(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(), + i.InputSimd128Register(0).V2D()); + break; + } case kArm64F32x4Splat: { __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0); break; @@ -1808,6 +1846,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_BINOP_CASE(kArm64F32x4AddHoriz, Faddp, 4S); SIMD_BINOP_CASE(kArm64F32x4Sub, Fsub, 4S); SIMD_BINOP_CASE(kArm64F32x4Mul, Fmul, 4S); + SIMD_BINOP_CASE(kArm64F32x4Div, Fdiv, 4S); SIMD_BINOP_CASE(kArm64F32x4Min, Fmin, 4S); SIMD_BINOP_CASE(kArm64F32x4Max, Fmax, 4S); SIMD_BINOP_CASE(kArm64F32x4Eq, Fcmeq, 4S); @@ -1828,6 +1867,62 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(0).V4S()); break; } + case kArm64I64x2Splat: { + __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0)); + break; + } + case kArm64I64x2ExtractLane: { + __ Mov(i.OutputRegister64(), i.InputSimd128Register(0).V2D(), + i.InputInt8(1)); + break; + } + case kArm64I64x2ReplaceLane: { + VRegister dst = i.OutputSimd128Register().V2D(), + src1 = i.InputSimd128Register(0).V2D(); + if (!dst.is(src1)) { + __ Mov(dst, src1); + } + __ Mov(dst, i.InputInt8(1), i.InputRegister64(2)); + break; + } + SIMD_UNOP_CASE(kArm64I64x2Neg, Neg, 2D); + case kArm64I64x2Shl: { + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V2D(), i.InputRegister64(1)); + __ Sshl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(), + tmp.V2D()); + break; + } + case kArm64I64x2ShrS: { + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V2D(), i.InputRegister64(1)); + __ Neg(tmp.V2D(), tmp.V2D()); + __ Sshl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(), + tmp.V2D()); + break; + } + SIMD_BINOP_CASE(kArm64I64x2Add, Add, 2D); + SIMD_BINOP_CASE(kArm64I64x2Sub, Sub, 2D); + SIMD_BINOP_CASE(kArm64I64x2Eq, Cmeq, 2D); + case kArm64I64x2Ne: { + VRegister dst = i.OutputSimd128Register().V2D(); + __ 
Cmeq(dst, i.InputSimd128Register(0).V2D(), + i.InputSimd128Register(1).V2D()); + __ Mvn(dst, dst); + break; + } + SIMD_BINOP_CASE(kArm64I64x2GtS, Cmgt, 2D); + SIMD_BINOP_CASE(kArm64I64x2GeS, Cmge, 2D); + case kArm64I64x2ShrU: { + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V2D(), i.InputRegister64(1)); + __ Neg(tmp.V2D(), tmp.V2D()); + __ Ushl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(), + tmp.V2D()); + break; + } + SIMD_BINOP_CASE(kArm64I64x2GtU, Cmhi, 2D); + SIMD_BINOP_CASE(kArm64I64x2GeU, Cmhs, 2D); case kArm64I32x4Splat: { __ Dup(i.OutputSimd128Register().V4S(), i.InputRegister32(0)); break; @@ -1851,13 +1946,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_WIDENING_UNOP_CASE(kArm64I32x4SConvertI16x8High, Sxtl2, 4S, 8H); SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S); case kArm64I32x4Shl: { - __ Shl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), - i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V4S(), i.InputRegister32(1)); + __ Sshl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), + tmp.V4S()); break; } case kArm64I32x4ShrS: { - __ Sshr(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), - i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V4S(), i.InputRegister32(1)); + __ Neg(tmp.V4S(), tmp.V4S()); + __ Sshl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), + tmp.V4S()); break; } SIMD_BINOP_CASE(kArm64I32x4Add, Add, 4S); @@ -1880,8 +1980,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8Low, Uxtl, 4S, 4H); SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8High, Uxtl2, 4S, 8H); case kArm64I32x4ShrU: { - __ Ushr(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), - i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V4S(), i.InputRegister32(1)); + __ Neg(tmp.V4S(), tmp.V4S()); + __ Ushl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(), + tmp.V4S()); break; } SIMD_BINOP_CASE(kArm64I32x4MinU, Umin, 4S); @@ -1910,13 +2013,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_WIDENING_UNOP_CASE(kArm64I16x8SConvertI8x16High, Sxtl2, 8H, 16B); SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H); case kArm64I16x8Shl: { - __ Shl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), - i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V8H(), i.InputRegister32(1)); + __ Sshl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), + tmp.V8H()); break; } case kArm64I16x8ShrS: { - __ Sshr(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), - i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V8H(), i.InputRegister32(1)); + __ Neg(tmp.V8H(), tmp.V8H()); + __ Sshl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), + tmp.V8H()); break; } case kArm64I16x8SConvertI32x4: { @@ -1961,8 +2069,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArm64I16x8ShrU: { - __ Ushr(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), - i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V8H(), i.InputRegister32(1)); + __ Neg(tmp.V8H(), tmp.V8H()); + __ Ushl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(), + tmp.V8H()); break; } case kArm64I16x8UConvertI32x4: { @@ -2005,13 +2116,18 @@ CodeGenerator::CodeGenResult 
CodeGenerator::AssembleArchInstruction( } SIMD_UNOP_CASE(kArm64I8x16Neg, Neg, 16B); case kArm64I8x16Shl: { - __ Shl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(), - i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V16B(), i.InputRegister32(1)); + __ Sshl(i.OutputSimd128Register().V16B(), + i.InputSimd128Register(0).V16B(), tmp.V16B()); break; } case kArm64I8x16ShrS: { - __ Sshr(i.OutputSimd128Register().V16B(), - i.InputSimd128Register(0).V16B(), i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V16B(), i.InputRegister32(1)); + __ Neg(tmp.V16B(), tmp.V16B()); + __ Sshl(i.OutputSimd128Register().V16B(), + i.InputSimd128Register(0).V16B(), tmp.V16B()); break; } case kArm64I8x16SConvertI16x8: { @@ -2046,8 +2162,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_BINOP_CASE(kArm64I8x16GtS, Cmgt, 16B); SIMD_BINOP_CASE(kArm64I8x16GeS, Cmge, 16B); case kArm64I8x16ShrU: { - __ Ushr(i.OutputSimd128Register().V16B(), - i.InputSimd128Register(0).V16B(), i.InputInt5(1)); + VRegister tmp = i.TempSimd128Register(0); + __ Dup(tmp.V16B(), i.InputRegister32(1)); + __ Neg(tmp.V16B(), tmp.V16B()); + __ Ushl(i.OutputSimd128Register().V16B(), + i.InputSimd128Register(0).V16B(), tmp.V16B()); break; } case kArm64I8x16UConvertI16x8: { @@ -2192,7 +2311,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B); SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B); SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B); + case kArm64S1x2AllTrue: { + UseScratchRegisterScope scope(tasm()); + VRegister temp1 = scope.AcquireV(kFormat2D); + VRegister temp2 = scope.AcquireV(kFormatS); + __ Cmeq(temp1, i.InputSimd128Register(0).V2D(), 0); + __ Umaxv(temp2, temp1.V4S()); + __ Umov(i.OutputRegister32(), temp2, 0); + __ Add(i.OutputRegister32(), i.OutputRegister32(), 1); + break; + } #define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT) \ case Op: { \ UseScratchRegisterScope scope(tasm()); \ @@ -2203,6 +2332,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Cset(i.OutputRegister32(), ne); \ break; \ } + // for AnyTrue, the format does not matter, umaxv does not support 2D + SIMD_REDUCE_OP_CASE(kArm64S1x2AnyTrue, Umaxv, kFormatS, 4S); SIMD_REDUCE_OP_CASE(kArm64S1x4AnyTrue, Umaxv, kFormatS, 4S); SIMD_REDUCE_OP_CASE(kArm64S1x4AllTrue, Uminv, kFormatS, 4S); SIMD_REDUCE_OP_CASE(kArm64S1x8AnyTrue, Umaxv, kFormatH, 8H); @@ -2669,6 +2800,11 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() { __ ForceConstantPoolEmissionWithoutJump(); } +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) { + __ ForceConstantPoolEmissionWithoutJump(); + __ CheckVeneerPool(false, false, deopt_count * Deoptimizer::kDeoptExitSize); +} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { Arm64OperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h b/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h index 1c4c0e3335..4b56e402c1 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h +++ b/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h @@ -168,11 +168,23 @@ namespace compiler { V(Arm64DecompressSigned) \ V(Arm64DecompressPointer) \ V(Arm64DecompressAny) \ - V(Arm64CompressSigned) \ - V(Arm64CompressPointer) \ - V(Arm64CompressAny) \ V(Arm64DmbIsh) \ V(Arm64DsbIsb) \ + V(Arm64F64x2Splat) 
\ + V(Arm64F64x2ExtractLane) \ + V(Arm64F64x2ReplaceLane) \ + V(Arm64F64x2Abs) \ + V(Arm64F64x2Neg) \ + V(Arm64F64x2Add) \ + V(Arm64F64x2Sub) \ + V(Arm64F64x2Mul) \ + V(Arm64F64x2Div) \ + V(Arm64F64x2Min) \ + V(Arm64F64x2Max) \ + V(Arm64F64x2Eq) \ + V(Arm64F64x2Ne) \ + V(Arm64F64x2Lt) \ + V(Arm64F64x2Le) \ V(Arm64F32x4Splat) \ V(Arm64F32x4ExtractLane) \ V(Arm64F32x4ReplaceLane) \ @@ -186,12 +198,28 @@ namespace compiler { V(Arm64F32x4AddHoriz) \ V(Arm64F32x4Sub) \ V(Arm64F32x4Mul) \ + V(Arm64F32x4Div) \ V(Arm64F32x4Min) \ V(Arm64F32x4Max) \ V(Arm64F32x4Eq) \ V(Arm64F32x4Ne) \ V(Arm64F32x4Lt) \ V(Arm64F32x4Le) \ + V(Arm64I64x2Splat) \ + V(Arm64I64x2ExtractLane) \ + V(Arm64I64x2ReplaceLane) \ + V(Arm64I64x2Neg) \ + V(Arm64I64x2Shl) \ + V(Arm64I64x2ShrS) \ + V(Arm64I64x2Add) \ + V(Arm64I64x2Sub) \ + V(Arm64I64x2Eq) \ + V(Arm64I64x2Ne) \ + V(Arm64I64x2GtS) \ + V(Arm64I64x2GeS) \ + V(Arm64I64x2ShrU) \ + V(Arm64I64x2GtU) \ + V(Arm64I64x2GeU) \ V(Arm64I32x4Splat) \ V(Arm64I32x4ExtractLane) \ V(Arm64I32x4ReplaceLane) \ @@ -310,6 +338,8 @@ namespace compiler { V(Arm64S8x8Reverse) \ V(Arm64S8x4Reverse) \ V(Arm64S8x2Reverse) \ + V(Arm64S1x2AnyTrue) \ + V(Arm64S1x2AllTrue) \ V(Arm64S1x4AnyTrue) \ V(Arm64S1x4AllTrue) \ V(Arm64S1x8AnyTrue) \ diff --git a/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc index 8344887ec2..7cba2d50ea 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc @@ -137,6 +137,21 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64Float64MoveU64: case kArm64U64MoveFloat64: case kArm64Float64SilenceNaN: + case kArm64F64x2Splat: + case kArm64F64x2ExtractLane: + case kArm64F64x2ReplaceLane: + case kArm64F64x2Abs: + case kArm64F64x2Neg: + case kArm64F64x2Add: + case kArm64F64x2Sub: + case kArm64F64x2Mul: + case kArm64F64x2Div: + case kArm64F64x2Min: + case kArm64F64x2Max: + case kArm64F64x2Eq: + case kArm64F64x2Ne: + case kArm64F64x2Lt: + case kArm64F64x2Le: case kArm64F32x4Splat: case kArm64F32x4ExtractLane: case kArm64F32x4ReplaceLane: @@ -150,12 +165,28 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F32x4AddHoriz: case kArm64F32x4Sub: case kArm64F32x4Mul: + case kArm64F32x4Div: case kArm64F32x4Min: case kArm64F32x4Max: case kArm64F32x4Eq: case kArm64F32x4Ne: case kArm64F32x4Lt: case kArm64F32x4Le: + case kArm64I64x2Splat: + case kArm64I64x2ExtractLane: + case kArm64I64x2ReplaceLane: + case kArm64I64x2Neg: + case kArm64I64x2Shl: + case kArm64I64x2ShrS: + case kArm64I64x2Add: + case kArm64I64x2Sub: + case kArm64I64x2Eq: + case kArm64I64x2Ne: + case kArm64I64x2GtS: + case kArm64I64x2GeS: + case kArm64I64x2ShrU: + case kArm64I64x2GtU: + case kArm64I64x2GeU: case kArm64I32x4Splat: case kArm64I32x4ExtractLane: case kArm64I32x4ReplaceLane: @@ -274,6 +305,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64S8x8Reverse: case kArm64S8x4Reverse: case kArm64S8x2Reverse: + case kArm64S1x2AnyTrue: + case kArm64S1x2AllTrue: case kArm64S1x4AnyTrue: case kArm64S1x4AllTrue: case kArm64S1x8AnyTrue: @@ -287,9 +320,6 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64DecompressSigned: case kArm64DecompressPointer: case kArm64DecompressAny: - case kArm64CompressSigned: - case kArm64CompressPointer: - case kArm64CompressAny: return kNoOpcodeFlags; case kArm64LdrS: diff --git a/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc 
b/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc index a953e35a66..4abbd68c49 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc @@ -48,16 +48,6 @@ class Arm64OperandGenerator final : public OperandGenerator { return UseRegister(node); } - // Use the stack pointer if the node is LoadStackPointer, otherwise assign a - // register. - InstructionOperand UseRegisterOrStackPointer(Node* node, bool sp_allowed) { - if (sp_allowed && node->opcode() == IrOpcode::kLoadStackPointer) - return LocationOperand(LocationOperand::EXPLICIT, - LocationOperand::REGISTER, - MachineRepresentation::kWord64, sp.code()); - return UseRegister(node); - } - // Use the provided node if it has the required value, or create a // TempImmediate otherwise. InstructionOperand UseImmediateOrTemp(Node* node, int32_t value) { @@ -160,6 +150,15 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { g.UseRegister(node->InputAt(1))); } +void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + Arm64OperandGenerator g(selector); + InstructionOperand temps[] = {g.TempSimd128Register()}; + selector->Emit(opcode, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1)), arraysize(temps), temps); +} + void VisitRRI(InstructionSelector* selector, ArchOpcode opcode, Node* node) { Arm64OperandGenerator g(selector); int32_t imm = OpParameter<int32_t>(node->op()); @@ -554,23 +553,21 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode, // is used when we merge a conversion into the load. outputs[0] = g.DefineAsRegister(output == nullptr ? node : output); - if (selector->CanAddressRelativeToRootsRegister()) { - ExternalReferenceMatcher m(base); - if (m.HasValue() && g.IsIntegerConstant(index)) { - ptrdiff_t const delta = - g.GetIntegerConstantValue(index) + - TurboAssemblerBase::RootRegisterOffsetForExternalReference( - selector->isolate(), m.Value()); - input_count = 1; - // Check that the delta is a 32-bit integer due to the limitations of - // immediate operands. - if (is_int32(delta)) { - inputs[0] = g.UseImmediate(static_cast<int32_t>(delta)); - opcode |= AddressingModeField::encode(kMode_Root); - selector->Emit(opcode, arraysize(outputs), outputs, input_count, - inputs); - return; - } + ExternalReferenceMatcher m(base); + if (m.HasValue() && g.IsIntegerConstant(index) && + selector->CanAddressRelativeToRootsRegister(m.Value())) { + ptrdiff_t const delta = + g.GetIntegerConstantValue(index) + + TurboAssemblerBase::RootRegisterOffsetForExternalReference( + selector->isolate(), m.Value()); + input_count = 1; + // Check that the delta is a 32-bit integer due to the limitations of + // immediate operands. + if (is_int32(delta)) { + inputs[0] = g.UseImmediate(static_cast<int32_t>(delta)); + opcode |= AddressingModeField::encode(kMode_Root); + selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs); + return; } } @@ -670,7 +667,8 @@ void InstructionSelector::VisitStore(Node* node) { MachineRepresentation rep = store_rep.representation(); // TODO(arm64): I guess this could be done in a better way. 
- if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedOrCompressedPointer(rep)); AddressingMode addressing_mode; InstructionOperand inputs[3]; @@ -1004,6 +1002,15 @@ void InstructionSelector::VisitWord64Shl(Node* node) { VisitRRO(this, kArm64Lsl, node, kShift64Imm); } +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + Arm64OperandGenerator g(this); + EmitWithContinuation(opcode, g.UseRegister(value), cont); +} + namespace { bool TryEmitBitfieldExtract32(InstructionSelector* selector, Node* node) { @@ -1625,23 +1632,23 @@ void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { } void InstructionSelector::VisitChangeTaggedToCompressed(Node* node) { - Arm64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kArm64CompressAny, g.DefineAsRegister(node), g.UseRegister(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + EmitIdentity(node); } void InstructionSelector::VisitChangeTaggedPointerToCompressedPointer( Node* node) { - Arm64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kArm64CompressPointer, g.DefineAsRegister(node), g.UseRegister(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + EmitIdentity(node); } void InstructionSelector::VisitChangeTaggedSignedToCompressedSigned( Node* node) { - Arm64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kArm64CompressSigned, g.DefineAsRegister(node), g.UseRegister(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + EmitIdentity(node); } void InstructionSelector::VisitChangeCompressedToTagged(Node* node) { @@ -1826,26 +1833,25 @@ void VisitCompare(InstructionSelector* selector, InstructionCode opcode, // Shared routine for multiple word compare operations. void VisitWordCompare(InstructionSelector* selector, Node* node, InstructionCode opcode, FlagsContinuation* cont, - bool commutative, ImmediateMode immediate_mode) { + ImmediateMode immediate_mode) { Arm64OperandGenerator g(selector); + Node* left = node->InputAt(0); Node* right = node->InputAt(1); - if (right->opcode() == IrOpcode::kLoadStackPointer || + // If one of the two inputs is an immediate, make sure it's on the right. + if (!g.CanBeImmediate(right, immediate_mode) && g.CanBeImmediate(left, immediate_mode)) { - if (!commutative) cont->Commute(); + cont->Commute(); std::swap(left, right); } - // Match immediates on left or right side of comparison. 
if (g.CanBeImmediate(right, immediate_mode)) { - VisitCompare(selector, opcode, - g.UseRegisterOrStackPointer(left, opcode == kArm64Cmp), - g.UseImmediate(right), cont); + VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right), + cont); } else { - VisitCompare(selector, opcode, - g.UseRegisterOrStackPointer(left, opcode == kArm64Cmp), - g.UseRegister(right), cont); + VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right), + cont); } } @@ -2370,8 +2376,7 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, if (m.right().Is(0)) { Node* const left = m.left().node(); if (CanCover(value, left) && left->opcode() == IrOpcode::kWord64And) { - return VisitWordCompare(this, left, kArm64Tst, cont, true, - kLogical64Imm); + return VisitWordCompare(this, left, kArm64Tst, cont, kLogical64Imm); } // Merge the Word64Equal(x, 0) comparison into a cbz instruction. if ((cont->IsBranch() || cont->IsDeoptimize()) && @@ -2381,25 +2386,20 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, return; } } - return VisitWordCompare(this, value, kArm64Cmp, cont, false, - kArithmeticImm); + return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm); } case IrOpcode::kInt64LessThan: cont->OverwriteAndNegateIfEqual(kSignedLessThan); - return VisitWordCompare(this, value, kArm64Cmp, cont, false, - kArithmeticImm); + return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm); case IrOpcode::kInt64LessThanOrEqual: cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual); - return VisitWordCompare(this, value, kArm64Cmp, cont, false, - kArithmeticImm); + return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm); case IrOpcode::kUint64LessThan: cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); - return VisitWordCompare(this, value, kArm64Cmp, cont, false, - kArithmeticImm); + return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm); case IrOpcode::kUint64LessThanOrEqual: cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); - return VisitWordCompare(this, value, kArm64Cmp, cont, false, - kArithmeticImm); + return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm); case IrOpcode::kFloat32Equal: cont->OverwriteAndNegateIfEqual(kEqual); return VisitFloat32Compare(this, value, cont); @@ -2461,16 +2461,16 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, } break; case IrOpcode::kInt32Add: - return VisitWordCompare(this, value, kArm64Cmn32, cont, true, - kArithmeticImm); + return VisitWordCompare(this, value, kArm64Cmn32, cont, kArithmeticImm); case IrOpcode::kInt32Sub: return VisitWord32Compare(this, value, cont); case IrOpcode::kWord32And: - return VisitWordCompare(this, value, kArm64Tst32, cont, true, - kLogical32Imm); + return VisitWordCompare(this, value, kArm64Tst32, cont, kLogical32Imm); case IrOpcode::kWord64And: - return VisitWordCompare(this, value, kArm64Tst, cont, true, - kLogical64Imm); + return VisitWordCompare(this, value, kArm64Tst, cont, kLogical64Imm); + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2530,7 +2530,7 @@ void InstructionSelector::VisitWord32Equal(Node* const node) { case IrOpcode::kWord32And: return VisitWord32Compare(this, node, &cont); case IrOpcode::kInt32Sub: - return VisitWordCompare(this, value, kArm64Cmp32, &cont, false, + return VisitWordCompare(this, value, kArm64Cmp32, &cont, 
kArithmeticImm); case IrOpcode::kWord32Equal: { // Word32Equal(Word32Equal(x, y), 0) => Word32Compare(x, y, ne). @@ -2587,15 +2587,14 @@ void InstructionSelector::VisitWord64Equal(Node* const node) { if (CanCover(user, value)) { switch (value->opcode()) { case IrOpcode::kWord64And: - return VisitWordCompare(this, value, kArm64Tst, &cont, true, - kLogical64Imm); + return VisitWordCompare(this, value, kArm64Tst, &cont, kLogical64Imm); default: break; } return VisitWord64Test(this, value, &cont); } } - VisitWordCompare(this, node, kArm64Cmp, &cont, false, kArithmeticImm); + VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm); } void InstructionSelector::VisitInt32AddWithOverflow(Node* node) { @@ -2653,24 +2652,24 @@ void InstructionSelector::VisitInt64SubWithOverflow(Node* node) { void InstructionSelector::VisitInt64LessThan(Node* node) { FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node); - VisitWordCompare(this, node, kArm64Cmp, &cont, false, kArithmeticImm); + VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm); } void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) { FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThanOrEqual, node); - VisitWordCompare(this, node, kArm64Cmp, &cont, false, kArithmeticImm); + VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm); } void InstructionSelector::VisitUint64LessThan(Node* node) { FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); - VisitWordCompare(this, node, kArm64Cmp, &cont, false, kArithmeticImm); + VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm); } void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) { FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); - VisitWordCompare(this, node, kArm64Cmp, &cont, false, kArithmeticImm); + VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm); } void InstructionSelector::VisitFloat32Neg(Node* node) { @@ -3045,18 +3044,23 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { } #define SIMD_TYPE_LIST(V) \ + V(F64x2) \ V(F32x4) \ + V(I64x2) \ V(I32x4) \ V(I16x8) \ V(I8x16) #define SIMD_UNOP_LIST(V) \ + V(F64x2Abs, kArm64F64x2Abs) \ + V(F64x2Neg, kArm64F64x2Neg) \ V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \ V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \ V(F32x4Abs, kArm64F32x4Abs) \ V(F32x4Neg, kArm64F32x4Neg) \ V(F32x4RecipApprox, kArm64F32x4RecipApprox) \ V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \ + V(I64x2Neg, kArm64I64x2Neg) \ V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \ V(I32x4SConvertI16x8Low, kArm64I32x4SConvertI16x8Low) \ V(I32x4SConvertI16x8High, kArm64I32x4SConvertI16x8High) \ @@ -3071,6 +3075,8 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I16x8UConvertI8x16High, kArm64I16x8UConvertI8x16High) \ V(I8x16Neg, kArm64I8x16Neg) \ V(S128Not, kArm64S128Not) \ + V(S1x2AnyTrue, kArm64S1x2AnyTrue) \ + V(S1x2AllTrue, kArm64S1x2AllTrue) \ V(S1x4AnyTrue, kArm64S1x4AnyTrue) \ V(S1x4AllTrue, kArm64S1x4AllTrue) \ V(S1x8AnyTrue, kArm64S1x8AnyTrue) \ @@ -3079,6 +3085,9 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(S1x16AllTrue, kArm64S1x16AllTrue) #define SIMD_SHIFT_OP_LIST(V) \ + V(I64x2Shl) \ + V(I64x2ShrS) \ + V(I64x2ShrU) \ V(I32x4Shl) \ V(I32x4ShrS) \ V(I32x4ShrU) \ @@ -3090,16 +3099,35 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I8x16ShrU) #define SIMD_BINOP_LIST(V) \ + V(F64x2Add, kArm64F64x2Add) \ + V(F64x2Sub, 
kArm64F64x2Sub) \ + V(F64x2Mul, kArm64F64x2Mul) \ + V(F64x2Div, kArm64F64x2Div) \ + V(F64x2Min, kArm64F64x2Min) \ + V(F64x2Max, kArm64F64x2Max) \ + V(F64x2Eq, kArm64F64x2Eq) \ + V(F64x2Ne, kArm64F64x2Ne) \ + V(F64x2Lt, kArm64F64x2Lt) \ + V(F64x2Le, kArm64F64x2Le) \ V(F32x4Add, kArm64F32x4Add) \ V(F32x4AddHoriz, kArm64F32x4AddHoriz) \ V(F32x4Sub, kArm64F32x4Sub) \ V(F32x4Mul, kArm64F32x4Mul) \ + V(F32x4Div, kArm64F32x4Div) \ V(F32x4Min, kArm64F32x4Min) \ V(F32x4Max, kArm64F32x4Max) \ V(F32x4Eq, kArm64F32x4Eq) \ V(F32x4Ne, kArm64F32x4Ne) \ V(F32x4Lt, kArm64F32x4Lt) \ V(F32x4Le, kArm64F32x4Le) \ + V(I64x2Add, kArm64I64x2Add) \ + V(I64x2Sub, kArm64I64x2Sub) \ + V(I64x2Eq, kArm64I64x2Eq) \ + V(I64x2Ne, kArm64I64x2Ne) \ + V(I64x2GtS, kArm64I64x2GtS) \ + V(I64x2GeS, kArm64I64x2GeS) \ + V(I64x2GtU, kArm64I64x2GtU) \ + V(I64x2GeU, kArm64I64x2GeU) \ V(I32x4Add, kArm64I32x4Add) \ V(I32x4AddHoriz, kArm64I32x4AddHoriz) \ V(I32x4Sub, kArm64I32x4Sub) \ @@ -3194,7 +3222,7 @@ SIMD_UNOP_LIST(SIMD_VISIT_UNOP) #define SIMD_VISIT_SHIFT_OP(Name) \ void InstructionSelector::Visit##Name(Node* node) { \ - VisitRRI(this, kArm64##Name, node); \ + VisitSimdShiftRRR(this, kArm64##Name, node); \ } SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP) #undef SIMD_VISIT_SHIFT_OP diff --git a/deps/v8/src/compiler/backend/code-generator-impl.h b/deps/v8/src/compiler/backend/code-generator-impl.h index 75f8e70203..2bfb009980 100644 --- a/deps/v8/src/compiler/backend/code-generator-impl.h +++ b/deps/v8/src/compiler/backend/code-generator-impl.h @@ -116,6 +116,10 @@ class InstructionOperandConverter { return ToSimd128Register(instr_->Output()); } + Simd128Register TempSimd128Register(size_t index) { + return ToSimd128Register(instr_->TempAt(index)); + } + // -- Conversions for operands ----------------------------------------------- Label* ToLabel(InstructionOperand* op) { @@ -176,20 +180,55 @@ class InstructionOperandConverter { Instruction* instr_; }; -// Eager deoptimization exit. +// Deoptimization exit. class DeoptimizationExit : public ZoneObject { public: - explicit DeoptimizationExit(int deoptimization_id, SourcePosition pos) - : deoptimization_id_(deoptimization_id), pos_(pos) {} - - int deoptimization_id() const { return deoptimization_id_; } - Label* label() { return &label_; } + explicit DeoptimizationExit(SourcePosition pos, BailoutId bailout_id, + int translation_id, int pc_offset, + DeoptimizeKind kind, DeoptimizeReason reason) + : deoptimization_id_(kNoDeoptIndex), + pos_(pos), + bailout_id_(bailout_id), + translation_id_(translation_id), + pc_offset_(pc_offset), + kind_(kind), + reason_(reason), + emitted_(false) {} + + bool has_deoptimization_id() const { + return deoptimization_id_ != kNoDeoptIndex; + } + int deoptimization_id() const { + DCHECK(has_deoptimization_id()); + return deoptimization_id_; + } + void set_deoptimization_id(int deoptimization_id) { + deoptimization_id_ = deoptimization_id; + } SourcePosition pos() const { return pos_; } + Label* label() { return &label_; } + BailoutId bailout_id() const { return bailout_id_; } + int translation_id() const { return translation_id_; } + int pc_offset() const { return pc_offset_; } + DeoptimizeKind kind() const { return kind_; } + DeoptimizeReason reason() const { return reason_; } + // Returns whether the deopt exit has already been emitted. Most deopt exits + // are emitted contiguously at the end of the code, but unconditional deopt + // exits (kArchDeoptimize) may be inlined where they are encountered. 
+ bool emitted() const { return emitted_; } + void set_emitted() { emitted_ = true; } private: - int const deoptimization_id_; + static const int kNoDeoptIndex = kMaxInt16 + 1; + int deoptimization_id_; + const SourcePosition pos_; Label label_; - SourcePosition const pos_; + const BailoutId bailout_id_; + const int translation_id_; + const int pc_offset_; + const DeoptimizeKind kind_; + const DeoptimizeReason reason_; + bool emitted_; }; // Generator for out-of-line code that is emitted after the main code is done. diff --git a/deps/v8/src/compiler/backend/code-generator.cc b/deps/v8/src/compiler/backend/code-generator.cc index 9ce92dadaa..e7702bcdf6 100644 --- a/deps/v8/src/compiler/backend/code-generator.cc +++ b/deps/v8/src/compiler/backend/code-generator.cc @@ -47,7 +47,8 @@ CodeGenerator::CodeGenerator( Isolate* isolate, base::Optional<OsrHelper> osr_helper, int start_source_position, JumpOptimizationInfo* jump_opt, PoisoningMitigationLevel poisoning_level, const AssemblerOptions& options, - int32_t builtin_index, std::unique_ptr<AssemblerBuffer> buffer) + int32_t builtin_index, size_t max_unoptimized_frame_height, + std::unique_ptr<AssemblerBuffer> buffer) : zone_(codegen_zone), isolate_(isolate), frame_access_state_(nullptr), @@ -64,9 +65,9 @@ CodeGenerator::CodeGenerator( safepoints_(zone()), handlers_(zone()), deoptimization_exits_(zone()), - deoptimization_states_(zone()), deoptimization_literals_(zone()), translations_(zone()), + max_unoptimized_frame_height_(max_unoptimized_frame_height), caller_registers_saved_(false), jump_tables_(nullptr), ools_(nullptr), @@ -91,6 +92,7 @@ CodeGenerator::CodeGenerator( code_kind == Code::WASM_TO_CAPI_FUNCTION || code_kind == Code::WASM_TO_JS_FUNCTION || code_kind == Code::WASM_INTERPRETER_ENTRY || + code_kind == Code::JS_TO_WASM_FUNCTION || (Builtins::IsBuiltinId(builtin_index) && Builtins::IsWasmRuntimeStub(builtin_index))) { tasm_.set_abort_hard(true); @@ -114,20 +116,22 @@ void CodeGenerator::CreateFrameAccessState(Frame* frame) { } CodeGenerator::CodeGenResult CodeGenerator::AssembleDeoptimizerCall( - int deoptimization_id, SourcePosition pos) { + DeoptimizationExit* exit) { + int deoptimization_id = exit->deoptimization_id(); if (deoptimization_id > Deoptimizer::kMaxNumberOfEntries) { return kTooManyDeoptimizationBailouts; } - DeoptimizeKind deopt_kind = GetDeoptimizationKind(deoptimization_id); - DeoptimizeReason deoptimization_reason = - GetDeoptimizationReason(deoptimization_id); + DeoptimizeKind deopt_kind = exit->kind(); + DeoptimizeReason deoptimization_reason = exit->reason(); Address deopt_entry = Deoptimizer::GetDeoptimizationEntry(tasm()->isolate(), deopt_kind); if (info()->is_source_positions_enabled()) { - tasm()->RecordDeoptReason(deoptimization_reason, pos, deoptimization_id); + tasm()->RecordDeoptReason(deoptimization_reason, exit->pos(), + deoptimization_id); } tasm()->CallForDeoptimization(deopt_entry, deoptimization_id); + exit->set_emitted(); return kSuccess; } @@ -146,7 +150,7 @@ void CodeGenerator::AssembleCode() { if (info->is_source_positions_enabled()) { AssembleSourcePosition(start_source_position()); } - + offsets_info_.code_start_register_check = tasm()->pc_offset(); // Check that {kJavaScriptCallCodeStartRegister} has been set correctly. 
if (FLAG_debug_code && (info->code_kind() == Code::OPTIMIZED_FUNCTION || info->code_kind() == Code::BYTECODE_HANDLER)) { @@ -154,6 +158,7 @@ void CodeGenerator::AssembleCode() { AssembleCodeStartRegisterCheck(); } + offsets_info_.deopt_check = tasm()->pc_offset(); // We want to bailout only from JS functions, which are the only ones // that are optimized. if (info->IsOptimizing()) { @@ -162,6 +167,7 @@ void CodeGenerator::AssembleCode() { BailoutIfDeoptimized(); } + offsets_info_.init_poison = tasm()->pc_offset(); InitializeSpeculationPoison(); // Define deoptimization literals for all inlined functions. @@ -191,10 +197,10 @@ void CodeGenerator::AssembleCode() { if (info->trace_turbo_json_enabled()) { block_starts_.assign(instructions()->instruction_blocks().size(), -1); - instr_starts_.assign(instructions()->instructions().size(), -1); + instr_starts_.assign(instructions()->instructions().size(), {}); } - // Assemble instructions in assembly order. + offsets_info_.blocks_start = tasm()->pc_offset(); for (const InstructionBlock* block : instructions()->ao_blocks()) { // Align loop headers on vendor recommended boundaries. if (block->ShouldAlign() && !tasm()->jump_optimization_info()) { @@ -252,6 +258,7 @@ void CodeGenerator::AssembleCode() { } // Assemble all out-of-line code. + offsets_info_.out_of_line_code = tasm()->pc_offset(); if (ools_) { tasm()->RecordComment("-- Out of line code --"); for (OutOfLineCode* ool = ools_; ool; ool = ool->next()) { @@ -266,28 +273,45 @@ void CodeGenerator::AssembleCode() { // The test regress/regress-259 is an example of where we need it. tasm()->nop(); + // For some targets, we must make sure that constant and veneer pools are + // emitted before emitting the deoptimization exits. + PrepareForDeoptimizationExits(static_cast<int>(deoptimization_exits_.size())); + + if (Deoptimizer::kSupportsFixedDeoptExitSize) { + deopt_exit_start_offset_ = tasm()->pc_offset(); + } + // Assemble deoptimization exits. + offsets_info_.deoptimization_exits = tasm()->pc_offset(); int last_updated = 0; for (DeoptimizationExit* exit : deoptimization_exits_) { + if (exit->emitted()) continue; + if (Deoptimizer::kSupportsFixedDeoptExitSize) { + exit->set_deoptimization_id(next_deoptimization_id_++); + } tasm()->bind(exit->label()); - int trampoline_pc = tasm()->pc_offset(); - int deoptimization_id = exit->deoptimization_id(); - DeoptimizationState* ds = deoptimization_states_[deoptimization_id]; - if (ds->kind() == DeoptimizeKind::kLazy) { + // UpdateDeoptimizationInfo expects lazy deopts to be visited in pc_offset + // order, which is always the case since they are added to + // deoptimization_exits_ in that order. + if (exit->kind() == DeoptimizeKind::kLazy) { + int trampoline_pc = tasm()->pc_offset(); last_updated = safepoints()->UpdateDeoptimizationInfo( - ds->pc_offset(), trampoline_pc, last_updated); + exit->pc_offset(), trampoline_pc, last_updated, + exit->deoptimization_id()); } - result_ = AssembleDeoptimizerCall(deoptimization_id, exit->pos()); + result_ = AssembleDeoptimizerCall(exit); if (result_ != kSuccess) return; } + offsets_info_.pools = tasm()->pc_offset(); // TODO(jgruber): Move all inlined metadata generation into a new, // architecture-independent version of FinishCode. Currently, this includes // the safepoint table, handler table, constant pool, and code comments, in // that order. FinishCode(); + offsets_info_.jump_tables = tasm()->pc_offset(); // Emit the jump tables. 
if (jump_tables_) { tasm()->Align(kSystemPointerSize); @@ -396,12 +420,12 @@ MaybeHandle<Code> CodeGenerator::FinalizeCode() { CodeDesc desc; tasm()->GetCode(isolate(), &desc, safepoints(), handler_table_offset_); -#if defined(V8_OS_WIN_X64) +#if defined(V8_OS_WIN64) if (Builtins::IsBuiltinId(info_->builtin_index())) { isolate_->SetBuiltinUnwindData(info_->builtin_index(), tasm()->GetUnwindInfo()); } -#endif +#endif // V8_OS_WIN64 if (unwinding_info_writer_.eh_frame_writer()) { unwinding_info_writer_.eh_frame_writer()->GetEhFrame(&desc); @@ -473,11 +497,7 @@ bool CodeGenerator::IsMaterializableFromRoot(Handle<HeapObject> object, CodeGenerator::CodeGenResult CodeGenerator::AssembleBlock( const InstructionBlock* block) { for (int i = block->code_start(); i < block->code_end(); ++i) { - if (info()->trace_turbo_json_enabled()) { - instr_starts_[i] = tasm()->pc_offset(); - } - Instruction* instr = instructions()->InstructionAt(i); - CodeGenResult result = AssembleInstruction(instr, block); + CodeGenResult result = AssembleInstruction(i, block); if (result != kSuccess) return result; } return kSuccess; @@ -631,7 +651,11 @@ RpoNumber CodeGenerator::ComputeBranchInfo(BranchInfo* branch, } CodeGenerator::CodeGenResult CodeGenerator::AssembleInstruction( - Instruction* instr, const InstructionBlock* block) { + int instruction_index, const InstructionBlock* block) { + Instruction* instr = instructions()->InstructionAt(instruction_index); + if (info()->trace_turbo_json_enabled()) { + instr_starts_[instruction_index].gap_pc_offset = tasm()->pc_offset(); + } int first_unused_stack_slot; FlagsMode mode = FlagsModeField::decode(instr->opcode()); if (mode != kFlags_trap) { @@ -649,10 +673,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleInstruction( if (instr->IsJump() && block->must_deconstruct_frame()) { AssembleDeconstructFrame(); } + if (info()->trace_turbo_json_enabled()) { + instr_starts_[instruction_index].arch_instr_pc_offset = tasm()->pc_offset(); + } // Assemble architecture-specific code for the instruction. CodeGenResult result = AssembleArchInstruction(instr); if (result != kSuccess) return result; + if (info()->trace_turbo_json_enabled()) { + instr_starts_[instruction_index].condition_pc_offset = tasm()->pc_offset(); + } + FlagsCondition condition = FlagsConditionField::decode(instr->opcode()); switch (mode) { case kFlags_branch: @@ -801,7 +832,7 @@ Handle<PodArray<InliningPosition>> CreateInliningPositions( Handle<DeoptimizationData> CodeGenerator::GenerateDeoptimizationData() { OptimizedCompilationInfo* info = this->info(); - int deopt_count = static_cast<int>(deoptimization_states_.size()); + int deopt_count = static_cast<int>(deoptimization_exits_.size()); if (deopt_count == 0 && !info->is_osr()) { return DeoptimizationData::Empty(isolate()); } @@ -816,6 +847,8 @@ Handle<DeoptimizationData> CodeGenerator::GenerateDeoptimizationData() { Smi::FromInt(static_cast<int>(inlined_function_count_))); data->SetOptimizationId(Smi::FromInt(info->optimization_id())); + data->SetDeoptExitStart(Smi::FromInt(deopt_exit_start_offset_)); + if (info->has_shared_info()) { data->SetSharedFunctionInfo(*info->shared_info()); } else { @@ -846,12 +879,13 @@ Handle<DeoptimizationData> CodeGenerator::GenerateDeoptimizationData() { // Populate deoptimization entries. 
for (int i = 0; i < deopt_count; i++) { - DeoptimizationState* deoptimization_state = deoptimization_states_[i]; - data->SetBytecodeOffset(i, deoptimization_state->bailout_id()); - CHECK(deoptimization_state); + DeoptimizationExit* deoptimization_exit = deoptimization_exits_[i]; + CHECK_NOT_NULL(deoptimization_exit); + DCHECK_EQ(i, deoptimization_exit->deoptimization_id()); + data->SetBytecodeOffset(i, deoptimization_exit->bailout_id()); data->SetTranslationIndex( - i, Smi::FromInt(deoptimization_state->translation_id())); - data->SetPc(i, Smi::FromInt(deoptimization_state->pc_offset())); + i, Smi::FromInt(deoptimization_exit->translation_id())); + data->SetPc(i, Smi::FromInt(deoptimization_exit->pc_offset())); } return data; @@ -885,13 +919,8 @@ void CodeGenerator::RecordCallPosition(Instruction* instr) { FrameStateDescriptor* descriptor = GetDeoptimizationEntry(instr, frame_state_offset).descriptor(); int pc_offset = tasm()->pc_offset(); - int deopt_state_id = BuildTranslation(instr, pc_offset, frame_state_offset, - descriptor->state_combine()); - - DeoptimizationExit* const exit = new (zone()) - DeoptimizationExit(deopt_state_id, current_source_position_); - deoptimization_exits_.push_back(exit); - safepoints()->RecordLazyDeoptimizationIndex(deopt_state_id); + BuildTranslation(instr, pc_offset, frame_state_offset, + descriptor->state_combine()); } } @@ -911,20 +940,6 @@ DeoptimizationEntry const& CodeGenerator::GetDeoptimizationEntry( return instructions()->GetDeoptimizationEntry(state_id); } -DeoptimizeKind CodeGenerator::GetDeoptimizationKind( - int deoptimization_id) const { - size_t const index = static_cast<size_t>(deoptimization_id); - DCHECK_LT(index, deoptimization_states_.size()); - return deoptimization_states_[index]->kind(); -} - -DeoptimizeReason CodeGenerator::GetDeoptimizationReason( - int deoptimization_id) const { - size_t const index = static_cast<size_t>(deoptimization_id); - DCHECK_LT(index, deoptimization_states_.size()); - return deoptimization_states_[index]->reason(); -} - void CodeGenerator::TranslateStateValueDescriptor( StateValueDescriptor* desc, StateValueList* nested, Translation* translation, InstructionOperandIterator* iter) { @@ -996,8 +1011,12 @@ void CodeGenerator::BuildTranslationForFrameStateDescriptor( } shared_info = info()->shared_info(); } - int shared_info_id = + + const BailoutId bailout_id = descriptor->bailout_id(); + const int shared_info_id = DefineDeoptimizationLiteral(DeoptimizationLiteral(shared_info)); + const unsigned int height = + static_cast<unsigned int>(descriptor->GetHeight()); switch (descriptor->type()) { case FrameStateType::kInterpretedFunction: { @@ -1007,45 +1026,30 @@ void CodeGenerator::BuildTranslationForFrameStateDescriptor( return_offset = static_cast<int>(state_combine.GetOffsetToPokeAt()); return_count = static_cast<int>(iter->instruction()->OutputCount()); } - translation->BeginInterpretedFrame( - descriptor->bailout_id(), shared_info_id, - static_cast<unsigned int>(descriptor->locals_count() + 1), - return_offset, return_count); + translation->BeginInterpretedFrame(bailout_id, shared_info_id, height, + return_offset, return_count); break; } case FrameStateType::kArgumentsAdaptor: - translation->BeginArgumentsAdaptorFrame( - shared_info_id, - static_cast<unsigned int>(descriptor->parameters_count())); + translation->BeginArgumentsAdaptorFrame(shared_info_id, height); break; case FrameStateType::kConstructStub: - DCHECK(descriptor->bailout_id().IsValidForConstructStub()); - translation->BeginConstructStubFrame( - 
descriptor->bailout_id(), shared_info_id, - static_cast<unsigned int>(descriptor->parameters_count() + 1)); + DCHECK(bailout_id.IsValidForConstructStub()); + translation->BeginConstructStubFrame(bailout_id, shared_info_id, height); break; case FrameStateType::kBuiltinContinuation: { - BailoutId bailout_id = descriptor->bailout_id(); - int parameter_count = - static_cast<unsigned int>(descriptor->parameters_count()); translation->BeginBuiltinContinuationFrame(bailout_id, shared_info_id, - parameter_count); + height); break; } case FrameStateType::kJavaScriptBuiltinContinuation: { - BailoutId bailout_id = descriptor->bailout_id(); - int parameter_count = - static_cast<unsigned int>(descriptor->parameters_count()); translation->BeginJavaScriptBuiltinContinuationFrame( - bailout_id, shared_info_id, parameter_count); + bailout_id, shared_info_id, height); break; } case FrameStateType::kJavaScriptBuiltinContinuationWithCatch: { - BailoutId bailout_id = descriptor->bailout_id(); - int parameter_count = - static_cast<unsigned int>(descriptor->parameters_count()); translation->BeginJavaScriptBuiltinContinuationWithCatchFrame( - bailout_id, shared_info_id, parameter_count); + bailout_id, shared_info_id, height); break; } } @@ -1053,9 +1057,9 @@ void CodeGenerator::BuildTranslationForFrameStateDescriptor( TranslateFrameStateDescriptorOperands(descriptor, iter, translation); } -int CodeGenerator::BuildTranslation(Instruction* instr, int pc_offset, - size_t frame_state_offset, - OutputFrameStateCombine state_combine) { +DeoptimizationExit* CodeGenerator::BuildTranslation( + Instruction* instr, int pc_offset, size_t frame_state_offset, + OutputFrameStateCombine state_combine) { DeoptimizationEntry const& entry = GetDeoptimizationEntry(instr, frame_state_offset); FrameStateDescriptor* const descriptor = entry.descriptor(); @@ -1068,21 +1072,24 @@ int CodeGenerator::BuildTranslation(Instruction* instr, int pc_offset, update_feedback_count, zone()); if (entry.feedback().IsValid()) { DeoptimizationLiteral literal = - DeoptimizationLiteral(entry.feedback().vector()); + DeoptimizationLiteral(entry.feedback().vector); int literal_id = DefineDeoptimizationLiteral(literal); - translation.AddUpdateFeedback(literal_id, entry.feedback().slot().ToInt()); + translation.AddUpdateFeedback(literal_id, entry.feedback().slot.ToInt()); } InstructionOperandIterator iter(instr, frame_state_offset); BuildTranslationForFrameStateDescriptor(descriptor, &iter, &translation, state_combine); - int deoptimization_id = static_cast<int>(deoptimization_states_.size()); + DeoptimizationExit* const exit = new (zone()) DeoptimizationExit( + current_source_position_, descriptor->bailout_id(), translation.index(), + pc_offset, entry.kind(), entry.reason()); - deoptimization_states_.push_back(new (zone()) DeoptimizationState( - descriptor->bailout_id(), translation.index(), pc_offset, entry.kind(), - entry.reason())); + if (!Deoptimizer::kSupportsFixedDeoptExitSize) { + exit->set_deoptimization_id(next_deoptimization_id_++); + } - return deoptimization_id; + deoptimization_exits_.push_back(exit); + return exit; } void CodeGenerator::AddTranslationForOperand(Translation* translation, @@ -1236,13 +1243,8 @@ void CodeGenerator::MarkLazyDeoptSite() { DeoptimizationExit* CodeGenerator::AddDeoptimizationExit( Instruction* instr, size_t frame_state_offset) { - int const deoptimization_id = BuildTranslation( - instr, -1, frame_state_offset, OutputFrameStateCombine::Ignore()); - - DeoptimizationExit* const exit = new (zone()) - 
DeoptimizationExit(deoptimization_id, current_source_position_); - deoptimization_exits_.push_back(exit); - return exit; + return BuildTranslation(instr, -1, frame_state_offset, + OutputFrameStateCombine::Ignore()); } void CodeGenerator::InitializeSpeculationPoison() { diff --git a/deps/v8/src/compiler/backend/code-generator.h b/deps/v8/src/compiler/backend/code-generator.h index 74dd90c5de..e9ebf67590 100644 --- a/deps/v8/src/compiler/backend/code-generator.h +++ b/deps/v8/src/compiler/backend/code-generator.h @@ -85,6 +85,25 @@ class DeoptimizationLiteral { const StringConstantBase* string_ = nullptr; }; +// These structs hold pc offsets for generated instructions and is only used +// when tracing for turbolizer is enabled. +struct TurbolizerCodeOffsetsInfo { + int code_start_register_check = -1; + int deopt_check = -1; + int init_poison = -1; + int blocks_start = -1; + int out_of_line_code = -1; + int deoptimization_exits = -1; + int pools = -1; + int jump_tables = -1; +}; + +struct TurbolizerInstructionStartInfo { + int gap_pc_offset = -1; + int arch_instr_pc_offset = -1; + int condition_pc_offset = -1; +}; + // Generates native code for a sequence of instructions. class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { public: @@ -96,6 +115,7 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { JumpOptimizationInfo* jump_opt, PoisoningMitigationLevel poisoning_level, const AssemblerOptions& options, int32_t builtin_index, + size_t max_unoptimized_frame_height, std::unique_ptr<AssemblerBuffer> = {}); // Generate native code. After calling AssembleCode, call FinalizeCode to @@ -139,7 +159,13 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { size_t GetHandlerTableOffset() const { return handler_table_offset_; } const ZoneVector<int>& block_starts() const { return block_starts_; } - const ZoneVector<int>& instr_starts() const { return instr_starts_; } + const ZoneVector<TurbolizerInstructionStartInfo>& instr_starts() const { + return instr_starts_; + } + + const TurbolizerCodeOffsetsInfo& offsets_info() const { + return offsets_info_; + } static constexpr int kBinarySearchSwitchMinimalCases = 4; @@ -182,7 +208,7 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { void GenerateSpeculationPoisonFromCodeStartRegister(); // Assemble code for the specified instruction. - CodeGenResult AssembleInstruction(Instruction* instr, + CodeGenResult AssembleInstruction(int instruction_index, const InstructionBlock* block); void AssembleGaps(Instruction* instr); @@ -199,8 +225,7 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { // Determines how to call helper stubs depending on the code kind. StubCallMode DetermineStubCallMode() const; - CodeGenResult AssembleDeoptimizerCall(int deoptimization_id, - SourcePosition pos); + CodeGenResult AssembleDeoptimizerCall(DeoptimizationExit* exit); // =========================================================================== // ============= Architecture-specific code generation methods. 
============== @@ -342,11 +367,9 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { int DefineDeoptimizationLiteral(DeoptimizationLiteral literal); DeoptimizationEntry const& GetDeoptimizationEntry(Instruction* instr, size_t frame_state_offset); - DeoptimizeKind GetDeoptimizationKind(int deoptimization_id) const; - DeoptimizeReason GetDeoptimizationReason(int deoptimization_id) const; - int BuildTranslation(Instruction* instr, int pc_offset, - size_t frame_state_offset, - OutputFrameStateCombine state_combine); + DeoptimizationExit* BuildTranslation(Instruction* instr, int pc_offset, + size_t frame_state_offset, + OutputFrameStateCombine state_combine); void BuildTranslationForFrameStateDescriptor( FrameStateDescriptor* descriptor, InstructionOperandIterator* iter, Translation* translation, OutputFrameStateCombine state_combine); @@ -361,35 +384,12 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { InstructionOperand* op, MachineType type); void MarkLazyDeoptSite(); + void PrepareForDeoptimizationExits(int deopt_count); DeoptimizationExit* AddDeoptimizationExit(Instruction* instr, size_t frame_state_offset); // =========================================================================== - class DeoptimizationState final : public ZoneObject { - public: - DeoptimizationState(BailoutId bailout_id, int translation_id, int pc_offset, - DeoptimizeKind kind, DeoptimizeReason reason) - : bailout_id_(bailout_id), - translation_id_(translation_id), - pc_offset_(pc_offset), - kind_(kind), - reason_(reason) {} - - BailoutId bailout_id() const { return bailout_id_; } - int translation_id() const { return translation_id_; } - int pc_offset() const { return pc_offset_; } - DeoptimizeKind kind() const { return kind_; } - DeoptimizeReason reason() const { return reason_; } - - private: - BailoutId bailout_id_; - int translation_id_; - int pc_offset_; - DeoptimizeKind kind_; - DeoptimizeReason reason_; - }; - struct HandlerInfo { Label* handler; int pc_offset; @@ -414,14 +414,19 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { GapResolver resolver_; SafepointTableBuilder safepoints_; ZoneVector<HandlerInfo> handlers_; + int next_deoptimization_id_ = 0; + int deopt_exit_start_offset_ = 0; ZoneDeque<DeoptimizationExit*> deoptimization_exits_; - ZoneDeque<DeoptimizationState*> deoptimization_states_; ZoneDeque<DeoptimizationLiteral> deoptimization_literals_; size_t inlined_function_count_ = 0; TranslationBuffer translations_; int handler_table_offset_ = 0; int last_lazy_deopt_pc_ = 0; + // The maximal combined height of all frames produced upon deoptimization. + // Applied as an offset to the first stack check of an optimized function. 
+ const size_t max_unoptimized_frame_height_; + // kArchCallCFunction could be reached either: // kArchCallCFunction; // or: @@ -444,7 +449,8 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { CodeGenResult result_; PoisoningMitigationLevel poisoning_level_; ZoneVector<int> block_starts_; - ZoneVector<int> instr_starts_; + TurbolizerCodeOffsetsInfo offsets_info_; + ZoneVector<TurbolizerInstructionStartInfo> instr_starts_; }; } // namespace compiler diff --git a/deps/v8/src/compiler/backend/frame-elider.cc b/deps/v8/src/compiler/backend/frame-elider.cc index 2167d0abaa..064501b097 100644 --- a/deps/v8/src/compiler/backend/frame-elider.cc +++ b/deps/v8/src/compiler/backend/frame-elider.cc @@ -24,7 +24,7 @@ void FrameElider::MarkBlocks() { for (int i = block->code_start(); i < block->code_end(); ++i) { const Instruction* instr = InstructionAt(i); if (instr->IsCall() || instr->IsDeoptimizeCall() || - instr->arch_opcode() == ArchOpcode::kArchStackPointer || + instr->arch_opcode() == ArchOpcode::kArchStackPointerGreaterThan || instr->arch_opcode() == ArchOpcode::kArchFramePointer) { block->mark_needs_frame(); break; diff --git a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc index ed4be7a47c..4542da643b 100644 --- a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc @@ -165,6 +165,11 @@ class IA32OperandConverter : public InstructionOperandConverter { Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset))); return Operand(ctant.ToInt32(), ctant.rmode()); } + case kMode_Root: { + Register base = kRootRegister; + int32_t disp = InputInt32(NextOffset(offset)); + return Operand(base, disp); + } case kMode_None: UNREACHABLE(); } @@ -205,10 +210,18 @@ class IA32OperandConverter : public InstructionOperandConverter { namespace { +bool HasAddressingMode(Instruction* instr) { + return instr->addressing_mode() != kMode_None; +} + bool HasImmediateInput(Instruction* instr, size_t index) { return instr->InputAt(index)->IsImmediate(); } +bool HasRegisterInput(Instruction* instr, size_t index) { + return instr->InputAt(index)->IsRegister(); +} + class OutOfLineLoadFloat32NaN final : public OutOfLineCode { public: OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result) @@ -256,6 +269,8 @@ class OutOfLineTruncateDoubleToI final : public OutOfLineCode { // Just encode the stub index. This will be patched when the code // is added to the native module and copied into wasm code space. 
__ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); + } else if (tasm()->options().inline_offheap_trampolines) { + __ CallBuiltin(Builtins::kDoubleToI); } else { __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET); } @@ -326,31 +341,31 @@ class OutOfLineRecordWrite final : public OutOfLineCode { } // namespace -#define ASSEMBLE_COMPARE(asm_instr) \ - do { \ - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ - size_t index = 0; \ - Operand left = i.MemoryOperand(&index); \ - if (HasImmediateInput(instr, index)) { \ - __ asm_instr(left, i.InputImmediate(index)); \ - } else { \ - __ asm_instr(left, i.InputRegister(index)); \ - } \ - } else { \ - if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ - } else { \ - __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ - } \ - } else { \ - if (instr->InputAt(1)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ - } else { \ - __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ - } \ - } \ - } \ +#define ASSEMBLE_COMPARE(asm_instr) \ + do { \ + if (HasAddressingMode(instr)) { \ + size_t index = 0; \ + Operand left = i.MemoryOperand(&index); \ + if (HasImmediateInput(instr, index)) { \ + __ asm_instr(left, i.InputImmediate(index)); \ + } else { \ + __ asm_instr(left, i.InputRegister(index)); \ + } \ + } else { \ + if (HasImmediateInput(instr, 1)) { \ + if (HasRegisterInput(instr, 0)) { \ + __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ + } else { \ + __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ + } \ + } else { \ + if (HasRegisterInput(instr, 1)) { \ + __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ + } else { \ + __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ + } \ + } \ + } \ } while (0) #define ASSEMBLE_IEEE754_BINOP(name) \ @@ -382,19 +397,19 @@ class OutOfLineRecordWrite final : public OutOfLineCode { __ add(esp, Immediate(kDoubleSize)); \ } while (false) -#define ASSEMBLE_BINOP(asm_instr) \ - do { \ - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ - size_t index = 1; \ - Operand right = i.MemoryOperand(&index); \ - __ asm_instr(i.InputRegister(0), right); \ - } else { \ - if (HasImmediateInput(instr, 1)) { \ - __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ - } else { \ - __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ - } \ - } \ +#define ASSEMBLE_BINOP(asm_instr) \ + do { \ + if (HasAddressingMode(instr)) { \ + size_t index = 1; \ + Operand right = i.MemoryOperand(&index); \ + __ asm_instr(i.InputRegister(0), right); \ + } else { \ + if (HasImmediateInput(instr, 1)) { \ + __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ + } else { \ + __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ + } \ + } \ } while (0) #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \ @@ -431,9 +446,9 @@ class OutOfLineRecordWrite final : public OutOfLineCode { #define ASSEMBLE_MOVX(mov_instr) \ do { \ - if (instr->addressing_mode() != kMode_None) { \ + if (HasAddressingMode(instr)) { \ __ mov_instr(i.OutputRegister(), i.MemoryOperand()); \ - } else if (instr->InputAt(0)->IsRegister()) { \ + } else if (HasRegisterInput(instr, 0)) { \ __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \ } else { \ __ mov_instr(i.OutputRegister(), i.InputOperand(0)); \ @@ -905,19 +920,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // don't emit code for nops. 
break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; break; } case kArchRet: AssembleReturn(instr->InputAt(0)); break; - case kArchStackPointer: - __ mov(i.OutputRegister(), esp); - break; case kArchFramePointer: __ mov(i.OutputRegister(), ebp); break; @@ -928,6 +939,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), ebp); } break; + case kArchStackPointerGreaterThan: { + constexpr size_t kValueIndex = 0; + if (HasAddressingMode(instr)) { + __ cmp(esp, i.MemoryOperand(kValueIndex)); + } else { + __ cmp(esp, i.InputRegister(kValueIndex)); + } + break; + } case kArchTruncateDoubleToI: { auto result = i.OutputRegister(); auto input = i.InputDoubleRegister(0); @@ -1115,7 +1135,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // i.InputRegister(2) ... right low word. // i.InputRegister(3) ... right high word. bool use_temp = false; - if ((instr->InputAt(1)->IsRegister() && + if ((HasRegisterInput(instr, 1) && i.OutputRegister(0).code() == i.InputRegister(1).code()) || i.OutputRegister(0).code() == i.InputRegister(3).code()) { // We cannot write to the output register directly, because it would @@ -1140,7 +1160,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // i.InputRegister(2) ... right low word. // i.InputRegister(3) ... right high word. bool use_temp = false; - if ((instr->InputAt(1)->IsRegister() && + if ((HasRegisterInput(instr, 1) && i.OutputRegister(0).code() == i.InputRegister(1).code()) || i.OutputRegister(0).code() == i.InputRegister(3).code()) { // We cannot write to the output register directly, because it would @@ -1671,7 +1691,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kIA32BitcastIF: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ movss(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -1762,7 +1782,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize); break; case kIA32Push: - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { + if (HasAddressingMode(instr)) { size_t index = 0; Operand operand = i.MemoryOperand(&index); __ push(operand); @@ -1984,6 +2004,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputOperand(1)); break; } + case kSSEF32x4Div: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + __ divps(i.OutputSimd128Register(), i.InputOperand(1)); + break; + } + case kAVXF32x4Div: { + CpuFeatureScope avx_scope(tasm(), AVX); + __ vdivps(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputOperand(1)); + break; + } case kSSEF32x4Min: { XMMRegister src1 = i.InputSimd128Register(1), dst = i.OutputSimd128Register(); @@ -2180,24 +2211,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kSSEI32x4Shl: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ pslld(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ pslld(i.OutputSimd128Register(), tmp); break; } case 
kAVXI32x4Shl: { CpuFeatureScope avx_scope(tasm(), AVX); - __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kSSEI32x4ShrS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ psrad(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ psrad(i.OutputSimd128Register(), tmp); break; } case kAVXI32x4ShrS: { CpuFeatureScope avx_scope(tasm(), AVX); - __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kSSEI32x4Add: { @@ -2329,7 +2366,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); // NAN->0, negative->0 __ pxor(kScratchDoubleReg, kScratchDoubleReg); __ maxps(dst, kScratchDoubleReg); @@ -2357,7 +2394,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); // NAN->0, negative->0 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vmaxps(dst, dst, kScratchDoubleReg); @@ -2392,13 +2429,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kSSEI32x4ShrU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ psrld(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ psrld(i.OutputSimd128Register(), tmp); break; } case kAVXI32x4ShrU: { CpuFeatureScope avx_scope(tasm(), AVX); - __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kSSEI32x4MinU: { @@ -2512,24 +2552,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kSSEI16x8Shl: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ psllw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ psllw(i.OutputSimd128Register(), tmp); break; } case kAVXI16x8Shl: { CpuFeatureScope avx_scope(tasm(), AVX); - __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kSSEI16x8ShrS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ psraw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ psraw(i.OutputSimd128Register(), tmp); break; } case kAVXI16x8ShrS: { CpuFeatureScope 
avx_scope(tasm(), AVX); - __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kSSEI16x8SConvertI32x4: { @@ -2698,13 +2744,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kSSEI16x8ShrU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ psrlw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ psrlw(i.OutputSimd128Register(), tmp); break; } case kAVXI16x8ShrU: { CpuFeatureScope avx_scope(tasm(), AVX); - __ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), - i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movd(tmp, i.InputRegister(1)); + __ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); break; } case kSSEI16x8UConvertI32x4: { @@ -2867,53 +2916,54 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kSSEI8x16Shl: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - int8_t shift = i.InputInt8(1) & 0x7; - if (shift < 4) { - // For small shifts, doubling is faster. - for (int i = 0; i < shift; ++i) { - __ paddb(dst, dst); - } - } else { - // Mask off the unwanted bits before word-shifting. - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ psrlw(kScratchDoubleReg, 8 + shift); - __ packuswb(kScratchDoubleReg, kScratchDoubleReg); - __ pand(dst, kScratchDoubleReg); - __ psllw(dst, shift); - } + Register shift = i.InputRegister(1); + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + // Mask off the unwanted bits before word-shifting. + __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); + __ mov(tmp, shift); + __ add(tmp, Immediate(8)); + __ movd(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ packuswb(kScratchDoubleReg, kScratchDoubleReg); + __ pand(dst, kScratchDoubleReg); + __ movd(tmp_simd, shift); + __ psllw(dst, tmp_simd); break; } case kAVXI8x16Shl: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; - if (shift < 4) { - // For small shifts, doubling is faster. - for (int i = 0; i < shift; ++i) { - __ vpaddb(dst, src, src); - src = dst; - } - } else { - // Mask off the unwanted bits before word-shifting. - __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8 + shift); - __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpand(dst, src, kScratchDoubleReg); - __ vpsllw(dst, dst, shift); - } + Register shift = i.InputRegister(1); + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + // Mask off the unwanted bits before word-shifting. 
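SSE/AVX have no per-byte shift, so the kSSEI8x16Shl sequence above (and its AVX twin continuing below) emulates an 8-bit left shift with 16-bit word shifts, masking first so nothing leaks across byte boundaries. A scalar model of that trick, assuming the shift amount is already masked to 0..7 (illustrative only; function name is mine):

    #include <cstdint>
    // Left-shift both bytes of one 16-bit lane by `amount` without a
    // byte-granularity shift, mirroring pcmpeqw/psrlw/packuswb/pand/psllw.
    uint16_t ShlTwoBytes(uint8_t lo, uint8_t hi, int amount) {
      uint16_t mask_byte = 0xFFu >> amount;               // psrlw(0xFFFF, 8 + amount) + packuswb
      uint16_t lane = static_cast<uint16_t>(hi) << 8 | lo;
      lane &= static_cast<uint16_t>((mask_byte << 8) | mask_byte);  // pand: clear top `amount` bits of each byte
      return static_cast<uint16_t>(lane << amount);       // psllw: no bit can now cross the byte boundary
    }
    // Without the pand, lo = 0x81 with amount = 1 would leak into the high byte
    // (0x0181 << 1 == 0x0302 instead of the per-byte result 0x0202).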
+ __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ mov(tmp, shift); + __ add(tmp, Immediate(8)); + __ movd(tmp_simd, tmp); + __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd); + __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ vpand(dst, src, kScratchDoubleReg); + __ movd(tmp_simd, shift); + __ vpsllw(dst, dst, tmp_simd); break; } case kIA32I8x16ShrS: { XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; + DCHECK_EQ(dst, i.InputSimd128Register(0)); + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); // Unpack the bytes into words, do arithmetic shifts, and repack. - __ Punpckhbw(kScratchDoubleReg, src); - __ Punpcklbw(dst, src); - __ Psraw(kScratchDoubleReg, 8 + shift); - __ Psraw(dst, 8 + shift); - __ Packsswb(dst, kScratchDoubleReg); + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + __ mov(tmp, i.InputRegister(1)); + __ add(tmp, Immediate(8)); + __ movd(tmp_simd, tmp); + __ psraw(kScratchDoubleReg, tmp_simd); + __ psraw(dst, tmp_simd); + __ packsswb(dst, kScratchDoubleReg); break; } case kSSEI8x16Add: { @@ -2964,7 +3014,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); XMMRegister right = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); // I16x8 view of I8x16 // left = AAaa AAaa ... AAaa AAaa @@ -3004,7 +3054,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); XMMRegister left = i.InputSimd128Register(0); XMMRegister right = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); // I16x8 view of I8x16 // left = AAaa AAaa ... AAaa AAaa @@ -3165,15 +3215,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kIA32I8x16ShrU: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); // Unpack the bytes into words, do logical shifts, and repack. 
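Both byte right shifts (kIA32I8x16ShrS above, kIA32I8x16ShrU continuing below) widen each byte into a 16-bit lane via punpcklbw(dst, dst) and then shift by (amount + 8). A scalar sketch of why that yields a per-byte shift, assuming the amount is pre-masked to 0..7 and two's-complement narrowing (function names are mine):

    #include <cstdint>
    // punpcklbw(dst, dst) duplicates each byte into both halves of a word, so a
    // word shift by (amount + 8) leaves exactly (byte >> amount) in the low byte.
    int8_t I8ShrS(int8_t b, int amount) {   // arithmetic per-byte shift
      int16_t lane = static_cast<int16_t>(static_cast<uint8_t>(b) * 0x0101);  // byte duplicated
      lane = static_cast<int16_t>(lane >> (amount + 8));                      // psraw
      return static_cast<int8_t>(lane);                                       // packsswb (never saturates here)
    }
    uint8_t I8ShrU(uint8_t b, int amount) {  // logical per-byte shift
      uint16_t lane = static_cast<uint16_t>(b) * 0x0101u;                     // byte duplicated
      lane = static_cast<uint16_t>(lane >> (amount + 8));                     // psrlw
      return static_cast<uint8_t>(lane);                                      // packuswb
    }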
- __ Punpckhbw(kScratchDoubleReg, src); - __ Punpcklbw(dst, src); - __ Psrlw(kScratchDoubleReg, 8 + shift); - __ Psrlw(dst, 8 + shift); - __ Packuswb(dst, kScratchDoubleReg); + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + __ mov(tmp, i.InputRegister(1)); + __ add(tmp, Immediate(8)); + __ movd(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ psrlw(dst, tmp_simd); + __ packuswb(dst, kScratchDoubleReg); break; } case kSSEI8x16MinU: { @@ -3693,10 +3747,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb); break; } - case kIA32StackCheck: { - __ CompareStackLimit(esp); - break; - } case kIA32Word32AtomicPairLoad: { XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); __ movq(tmp, i.MemoryOperand()); @@ -4402,6 +4452,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() {} +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { IA32OperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h b/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h index 56dea82fe2..7530c716b8 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h +++ b/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h @@ -116,7 +116,6 @@ namespace compiler { V(IA32PushSimd128) \ V(IA32Poke) \ V(IA32Peek) \ - V(IA32StackCheck) \ V(SSEF32x4Splat) \ V(AVXF32x4Splat) \ V(SSEF32x4ExtractLane) \ @@ -140,6 +139,8 @@ namespace compiler { V(AVXF32x4Sub) \ V(SSEF32x4Mul) \ V(AVXF32x4Mul) \ + V(SSEF32x4Div) \ + V(AVXF32x4Div) \ V(SSEF32x4Min) \ V(AVXF32x4Min) \ V(SSEF32x4Max) \ @@ -394,7 +395,8 @@ namespace compiler { V(M2I) /* [ %r2*2 + K] */ \ V(M4I) /* [ %r2*4 + K] */ \ V(M8I) /* [ %r2*8 + K] */ \ - V(MI) /* [ K] */ + V(MI) /* [ K] */ \ + V(Root) /* [%root + K] */ } // namespace compiler } // namespace internal diff --git a/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc index 15f69b991c..c2097a6691 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc @@ -120,6 +120,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXF32x4Sub: case kSSEF32x4Mul: case kAVXF32x4Mul: + case kSSEF32x4Div: + case kAVXF32x4Div: case kSSEF32x4Min: case kAVXF32x4Min: case kSSEF32x4Max: @@ -356,7 +358,6 @@ int InstructionScheduler::GetTargetInstructionFlags( // Moves are used for memory load/store operations. return instr->HasOutput() ? 
kIsLoadOperation : kHasSideEffect; - case kIA32StackCheck: case kIA32Peek: return kIsLoadOperation; diff --git a/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc b/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc index e1fc66b4ba..ebef39a93a 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc @@ -152,6 +152,21 @@ class IA32OperandGenerator final : public OperandGenerator { AddressingMode GetEffectiveAddressMemoryOperand(Node* node, InstructionOperand inputs[], size_t* input_count) { + { + LoadMatcher<ExternalReferenceMatcher> m(node); + if (m.index().HasValue() && m.object().HasValue() && + selector()->CanAddressRelativeToRootsRegister(m.object().Value())) { + ptrdiff_t const delta = + m.index().Value() + + TurboAssemblerBase::RootRegisterOffsetForExternalReference( + selector()->isolate(), m.object().Value()); + if (is_int32(delta)) { + inputs[(*input_count)++] = TempImmediate(static_cast<int32_t>(delta)); + return kMode_Root; + } + } + } + BaseWithIndexAndDisplacement32Matcher m(node, AddressOption::kAllowAll); DCHECK(m.matches()); if ((m.displacement() == nullptr || CanBeImmediate(m.displacement()))) { @@ -261,6 +276,31 @@ void VisitRRISimd(InstructionSelector* selector, Node* node, } } +void VisitRROSimdShift(InstructionSelector* selector, Node* node, + ArchOpcode avx_opcode, ArchOpcode sse_opcode) { + IA32OperandGenerator g(selector); + InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); + InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1)); + InstructionOperand temps[] = {g.TempSimd128Register()}; + if (selector->IsSupported(AVX)) { + selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1, + arraysize(temps), temps); + } else { + selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1, + arraysize(temps), temps); + } +} + +void VisitRROI8x16SimdRightShift(InstructionSelector* selector, Node* node, + ArchOpcode opcode) { + IA32OperandGenerator g(selector); + InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); + InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1)); + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; + selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1, + arraysize(temps), temps); +} + } // namespace void InstructionSelector::VisitStackSlot(Node* node) { @@ -344,7 +384,8 @@ void InstructionSelector::VisitStore(Node* node) { WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind(); MachineRepresentation rep = store_rep.representation(); - if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedPointer(rep)); AddressingMode addressing_mode; InstructionOperand inputs[] = { @@ -516,6 +557,35 @@ void InstructionSelector::VisitWord32Xor(Node* node) { } } +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + DCHECK(cont->IsBranch()); + const int effect_level = + GetEffectLevel(cont->true_block()->PredecessorAt(0)->control_input()); + + IA32OperandGenerator g(this); + if (g.CanBeMemoryOperand(kIA32Cmp, node, value, effect_level)) { + DCHECK_EQ(IrOpcode::kLoad, value->opcode()); + + // GetEffectiveAddressMemoryOperand can create at most 3 inputs. 
+ static constexpr int kMaxInputCount = 3; + + size_t input_count = 0; + InstructionOperand inputs[kMaxInputCount]; + AddressingMode addressing_mode = + g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count); + opcode |= AddressingModeField::encode(addressing_mode); + DCHECK_LE(input_count, kMaxInputCount); + + EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont); + } else { + EmitWithContinuation(opcode, g.UseRegister(value), cont); + } +} + // Shared routine for multiple shift operations. static inline void VisitShift(InstructionSelector* selector, Node* node, ArchOpcode opcode) { @@ -1243,30 +1313,6 @@ void VisitWordCompare(InstructionSelector* selector, Node* node, void VisitWordCompare(InstructionSelector* selector, Node* node, FlagsContinuation* cont) { - if (selector->isolate() != nullptr) { - StackCheckMatcher<Int32BinopMatcher, IrOpcode::kUint32LessThan> m( - selector->isolate(), node); - if (m.Matched()) { - // Compare(Load(js_stack_limit), LoadStackPointer) - if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute(); - InstructionCode opcode = cont->Encode(kIA32StackCheck); - CHECK(cont->IsBranch()); - selector->EmitWithContinuation(opcode, cont); - return; - } - } - WasmStackCheckMatcher<Int32BinopMatcher, IrOpcode::kUint32LessThan> wasm_m( - node); - if (wasm_m.Matched()) { - // This is a wasm stack check. By structure, we know that we can use the - // stack pointer directly, as wasm code does not modify the stack at points - // where stack checks are performed. - Node* left = node->InputAt(0); - LocationOperand esp(InstructionOperand::EXPLICIT, LocationOperand::REGISTER, - InstructionSequence::DefaultRepresentation(), - RegisterCode::kRegCode_esp); - return VisitCompareWithMemoryOperand(selector, kIA32Cmp, left, esp, cont); - } VisitWordCompare(selector, node, kIA32Cmp, cont); } @@ -1433,6 +1479,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, return VisitWordCompare(this, value, cont); case IrOpcode::kWord32And: return VisitWordCompare(this, value, kIA32Test, cont); + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -1842,6 +1891,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(F32x4AddHoriz) \ V(F32x4Sub) \ V(F32x4Mul) \ + V(F32x4Div) \ V(F32x4Min) \ V(F32x4Max) \ V(F32x4Eq) \ @@ -1939,8 +1989,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(I32x4ShrU) \ V(I16x8Shl) \ V(I16x8ShrS) \ - V(I16x8ShrU) \ - V(I8x16Shl) + V(I16x8ShrU) #define SIMD_I8X16_RIGHT_SHIFT_OPCODES(V) \ V(I8x16ShrS) \ @@ -2037,22 +2086,21 @@ VISIT_SIMD_REPLACE_LANE(F32x4) #undef VISIT_SIMD_REPLACE_LANE #undef SIMD_INT_TYPES -#define VISIT_SIMD_SHIFT(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - VisitRRISimd(this, node, kAVX##Opcode, kSSE##Opcode); \ +#define VISIT_SIMD_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + VisitRROSimdShift(this, node, kAVX##Opcode, kSSE##Opcode); \ } SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT) #undef VISIT_SIMD_SHIFT #undef SIMD_SHIFT_OPCODES -#define VISIT_SIMD_I8X16_RIGHT_SHIFT(Op) \ - void InstructionSelector::Visit##Op(Node* node) { \ - VisitRRISimd(this, node, kIA32##Op); \ +#define VISIT_SIMD_I8x16_RIGHT_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + VisitRROI8x16SimdRightShift(this, node, kIA32##Opcode); \ } - 
-SIMD_I8X16_RIGHT_SHIFT_OPCODES(VISIT_SIMD_I8X16_RIGHT_SHIFT) +SIMD_I8X16_RIGHT_SHIFT_OPCODES(VISIT_SIMD_I8x16_RIGHT_SHIFT) #undef SIMD_I8X16_RIGHT_SHIFT_OPCODES -#undef VISIT_SIMD_I8X16_RIGHT_SHIFT +#undef VISIT_SIMD_I8x16_RIGHT_SHIFT #define VISIT_SIMD_UNOP(Opcode) \ void InstructionSelector::Visit##Opcode(Node* node) { \ @@ -2123,6 +2171,20 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) { VisitPack(this, node, kAVXI8x16UConvertI16x8, kSSEI8x16UConvertI16x8); } +void InstructionSelector::VisitI8x16Shl(Node* node) { + IA32OperandGenerator g(this); + InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); + InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1)); + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; + if (IsSupported(AVX)) { + Emit(kAVXI8x16Shl, g.DefineAsRegister(node), operand0, operand1, + arraysize(temps), temps); + } else { + Emit(kSSEI8x16Shl, g.DefineSameAsFirst(node), operand0, operand1, + arraysize(temps), temps); + } +} + void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { UNREACHABLE(); } @@ -2259,13 +2321,13 @@ static const ShuffleEntry arch_shuffles[] = { {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kSSES8x8Reverse, kAVXS8x8Reverse, - false, - false}, + true, + true}, {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kSSES8x4Reverse, kAVXS8x4Reverse, - false, - false}, + true, + true}, {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kSSES8x2Reverse, kAVXS8x2Reverse, diff --git a/deps/v8/src/compiler/backend/instruction-codes.h b/deps/v8/src/compiler/backend/instruction-codes.h index 1085de2196..589c1bda3b 100644 --- a/deps/v8/src/compiler/backend/instruction-codes.h +++ b/deps/v8/src/compiler/backend/instruction-codes.h @@ -88,13 +88,13 @@ inline RecordWriteMode WriteBarrierKindToRecordWriteMode( V(ArchThrowTerminator) \ V(ArchDeoptimize) \ V(ArchRet) \ - V(ArchStackPointer) \ V(ArchFramePointer) \ V(ArchParentFramePointer) \ V(ArchTruncateDoubleToI) \ V(ArchStoreWithWriteBarrier) \ V(ArchStackSlot) \ V(ArchWordPoisonOnSpeculation) \ + V(ArchStackPointerGreaterThan) \ V(Word32AtomicLoadInt8) \ V(Word32AtomicLoadUint8) \ V(Word32AtomicLoadInt16) \ @@ -238,6 +238,9 @@ enum FlagsCondition { kNegative }; +static constexpr FlagsCondition kStackPointerGreaterThanCondition = + kUnsignedGreaterThan; + inline FlagsCondition NegateFlagsCondition(FlagsCondition condition) { return static_cast<FlagsCondition>(condition ^ 1); } diff --git a/deps/v8/src/compiler/backend/instruction-scheduler.cc b/deps/v8/src/compiler/backend/instruction-scheduler.cc index 538af71bb4..dc66813740 100644 --- a/deps/v8/src/compiler/backend/instruction-scheduler.cc +++ b/deps/v8/src/compiler/backend/instruction-scheduler.cc @@ -275,9 +275,10 @@ int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const { case kIeee754Float64Tanh: return kNoOpcodeFlags; - case kArchStackPointer: - // ArchStackPointer instruction loads the current stack pointer value and - // must not be reordered with instruction with side effects. + case kArchStackPointerGreaterThan: + // The ArchStackPointerGreaterThan instruction loads the current stack + // pointer value and must not be reordered with instructions with side + // effects. 
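Taken together with kStackPointerGreaterThanCondition (= kUnsignedGreaterThan) defined above, the replacement for the old kArchStackPointer / kIA32StackCheck pattern reduces to one comparison that each backend emits as cmp(sp, limit). Expressed as plain C++, the condition the new instruction materializes is roughly (sketch, not actual V8 code):

    #include <cstdint>
    // kArchStackPointerGreaterThan: compare the current stack pointer against
    // the limit passed as input 0; the flags continuation then branches or sets
    // on "unsigned greater than".
    inline bool StackPointerGreaterThan(uintptr_t sp, uintptr_t limit) {
      return sp > limit;  // kStackPointerGreaterThanCondition == kUnsignedGreaterThan
    }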
return kIsLoadOperation; case kArchWordPoisonOnSpeculation: diff --git a/deps/v8/src/compiler/backend/instruction-selector.cc b/deps/v8/src/compiler/backend/instruction-selector.cc index 11ba910405..43193ec2b1 100644 --- a/deps/v8/src/compiler/backend/instruction-selector.cc +++ b/deps/v8/src/compiler/backend/instruction-selector.cc @@ -26,6 +26,7 @@ InstructionSelector::InstructionSelector( InstructionSequence* sequence, Schedule* schedule, SourcePositionTable* source_positions, Frame* frame, EnableSwitchJumpTable enable_switch_jump_table, TickCounter* tick_counter, + size_t* max_unoptimized_frame_height, SourcePositionMode source_position_mode, Features features, EnableScheduling enable_scheduling, EnableRootsRelativeAddressing enable_roots_relative_addressing, @@ -56,7 +57,10 @@ InstructionSelector::InstructionSelector( instruction_selection_failed_(false), instr_origins_(sequence->zone()), trace_turbo_(trace_turbo), - tick_counter_(tick_counter) { + tick_counter_(tick_counter), + max_unoptimized_frame_height_(max_unoptimized_frame_height) { + DCHECK_EQ(*max_unoptimized_frame_height, 0); // Caller-initialized. + instructions_.reserve(node_count); continuation_inputs_.reserve(5); continuation_outputs_.reserve(2); @@ -421,9 +425,27 @@ void InstructionSelector::SetEffectLevel(Node* node, int effect_level) { effect_level_[id] = effect_level; } -bool InstructionSelector::CanAddressRelativeToRootsRegister() const { - return enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing && - CanUseRootsRegister(); +bool InstructionSelector::CanAddressRelativeToRootsRegister( + const ExternalReference& reference) const { + // There are three things to consider here: + // 1. CanUseRootsRegister: Is kRootRegister initialized? + const bool root_register_is_available_and_initialized = CanUseRootsRegister(); + if (!root_register_is_available_and_initialized) return false; + + // 2. enable_roots_relative_addressing_: Can we address everything on the heap + // through the root register, i.e. are root-relative addresses to arbitrary + // addresses guaranteed not to change between code generation and + // execution? + const bool all_root_relative_offsets_are_constant = + (enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing); + if (all_root_relative_offsets_are_constant) return true; + + // 3. IsAddressableThroughRootRegister: Is the target address guaranteed to + // have a fixed root-relative offset? If so, we can ignore 2. 
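The three numbered conditions above collapse into a single predicate; a condensed sketch of the logic implemented here and in the lines that follow (member and helper names as in the hunk, wrapper name is mine, not meant to compile standalone):

    // Condensed equivalent of CanAddressRelativeToRootsRegister(reference):
    bool CanAddressRelativeToRootsRegisterSketch(const ExternalReference& reference) {
      if (!CanUseRootsRegister()) return false;          // 1. kRootRegister initialized?
      if (enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing)
        return true;                                     // 2. every root-relative offset is constant
      return TurboAssemblerBase::IsAddressableThroughRootRegister(
          isolate(), reference);                         // 3. this particular offset is constant
    }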
+ const bool this_root_relative_offset_is_constant = + TurboAssemblerBase::IsAddressableThroughRootRegister(isolate(), + reference); + return this_root_relative_offset_is_constant; } bool InstructionSelector::CanUseRootsRegister() const { @@ -744,7 +766,7 @@ Instruction* InstructionSelector::EmitWithContinuation( void InstructionSelector::AppendDeoptimizeArguments( InstructionOperandVector* args, DeoptimizeKind kind, - DeoptimizeReason reason, VectorSlotPair const& feedback, + DeoptimizeReason reason, FeedbackSource const& feedback, Node* frame_state) { OperandGenerator g(this); FrameStateDescriptor* const descriptor = GetFrameStateDescriptor(frame_state); @@ -761,7 +783,7 @@ void InstructionSelector::AppendDeoptimizeArguments( Instruction* InstructionSelector::EmitDeoptimize( InstructionCode opcode, size_t output_count, InstructionOperand* outputs, size_t input_count, InstructionOperand* inputs, DeoptimizeKind kind, - DeoptimizeReason reason, VectorSlotPair const& feedback, + DeoptimizeReason reason, FeedbackSource const& feedback, Node* frame_state) { InstructionOperandVector args(instruction_zone()); for (size_t i = 0; i < input_count; ++i) { @@ -972,7 +994,7 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer, int const state_id = sequence()->AddDeoptimizationEntry( buffer->frame_state_descriptor, DeoptimizeKind::kLazy, - DeoptimizeReason::kUnknown, VectorSlotPair()); + DeoptimizeReason::kUnknown, FeedbackSource()); buffer->instruction_args.push_back(g.TempImmediate(state_id)); StateObjectDeduplicator deduplicator(instruction_zone()); @@ -1056,7 +1078,6 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer, bool InstructionSelector::IsSourcePositionUsed(Node* node) { return (source_position_mode_ == kAllSourcePositions || node->opcode() == IrOpcode::kCall || - node->opcode() == IrOpcode::kCallWithCallerSavedRegisters || node->opcode() == IrOpcode::kTrapIf || node->opcode() == IrOpcode::kTrapUnless || node->opcode() == IrOpcode::kProtectedLoad || @@ -1078,10 +1099,13 @@ void InstructionSelector::VisitBlock(BasicBlock* block) { if (node->opcode() == IrOpcode::kStore || node->opcode() == IrOpcode::kUnalignedStore || node->opcode() == IrOpcode::kCall || - node->opcode() == IrOpcode::kCallWithCallerSavedRegisters || node->opcode() == IrOpcode::kProtectedLoad || node->opcode() == IrOpcode::kProtectedStore || - node->opcode() == IrOpcode::kMemoryBarrier) { +#define ADD_EFFECT_FOR_ATOMIC_OP(Opcode) \ + node->opcode() == IrOpcode::k##Opcode || + MACHINE_ATOMIC_OP_LIST(ADD_EFFECT_FOR_ATOMIC_OP) +#undef ADD_EFFECT_FOR_ATOMIC_OP + node->opcode() == IrOpcode::kMemoryBarrier) { ++effect_level; } } @@ -1274,9 +1298,9 @@ void InstructionSelector::VisitNode(Node* node) { // No code needed for these graph artifacts. 
return; case IrOpcode::kIfException: - return MarkAsReference(node), VisitIfException(node); + return MarkAsTagged(node), VisitIfException(node); case IrOpcode::kFinishRegion: - return MarkAsReference(node), VisitFinishRegion(node); + return MarkAsTagged(node), VisitFinishRegion(node); case IrOpcode::kParameter: { MachineType type = linkage()->GetParameterType(ParameterIndexOf(node->op())); @@ -1284,7 +1308,7 @@ void InstructionSelector::VisitNode(Node* node) { return VisitParameter(node); } case IrOpcode::kOsrValue: - return MarkAsReference(node), VisitOsrValue(node); + return MarkAsTagged(node), VisitOsrValue(node); case IrOpcode::kPhi: { MachineRepresentation rep = PhiRepresentationOf(node->op()); if (rep == MachineRepresentation::kNone) return; @@ -1304,20 +1328,18 @@ void InstructionSelector::VisitNode(Node* node) { case IrOpcode::kFloat64Constant: return MarkAsFloat64(node), VisitConstant(node); case IrOpcode::kHeapConstant: - return MarkAsReference(node), VisitConstant(node); + return MarkAsTagged(node), VisitConstant(node); case IrOpcode::kCompressedHeapConstant: return MarkAsCompressed(node), VisitConstant(node); case IrOpcode::kNumberConstant: { double value = OpParameter<double>(node->op()); - if (!IsSmiDouble(value)) MarkAsReference(node); + if (!IsSmiDouble(value)) MarkAsTagged(node); return VisitConstant(node); } case IrOpcode::kDelayedStringConstant: - return MarkAsReference(node), VisitConstant(node); + return MarkAsTagged(node), VisitConstant(node); case IrOpcode::kCall: return VisitCall(node); - case IrOpcode::kCallWithCallerSavedRegisters: - return VisitCallWithCallerSavedRegisters(node); case IrOpcode::kDeoptimizeIf: return VisitDeoptimizeIf(node); case IrOpcode::kDeoptimizeUnless: @@ -1484,10 +1506,16 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsRepresentation(MachineType::PointerRepresentation(), node), VisitBitcastTaggedToWord(node); case IrOpcode::kBitcastWordToTagged: - return MarkAsReference(node), VisitBitcastWordToTagged(node); + return MarkAsTagged(node), VisitBitcastWordToTagged(node); case IrOpcode::kBitcastWordToTaggedSigned: return MarkAsRepresentation(MachineRepresentation::kTaggedSigned, node), EmitIdentity(node); + case IrOpcode::kBitcastWord32ToCompressedSigned: + return MarkAsRepresentation(MachineRepresentation::kCompressedSigned, + node), + EmitIdentity(node); + case IrOpcode::kBitcastCompressedSignedToWord32: + return MarkAsWord32(node), EmitIdentity(node); case IrOpcode::kChangeFloat32ToFloat64: return MarkAsFloat64(node), VisitChangeFloat32ToFloat64(node); case IrOpcode::kChangeInt32ToFloat64: @@ -1536,18 +1564,20 @@ void InstructionSelector::VisitNode(Node* node) { case IrOpcode::kChangeTaggedToCompressed: return MarkAsCompressed(node), VisitChangeTaggedToCompressed(node); case IrOpcode::kChangeTaggedPointerToCompressedPointer: - return MarkAsCompressed(node), + return MarkAsRepresentation(MachineRepresentation::kCompressedPointer, + node), VisitChangeTaggedPointerToCompressedPointer(node); case IrOpcode::kChangeTaggedSignedToCompressedSigned: - return MarkAsWord32(node), + return MarkAsRepresentation(MachineRepresentation::kCompressedSigned, + node), VisitChangeTaggedSignedToCompressedSigned(node); case IrOpcode::kChangeCompressedToTagged: - return MarkAsReference(node), VisitChangeCompressedToTagged(node); + return MarkAsTagged(node), VisitChangeCompressedToTagged(node); case IrOpcode::kChangeCompressedPointerToTaggedPointer: - return MarkAsReference(node), + return 
MarkAsRepresentation(MachineRepresentation::kTaggedPointer, node), VisitChangeCompressedPointerToTaggedPointer(node); case IrOpcode::kChangeCompressedSignedToTaggedSigned: - return MarkAsWord64(node), + return MarkAsRepresentation(MachineRepresentation::kTaggedSigned, node), VisitChangeCompressedSignedToTaggedSigned(node); #endif case IrOpcode::kTruncateFloat64ToFloat32: @@ -1697,15 +1727,15 @@ void InstructionSelector::VisitNode(Node* node) { case IrOpcode::kFloat64InsertHighWord32: return MarkAsFloat64(node), VisitFloat64InsertHighWord32(node); case IrOpcode::kTaggedPoisonOnSpeculation: - return MarkAsReference(node), VisitTaggedPoisonOnSpeculation(node); + return MarkAsTagged(node), VisitTaggedPoisonOnSpeculation(node); case IrOpcode::kWord32PoisonOnSpeculation: return MarkAsWord32(node), VisitWord32PoisonOnSpeculation(node); case IrOpcode::kWord64PoisonOnSpeculation: return MarkAsWord64(node), VisitWord64PoisonOnSpeculation(node); case IrOpcode::kStackSlot: return VisitStackSlot(node); - case IrOpcode::kLoadStackPointer: - return VisitLoadStackPointer(node); + case IrOpcode::kStackPointerGreaterThan: + return VisitStackPointerGreaterThan(node); case IrOpcode::kLoadFramePointer: return VisitLoadFramePointer(node); case IrOpcode::kLoadParentFramePointer: @@ -1827,6 +1857,18 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF64x2Abs(node); case IrOpcode::kF64x2Neg: return MarkAsSimd128(node), VisitF64x2Neg(node); + case IrOpcode::kF64x2Add: + return MarkAsSimd128(node), VisitF64x2Add(node); + case IrOpcode::kF64x2Sub: + return MarkAsSimd128(node), VisitF64x2Sub(node); + case IrOpcode::kF64x2Mul: + return MarkAsSimd128(node), VisitF64x2Mul(node); + case IrOpcode::kF64x2Div: + return MarkAsSimd128(node), VisitF64x2Div(node); + case IrOpcode::kF64x2Min: + return MarkAsSimd128(node), VisitF64x2Min(node); + case IrOpcode::kF64x2Max: + return MarkAsSimd128(node), VisitF64x2Max(node); case IrOpcode::kF64x2Eq: return MarkAsSimd128(node), VisitF64x2Eq(node); case IrOpcode::kF64x2Ne: @@ -1861,6 +1903,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF32x4Sub(node); case IrOpcode::kF32x4Mul: return MarkAsSimd128(node), VisitF32x4Mul(node); + case IrOpcode::kF32x4Div: + return MarkAsSimd128(node), VisitF32x4Div(node); case IrOpcode::kF32x4Min: return MarkAsSimd128(node), VisitF32x4Min(node); case IrOpcode::kF32x4Max: @@ -1891,6 +1935,10 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitI64x2Sub(node); case IrOpcode::kI64x2Mul: return MarkAsSimd128(node), VisitI64x2Mul(node); + case IrOpcode::kI64x2MinS: + return MarkAsSimd128(node), VisitI64x2MinS(node); + case IrOpcode::kI64x2MaxS: + return MarkAsSimd128(node), VisitI64x2MaxS(node); case IrOpcode::kI64x2Eq: return MarkAsSimd128(node), VisitI64x2Eq(node); case IrOpcode::kI64x2Ne: @@ -1901,6 +1949,10 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitI64x2GeS(node); case IrOpcode::kI64x2ShrU: return MarkAsSimd128(node), VisitI64x2ShrU(node); + case IrOpcode::kI64x2MinU: + return MarkAsSimd128(node), VisitI64x2MinU(node); + case IrOpcode::kI64x2MaxU: + return MarkAsSimd128(node), VisitI64x2MaxU(node); case IrOpcode::kI64x2GtU: return MarkAsSimd128(node), VisitI64x2GtU(node); case IrOpcode::kI64x2GeU: @@ -2134,9 +2186,10 @@ void InstructionSelector::VisitTaggedPoisonOnSpeculation(Node* node) { EmitWordPoisonOnSpeculation(node); } -void InstructionSelector::VisitLoadStackPointer(Node* node) { - OperandGenerator 
g(this); - Emit(kArchStackPointer, g.DefineAsRegister(node)); +void InstructionSelector::VisitStackPointerGreaterThan(Node* node) { + FlagsContinuation cont = + FlagsContinuation::ForSet(kStackPointerGreaterThanCondition, node); + VisitStackPointerGreaterThan(node, &cont); } void InstructionSelector::VisitLoadFramePointer(Node* node) { @@ -2553,11 +2606,18 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) { // !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390 #if !V8_TARGET_ARCH_X64 +#if !V8_TARGET_ARCH_ARM64 void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); } @@ -2566,20 +2626,25 @@ void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GtS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GeS(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_ARM64 +void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitI64x2MaxU(Node* 
node) { UNIMPLEMENTED(); } #endif // !V8_TARGET_ARCH_X64 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } @@ -2677,6 +2742,12 @@ void InstructionSelector::VisitCall(Node* node, BasicBlock* handler) { OperandGenerator g(this); auto call_descriptor = CallDescriptorOf(node->op()); + if (call_descriptor->NeedsCallerSavedRegisters()) { + Emit(kArchSaveCallerRegisters | MiscField::encode(static_cast<int>( + call_descriptor->get_save_fp_mode())), + g.NoOutput()); + } + FrameStateDescriptor* frame_state_descriptor = nullptr; if (call_descriptor->NeedsFrameState()) { frame_state_descriptor = GetFrameStateDescriptor( @@ -2745,18 +2816,13 @@ void InstructionSelector::VisitCall(Node* node, BasicBlock* handler) { call_instr->MarkAsCall(); EmitPrepareResults(&(buffer.output_nodes), call_descriptor, node); -} -void InstructionSelector::VisitCallWithCallerSavedRegisters( - Node* node, BasicBlock* handler) { - OperandGenerator g(this); - const auto fp_mode = CallDescriptorOf(node->op())->get_save_fp_mode(); - Emit(kArchSaveCallerRegisters | MiscField::encode(static_cast<int>(fp_mode)), - g.NoOutput()); - VisitCall(node, handler); - Emit(kArchRestoreCallerRegisters | - MiscField::encode(static_cast<int>(fp_mode)), - g.NoOutput()); + if (call_descriptor->NeedsCallerSavedRegisters()) { + Emit(kArchRestoreCallerRegisters | + MiscField::encode( + static_cast<int>(call_descriptor->get_save_fp_mode())), + g.NoOutput()); + } } void InstructionSelector::VisitTailCall(Node* node) { @@ -2764,7 +2830,7 @@ void InstructionSelector::VisitTailCall(Node* node) { auto call_descriptor = CallDescriptorOf(node->op()); CallDescriptor* caller = linkage()->GetIncomingDescriptor(); - DCHECK(caller->CanTailCall(node)); + DCHECK(caller->CanTailCall(CallDescriptorOf(node->op()))); const CallDescriptor* callee = CallDescriptorOf(node->op()); int stack_param_delta = callee->GetStackParameterDelta(caller); CallBuffer buffer(zone(), call_descriptor, nullptr); @@ -2912,14 +2978,13 @@ void InstructionSelector::VisitTrapUnless(Node* node, TrapId trap_id) { } void InstructionSelector::EmitIdentity(Node* node) { - OperandGenerator g(this); MarkAsUsed(node->InputAt(0)); SetRename(node, node->InputAt(0)); } void InstructionSelector::VisitDeoptimize(DeoptimizeKind kind, DeoptimizeReason reason, - VectorSlotPair const& feedback, + FeedbackSource const& feedback, Node* value) { EmitDeoptimize(kArchDeoptimize, 0, nullptr, 0, nullptr, kind, reason, feedback, value); @@ -2980,8 +3045,9 @@ bool InstructionSelector::CanProduceSignalingNaN(Node* node) { return true; } -FrameStateDescriptor* InstructionSelector::GetFrameStateDescriptor( - Node* state) { +namespace { + +FrameStateDescriptor* GetFrameStateDescriptorInternal(Zone* zone, Node* state) { DCHECK_EQ(IrOpcode::kFrameState, state->opcode()); DCHECK_EQ(kFrameStateInputCount, state->InputCount()); FrameStateInfo state_info = FrameStateInfoOf(state->op()); @@ -2999,13 +3065,24 @@ FrameStateDescriptor* InstructionSelector::GetFrameStateDescriptor( FrameStateDescriptor* outer_state = nullptr; Node* outer_node = state->InputAt(kFrameStateOuterStateInput); if (outer_node->opcode() == IrOpcode::kFrameState) { - outer_state = GetFrameStateDescriptor(outer_node); + outer_state = GetFrameStateDescriptorInternal(zone, outer_node); } - return new (instruction_zone()) FrameStateDescriptor( - instruction_zone(), state_info.type(), state_info.bailout_id(), - state_info.state_combine(), parameters, locals, stack, - state_info.shared_info(), outer_state); + return new (zone) + 
FrameStateDescriptor(zone, state_info.type(), state_info.bailout_id(), + state_info.state_combine(), parameters, locals, + stack, state_info.shared_info(), outer_state); +} + +} // namespace + +FrameStateDescriptor* InstructionSelector::GetFrameStateDescriptor( + Node* state) { + auto* desc = GetFrameStateDescriptorInternal(instruction_zone(), state); + *max_unoptimized_frame_height_ = + std::max(*max_unoptimized_frame_height_, + desc->total_conservative_frame_size_in_bytes()); + return desc; } // static diff --git a/deps/v8/src/compiler/backend/instruction-selector.h b/deps/v8/src/compiler/backend/instruction-selector.h index 16f88bb516..eb3e098427 100644 --- a/deps/v8/src/compiler/backend/instruction-selector.h +++ b/deps/v8/src/compiler/backend/instruction-selector.h @@ -12,6 +12,7 @@ #include "src/compiler/backend/instruction-scheduler.h" #include "src/compiler/backend/instruction.h" #include "src/compiler/common-operator.h" +#include "src/compiler/feedback-source.h" #include "src/compiler/linkage.h" #include "src/compiler/machine-operator.h" #include "src/compiler/node.h" @@ -60,7 +61,7 @@ class FlagsContinuation final { static FlagsContinuation ForDeoptimize(FlagsCondition condition, DeoptimizeKind kind, DeoptimizeReason reason, - VectorSlotPair const& feedback, + FeedbackSource const& feedback, Node* frame_state) { return FlagsContinuation(kFlags_deoptimize, condition, kind, reason, feedback, frame_state); @@ -69,7 +70,7 @@ class FlagsContinuation final { // Creates a new flags continuation for an eager deoptimization exit. static FlagsContinuation ForDeoptimizeAndPoison( FlagsCondition condition, DeoptimizeKind kind, DeoptimizeReason reason, - VectorSlotPair const& feedback, Node* frame_state) { + FeedbackSource const& feedback, Node* frame_state) { return FlagsContinuation(kFlags_deoptimize_and_poison, condition, kind, reason, feedback, frame_state); } @@ -110,7 +111,7 @@ class FlagsContinuation final { DCHECK(IsDeoptimize()); return reason_; } - VectorSlotPair const& feedback() const { + FeedbackSource const& feedback() const { DCHECK(IsDeoptimize()); return feedback_; } @@ -196,7 +197,7 @@ class FlagsContinuation final { FlagsContinuation(FlagsMode mode, FlagsCondition condition, DeoptimizeKind kind, DeoptimizeReason reason, - VectorSlotPair const& feedback, Node* frame_state) + FeedbackSource const& feedback, Node* frame_state) : mode_(mode), condition_(condition), kind_(kind), @@ -226,7 +227,7 @@ class FlagsContinuation final { FlagsCondition condition_; DeoptimizeKind kind_; // Only valid if mode_ == kFlags_deoptimize* DeoptimizeReason reason_; // Only valid if mode_ == kFlags_deoptimize* - VectorSlotPair feedback_; // Only valid if mode_ == kFlags_deoptimize* + FeedbackSource feedback_; // Only valid if mode_ == kFlags_deoptimize* Node* frame_state_or_result_; // Only valid if mode_ == kFlags_deoptimize* // or mode_ == kFlags_set. BasicBlock* true_block_; // Only valid if mode_ == kFlags_branch*. 
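The new max-frame-height plumbing above is easy to model in isolation: the selector holds a pointer to a caller-provided counter and folds every freshly built frame state descriptor's conservative size into it with std::max. A minimal sketch of that accumulation pattern, assuming a simplified Descriptor type in place of FrameStateDescriptor and total_conservative_frame_size_in_bytes() (names here are illustrative, not the V8 API):

#include <algorithm>
#include <cstddef>

// Hypothetical stand-in for FrameStateDescriptor; only the size query matters.
struct Descriptor {
  std::size_t conservative_frame_size_in_bytes;
};

class Selector {
 public:
  explicit Selector(std::size_t* max_unoptimized_frame_height)
      : max_unoptimized_frame_height_(max_unoptimized_frame_height) {}

  // Mirrors the wrapper above: every descriptor built for this function bumps
  // the shared maximum, so the pipeline later knows the worst-case frame size.
  const Descriptor* RecordDescriptor(const Descriptor* desc) {
    *max_unoptimized_frame_height_ =
        std::max(*max_unoptimized_frame_height_,
                 desc->conservative_frame_size_in_bytes);
    return desc;
  }

 private:
  std::size_t* max_unoptimized_frame_height_;  // Owned by the caller (pipeline).
};

As the header comment notes, the accumulated maximum is later used to apply an offset to stack checks; presumably this lets one entry-time check also account for the space the largest unoptimized frames could need.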
@@ -270,6 +271,7 @@ class V8_EXPORT_PRIVATE InstructionSelector final { InstructionSequence* sequence, Schedule* schedule, SourcePositionTable* source_positions, Frame* frame, EnableSwitchJumpTable enable_switch_jump_table, TickCounter* tick_counter, + size_t* max_unoptimized_frame_height, SourcePositionMode source_position_mode = kCallSourcePositions, Features features = SupportedFeatures(), EnableScheduling enable_scheduling = FLAG_turbo_instruction_scheduling @@ -352,7 +354,7 @@ class V8_EXPORT_PRIVATE InstructionSelector final { InstructionOperand* outputs, size_t input_count, InstructionOperand* inputs, DeoptimizeKind kind, DeoptimizeReason reason, - VectorSlotPair const& feedback, + FeedbackSource const& feedback, Node* frame_state); // =========================================================================== @@ -446,7 +448,8 @@ class V8_EXPORT_PRIVATE InstructionSelector final { // Check if we can generate loads and stores of ExternalConstants relative // to the roots register. - bool CanAddressRelativeToRootsRegister() const; + bool CanAddressRelativeToRootsRegister( + const ExternalReference& reference) const; // Check if we can use the roots register to access GC roots. bool CanUseRootsRegister() const; @@ -496,7 +499,7 @@ class V8_EXPORT_PRIVATE InstructionSelector final { void AppendDeoptimizeArguments(InstructionOperandVector* args, DeoptimizeKind kind, DeoptimizeReason reason, - VectorSlotPair const& feedback, + FeedbackSource const& feedback, Node* frame_state); void EmitTableSwitch( @@ -543,7 +546,7 @@ class V8_EXPORT_PRIVATE InstructionSelector final { void MarkAsSimd128(Node* node) { MarkAsRepresentation(MachineRepresentation::kSimd128, node); } - void MarkAsReference(Node* node) { + void MarkAsTagged(Node* node) { MarkAsRepresentation(MachineRepresentation::kTagged, node); } void MarkAsCompressed(Node* node) { @@ -621,8 +624,6 @@ class V8_EXPORT_PRIVATE InstructionSelector final { void VisitProjection(Node* node); void VisitConstant(Node* node); void VisitCall(Node* call, BasicBlock* handler = nullptr); - void VisitCallWithCallerSavedRegisters(Node* call, - BasicBlock* handler = nullptr); void VisitDeoptimizeIf(Node* node); void VisitDeoptimizeUnless(Node* node); void VisitTrapIf(Node* node, TrapId trap_id); @@ -632,7 +633,7 @@ class V8_EXPORT_PRIVATE InstructionSelector final { void VisitBranch(Node* input, BasicBlock* tbranch, BasicBlock* fbranch); void VisitSwitch(Node* node, const SwitchInfo& sw); void VisitDeoptimize(DeoptimizeKind kind, DeoptimizeReason reason, - VectorSlotPair const& feedback, Node* value); + FeedbackSource const& feedback, Node* value); void VisitReturn(Node* ret); void VisitThrow(Node* node); void VisitRetain(Node* node); @@ -640,6 +641,8 @@ class V8_EXPORT_PRIVATE InstructionSelector final { void VisitStaticAssert(Node* node); void VisitDeadValue(Node* node); + void VisitStackPointerGreaterThan(Node* node, FlagsContinuation* cont); + void VisitWordCompareZero(Node* user, Node* value, FlagsContinuation* cont); void EmitWordPoisonOnSpeculation(Node* node); @@ -782,6 +785,10 @@ class V8_EXPORT_PRIVATE InstructionSelector final { ZoneVector<std::pair<int, int>> instr_origins_; EnableTraceTurboJson trace_turbo_; TickCounter* const tick_counter_; + + // Store the maximal unoptimized frame height. Later used to apply an offset + // to stack checks. 
+ size_t* max_unoptimized_frame_height_; }; } // namespace compiler diff --git a/deps/v8/src/compiler/backend/instruction.cc b/deps/v8/src/compiler/backend/instruction.cc index 09c7fe22c5..06158b0c72 100644 --- a/deps/v8/src/compiler/backend/instruction.cc +++ b/deps/v8/src/compiler/backend/instruction.cc @@ -6,12 +6,14 @@ #include <iomanip> +#include "src/codegen/interface-descriptors.h" #include "src/codegen/register-configuration.h" #include "src/codegen/source-position.h" #include "src/compiler/common-operator.h" #include "src/compiler/graph.h" #include "src/compiler/schedule.h" #include "src/compiler/state-values-utils.h" +#include "src/execution/frames.h" namespace v8 { namespace internal { @@ -942,7 +944,7 @@ void InstructionSequence::MarkAsRepresentation(MachineRepresentation rep, int InstructionSequence::AddDeoptimizationEntry( FrameStateDescriptor* descriptor, DeoptimizeKind kind, - DeoptimizeReason reason, VectorSlotPair const& feedback) { + DeoptimizeReason reason, FeedbackSource const& feedback) { int deoptimization_id = static_cast<int>(deoptimization_entries_.size()); deoptimization_entries_.push_back( DeoptimizationEntry(descriptor, kind, reason, feedback)); @@ -1002,6 +1004,59 @@ void InstructionSequence::SetRegisterConfigurationForTesting( GetRegConfig = InstructionSequence::RegisterConfigurationForTesting; } +namespace { + +size_t GetConservativeFrameSizeInBytes(FrameStateType type, + size_t parameters_count, + size_t locals_count, + BailoutId bailout_id) { + switch (type) { + case FrameStateType::kInterpretedFunction: { + auto info = InterpretedFrameInfo::Conservative( + static_cast<int>(parameters_count), static_cast<int>(locals_count)); + return info.frame_size_in_bytes(); + } + case FrameStateType::kArgumentsAdaptor: { + auto info = ArgumentsAdaptorFrameInfo::Conservative( + static_cast<int>(parameters_count)); + return info.frame_size_in_bytes(); + } + case FrameStateType::kConstructStub: { + auto info = ConstructStubFrameInfo::Conservative( + static_cast<int>(parameters_count)); + return info.frame_size_in_bytes(); + } + case FrameStateType::kBuiltinContinuation: + case FrameStateType::kJavaScriptBuiltinContinuation: + case FrameStateType::kJavaScriptBuiltinContinuationWithCatch: { + const RegisterConfiguration* config = RegisterConfiguration::Default(); + auto info = BuiltinContinuationFrameInfo::Conservative( + static_cast<int>(parameters_count), + Builtins::CallInterfaceDescriptorFor( + Builtins::GetBuiltinFromBailoutId(bailout_id)), + config); + return info.frame_size_in_bytes(); + } + } + UNREACHABLE(); +} + +size_t GetTotalConservativeFrameSizeInBytes(FrameStateType type, + size_t parameters_count, + size_t locals_count, + BailoutId bailout_id, + FrameStateDescriptor* outer_state) { + size_t outer_total_conservative_frame_size_in_bytes = + (outer_state == nullptr) + ? 
0 + : outer_state->total_conservative_frame_size_in_bytes(); + return GetConservativeFrameSizeInBytes(type, parameters_count, locals_count, + bailout_id) + + outer_total_conservative_frame_size_in_bytes; +} + +} // namespace + FrameStateDescriptor::FrameStateDescriptor( Zone* zone, FrameStateType type, BailoutId bailout_id, OutputFrameStateCombine state_combine, size_t parameters_count, @@ -1014,10 +1069,35 @@ FrameStateDescriptor::FrameStateDescriptor( parameters_count_(parameters_count), locals_count_(locals_count), stack_count_(stack_count), + total_conservative_frame_size_in_bytes_( + GetTotalConservativeFrameSizeInBytes( + type, parameters_count, locals_count, bailout_id, outer_state)), values_(zone), shared_info_(shared_info), outer_state_(outer_state) {} +size_t FrameStateDescriptor::GetHeight() const { + switch (type()) { + case FrameStateType::kInterpretedFunction: + return locals_count(); // The accumulator is *not* included. + case FrameStateType::kBuiltinContinuation: + // Custom, non-JS calling convention (that does not have a notion of + // a receiver or context). + return parameters_count(); + case FrameStateType::kArgumentsAdaptor: + case FrameStateType::kConstructStub: + case FrameStateType::kJavaScriptBuiltinContinuation: + case FrameStateType::kJavaScriptBuiltinContinuationWithCatch: + // JS linkage. The parameters count + // - includes the receiver (input 1 in CreateArtificialFrameState, and + // passed as part of stack parameters to + // CreateJavaScriptBuiltinContinuationFrameState), and + // - does *not* include the context. + return parameters_count(); + } + UNREACHABLE(); +} + size_t FrameStateDescriptor::GetSize() const { return 1 + parameters_count() + locals_count() + stack_count() + (HasContext() ? 1 : 0); diff --git a/deps/v8/src/compiler/backend/instruction.h b/deps/v8/src/compiler/backend/instruction.h index 9b32204055..f5f7f64c51 100644 --- a/deps/v8/src/compiler/backend/instruction.h +++ b/deps/v8/src/compiler/backend/instruction.h @@ -17,6 +17,7 @@ #include "src/common/globals.h" #include "src/compiler/backend/instruction-codes.h" #include "src/compiler/common-operator.h" +#include "src/compiler/feedback-source.h" #include "src/compiler/frame.h" #include "src/compiler/opcodes.h" #include "src/numbers/double.h" @@ -130,7 +131,7 @@ class V8_EXPORT_PRIVATE InstructionOperand { inline uint64_t GetCanonicalizedValue() const; - class KindField : public BitField64<Kind, 0, 3> {}; + using KindField = BitField64<Kind, 0, 3>; uint64_t value_; }; @@ -331,20 +332,20 @@ class UnallocatedOperand final : public InstructionOperand { STATIC_ASSERT(KindField::kSize == 3); - class VirtualRegisterField : public BitField64<uint32_t, 3, 32> {}; + using VirtualRegisterField = BitField64<uint32_t, 3, 32>; // BitFields for all unallocated operands. - class BasicPolicyField : public BitField64<BasicPolicy, 35, 1> {}; + using BasicPolicyField = BitField64<BasicPolicy, 35, 1>; // BitFields specific to BasicPolicy::FIXED_SLOT. - class FixedSlotIndexField : public BitField64<int, 36, 28> {}; + using FixedSlotIndexField = BitField64<int, 36, 28>; // BitFields specific to BasicPolicy::EXTENDED_POLICY. 
- class ExtendedPolicyField : public BitField64<ExtendedPolicy, 36, 3> {}; - class LifetimeField : public BitField64<Lifetime, 39, 1> {}; - class HasSecondaryStorageField : public BitField64<bool, 40, 1> {}; - class FixedRegisterField : public BitField64<int, 41, 6> {}; - class SecondaryStorageField : public BitField64<int, 47, 3> {}; + using ExtendedPolicyField = BitField64<ExtendedPolicy, 36, 3>; + using LifetimeField = BitField64<Lifetime, 39, 1>; + using HasSecondaryStorageField = BitField64<bool, 40, 1>; + using FixedRegisterField = BitField64<int, 41, 6>; + using SecondaryStorageField = BitField64<int, 47, 3>; private: explicit UnallocatedOperand(int virtual_register) @@ -373,7 +374,7 @@ class ConstantOperand : public InstructionOperand { INSTRUCTION_OPERAND_CASTS(ConstantOperand, CONSTANT) STATIC_ASSERT(KindField::kSize == 3); - class VirtualRegisterField : public BitField64<uint32_t, 3, 32> {}; + using VirtualRegisterField = BitField64<uint32_t, 3, 32>; }; class ImmediateOperand : public InstructionOperand { @@ -406,8 +407,8 @@ class ImmediateOperand : public InstructionOperand { INSTRUCTION_OPERAND_CASTS(ImmediateOperand, IMMEDIATE) STATIC_ASSERT(KindField::kSize == 3); - class TypeField : public BitField64<ImmediateType, 3, 1> {}; - class ValueField : public BitField64<int32_t, 32, 32> {}; + using TypeField = BitField64<ImmediateType, 3, 1>; + using ValueField = BitField64<int32_t, 32, 32>; }; class LocationOperand : public InstructionOperand { @@ -509,9 +510,9 @@ class LocationOperand : public InstructionOperand { } STATIC_ASSERT(KindField::kSize == 3); - class LocationKindField : public BitField64<LocationKind, 3, 2> {}; - class RepresentationField : public BitField64<MachineRepresentation, 5, 8> {}; - class IndexField : public BitField64<int32_t, 35, 29> {}; + using LocationKindField = BitField64<LocationKind, 3, 2>; + using RepresentationField = BitField64<MachineRepresentation, 5, 8>; + using IndexField = BitField64<int32_t, 35, 29>; }; class V8_EXPORT_PRIVATE ExplicitOperand @@ -1270,6 +1271,20 @@ class FrameStateDescriptor : public ZoneObject { type_ == FrameStateType::kConstructStub; } + // The frame height on the stack, in number of slots, as serialized into a + // Translation and later used by the deoptimizer. Does *not* include + // information from the chain of outer states. Unlike |GetSize| this does not + // always include parameters, locals, and stack slots; instead, the returned + // slot kinds depend on the frame type. + size_t GetHeight() const; + + // Returns an overapproximation of the unoptimized stack frame size in bytes, + // as later produced by the deoptimizer. Considers both this and the chain of + // outer states. 
+ size_t total_conservative_frame_size_in_bytes() const { + return total_conservative_frame_size_in_bytes_; + } + size_t GetSize() const; size_t GetTotalSize() const; size_t GetFrameCount() const; @@ -1283,12 +1298,13 @@ class FrameStateDescriptor : public ZoneObject { FrameStateType type_; BailoutId bailout_id_; OutputFrameStateCombine frame_state_combine_; - size_t parameters_count_; - size_t locals_count_; - size_t stack_count_; + const size_t parameters_count_; + const size_t locals_count_; + const size_t stack_count_; + const size_t total_conservative_frame_size_in_bytes_; StateValueList values_; MaybeHandle<SharedFunctionInfo> const shared_info_; - FrameStateDescriptor* outer_state_; + FrameStateDescriptor* const outer_state_; }; // A deoptimization entry is a pair of the reason why we deoptimize and the @@ -1297,7 +1313,7 @@ class DeoptimizationEntry final { public: DeoptimizationEntry() = default; DeoptimizationEntry(FrameStateDescriptor* descriptor, DeoptimizeKind kind, - DeoptimizeReason reason, VectorSlotPair const& feedback) + DeoptimizeReason reason, FeedbackSource const& feedback) : descriptor_(descriptor), kind_(kind), reason_(reason), @@ -1306,13 +1322,13 @@ class DeoptimizationEntry final { FrameStateDescriptor* descriptor() const { return descriptor_; } DeoptimizeKind kind() const { return kind_; } DeoptimizeReason reason() const { return reason_; } - VectorSlotPair const& feedback() const { return feedback_; } + FeedbackSource const& feedback() const { return feedback_; } private: FrameStateDescriptor* descriptor_ = nullptr; DeoptimizeKind kind_ = DeoptimizeKind::kEager; DeoptimizeReason reason_ = DeoptimizeReason::kUnknown; - VectorSlotPair feedback_ = VectorSlotPair(); + FeedbackSource feedback_ = FeedbackSource(); }; using DeoptimizationVector = ZoneVector<DeoptimizationEntry>; @@ -1577,7 +1593,7 @@ class V8_EXPORT_PRIVATE InstructionSequence final int AddDeoptimizationEntry(FrameStateDescriptor* descriptor, DeoptimizeKind kind, DeoptimizeReason reason, - VectorSlotPair const& feedback); + FeedbackSource const& feedback); DeoptimizationEntry const& GetDeoptimizationEntry(int deoptimization_id); int GetDeoptimizationEntryCount() const { return static_cast<int>(deoptimization_entries_.size()); diff --git a/deps/v8/src/compiler/backend/mips/code-generator-mips.cc b/deps/v8/src/compiler/backend/mips/code-generator-mips.cc index 5cec4a8a16..239075392a 100644 --- a/deps/v8/src/compiler/backend/mips/code-generator-mips.cc +++ b/deps/v8/src/compiler/backend/mips/code-generator-mips.cc @@ -850,18 +850,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // don't emit code for nops. break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; break; } case kArchRet: AssembleReturn(instr->InputAt(0)); break; - case kArchStackPointer: - __ mov(i.OutputRegister(), sp); + case kArchStackPointerGreaterThan: + // Pseudo-instruction used for cmp/branch. No opcode emitted here. 
break; case kArchFramePointer: __ mov(i.OutputRegister(), fp); @@ -2067,6 +2066,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kMipsF32x4Div: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + __ fdiv_w(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } case kMipsF32x4Max: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ fmax_w(i.OutputSimd128Register(), i.InputSimd128Register(0), @@ -3015,6 +3020,9 @@ void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm, } else if (instr->arch_opcode() == kMipsCmp) { cc = FlagsConditionToConditionCmp(condition); __ Branch(tlabel, cc, i.InputRegister(0), i.InputOperand(1)); + } else if (instr->arch_opcode() == kArchStackPointerGreaterThan) { + cc = FlagsConditionToConditionCmp(condition); + __ Branch(tlabel, cc, sp, Operand(i.InputRegister(0))); } else if (instr->arch_opcode() == kMipsCmpS || instr->arch_opcode() == kMipsCmpD) { bool predicate; @@ -3444,6 +3452,42 @@ void CodeGenerator::AssembleConstructFrame() { const RegList saves = call_descriptor->CalleeSavedRegisters(); const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters(); + + if (required_slots > 0) { + DCHECK(frame_access_state()->has_frame()); + if (info()->IsWasm() && required_slots > 128) { + // For WebAssembly functions with big frames we have to do the stack + // overflow check before we construct the frame. Otherwise we may not + // have enough space on the stack to call the runtime for the stack + // overflow. + Label done; + + // If the frame is bigger than the stack, we throw the stack overflow + // exception unconditionally. Thereby we can avoid the integer overflow + // check in the condition code. + if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) { + __ Lw( + kScratchReg, + FieldMemOperand(kWasmInstanceRegister, + WasmInstanceObject::kRealStackLimitAddressOffset)); + __ Lw(kScratchReg, MemOperand(kScratchReg)); + __ Addu(kScratchReg, kScratchReg, + Operand(required_slots * kSystemPointerSize)); + __ Branch(&done, uge, sp, Operand(kScratchReg)); + } + + __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL); + // We come from WebAssembly, there are no references for the GC. + ReferenceMap* reference_map = new (zone()) ReferenceMap(zone()); + RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt); + if (FLAG_debug_code) { + __ stop(); + } + + __ bind(&done); + } + } + const int returns = frame()->GetReturnSlotCount(); // Skip callee-saved and return slots, which are pushed below. 
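The big-frame path added above is driven by two comparisons: the pre-check only runs for WebAssembly frames larger than 128 slots, and within it the frame size in bytes is compared against the configured stack size to decide between a conditional branch over the overflow call and an unconditional call. A standalone sketch of that decision, with illustrative constants and names (kBigFrameSlotLimit stands in for the literal 128; this is not the real V8 interface):

#include <cstdint>
#include <cstdio>

constexpr int kSystemPointerSize = 4;    // 8 on 64-bit targets such as MIPS64.
constexpr int kBigFrameSlotLimit = 128;  // Threshold used by the generated code.

enum class StackCheck { kNone, kCompareAgainstLimit, kUnconditionalOverflow };

// Rough model of the policy in AssembleConstructFrame above.
StackCheck ClassifyWasmFrame(int required_slots, int flag_stack_size_kb) {
  if (required_slots <= kBigFrameSlotLimit) return StackCheck::kNone;
  const int64_t frame_bytes = int64_t{required_slots} * kSystemPointerSize;
  const int64_t stack_bytes = int64_t{flag_stack_size_kb} * 1024;
  // A frame larger than the whole stack can never fit, so the generated code
  // skips the limit load/compare and calls the overflow stub unconditionally,
  // which also avoids an integer overflow in the comparison itself.
  return frame_bytes < stack_bytes ? StackCheck::kCompareAgainstLimit
                                   : StackCheck::kUnconditionalOverflow;
}

int main() {
  // Example: a 200-slot wasm frame with a 1024 KB stack still gets the
  // conditional limit check rather than the unconditional overflow call.
  std::printf("%d\n", static_cast<int>(ClassifyWasmFrame(200, 1024)));
  return 0;
}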
@@ -3527,6 +3571,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() {} +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { MipsOperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h b/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h index 44e53ac044..e8020d9e89 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h +++ b/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h @@ -165,6 +165,7 @@ namespace compiler { V(MipsF32x4AddHoriz) \ V(MipsF32x4Sub) \ V(MipsF32x4Mul) \ + V(MipsF32x4Div) \ V(MipsF32x4Max) \ V(MipsF32x4Min) \ V(MipsF32x4Eq) \ diff --git a/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc b/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc index 92ab3f9344..4e6aef52f4 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc +++ b/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc @@ -51,6 +51,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsF32x4Max: case kMipsF32x4Min: case kMipsF32x4Mul: + case kMipsF32x4Div: case kMipsF32x4Ne: case kMipsF32x4Neg: case kMipsF32x4RecipApprox: @@ -1673,7 +1674,6 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) { case kMipsCmp: return 0; case kArchDebugBreak: - case kArchStackPointer: case kArchFramePointer: case kArchParentFramePointer: case kMipsShl: diff --git a/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc b/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc index 452e92a174..bb47262c6c 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc +++ b/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc @@ -352,7 +352,8 @@ void InstructionSelector::VisitStore(Node* node) { MachineRepresentation rep = store_rep.representation(); // TODO(mips): I guess this could be done in a better way. - if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedPointer(rep)); InstructionOperand inputs[3]; size_t input_count = 0; @@ -1529,6 +1530,15 @@ void VisitWordCompare(InstructionSelector* selector, Node* node, } // namespace +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + MipsOperandGenerator g(this); + EmitWithContinuation(opcode, g.UseRegister(value), cont); +} + // Shared routine for word comparisons against zero. 
void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, FlagsContinuation* cont) { @@ -1607,6 +1617,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, break; case IrOpcode::kWord32And: return VisitWordCompare(this, value, kMipsTst, cont, true); + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2041,6 +2054,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F32x4AddHoriz, kMipsF32x4AddHoriz) \ V(F32x4Sub, kMipsF32x4Sub) \ V(F32x4Mul, kMipsF32x4Mul) \ + V(F32x4Div, kMipsF32x4Div) \ V(F32x4Max, kMipsF32x4Max) \ V(F32x4Min, kMipsF32x4Min) \ V(F32x4Eq, kMipsF32x4Eq) \ diff --git a/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc b/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc index f746b52df6..5682bed71a 100644 --- a/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc @@ -828,18 +828,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // don't emit code for nops. break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; break; } case kArchRet: AssembleReturn(instr->InputAt(0)); break; - case kArchStackPointer: - __ mov(i.OutputRegister(), sp); + case kArchStackPointerGreaterThan: + // Pseudo-instruction used for cmp/branch. No opcode emitted here. break; case kArchFramePointer: __ mov(i.OutputRegister(), fp); @@ -2182,6 +2181,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kMips64F32x4Div: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + __ fdiv_w(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } case kMips64F32x4Max: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ fmax_w(i.OutputSimd128Register(), i.InputSimd128Register(0), @@ -3140,6 +3145,9 @@ void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm, } else if (instr->arch_opcode() == kMips64Cmp) { cc = FlagsConditionToConditionCmp(condition); __ Branch(tlabel, cc, i.InputRegister(0), i.InputOperand(1)); + } else if (instr->arch_opcode() == kArchStackPointerGreaterThan) { + cc = FlagsConditionToConditionCmp(condition); + __ Branch(tlabel, cc, sp, Operand(i.InputRegister(0))); } else if (instr->arch_opcode() == kMips64CmpS || instr->arch_opcode() == kMips64CmpD) { bool predicate; @@ -3603,6 +3611,42 @@ void CodeGenerator::AssembleConstructFrame() { const RegList saves = call_descriptor->CalleeSavedRegisters(); const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters(); + + if (required_slots > 0) { + DCHECK(frame_access_state()->has_frame()); + if (info()->IsWasm() && required_slots > 128) { + // For WebAssembly functions with big frames we have to do the stack + // overflow check before we construct the frame. Otherwise we may not + // have enough space on the stack to call the runtime for the stack + // overflow. + Label done; + + // If the frame is bigger than the stack, we throw the stack overflow + // exception unconditionally. 
Thereby we can avoid the integer overflow + // check in the condition code. + if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) { + __ Ld( + kScratchReg, + FieldMemOperand(kWasmInstanceRegister, + WasmInstanceObject::kRealStackLimitAddressOffset)); + __ Ld(kScratchReg, MemOperand(kScratchReg)); + __ Daddu(kScratchReg, kScratchReg, + Operand(required_slots * kSystemPointerSize)); + __ Branch(&done, uge, sp, Operand(kScratchReg)); + } + + __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL); + // We come from WebAssembly, there are no references for the GC. + ReferenceMap* reference_map = new (zone()) ReferenceMap(zone()); + RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt); + if (FLAG_debug_code) { + __ stop(); + } + + __ bind(&done); + } + } + const int returns = frame()->GetReturnSlotCount(); // Skip callee-saved and return slots, which are pushed below. @@ -3686,6 +3730,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() {} +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { MipsOperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h b/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h index e375ee8d07..edc8924757 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h +++ b/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h @@ -195,6 +195,7 @@ namespace compiler { V(Mips64F32x4AddHoriz) \ V(Mips64F32x4Sub) \ V(Mips64F32x4Mul) \ + V(Mips64F32x4Div) \ V(Mips64F32x4Max) \ V(Mips64F32x4Min) \ V(Mips64F32x4Eq) \ diff --git a/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc b/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc index 4dcafe4197..880b424c41 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc @@ -79,6 +79,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64F32x4Max: case kMips64F32x4Min: case kMips64F32x4Mul: + case kMips64F32x4Div: case kMips64F32x4Ne: case kMips64F32x4Neg: case kMips64F32x4RecipApprox: @@ -1275,7 +1276,6 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) { return 0; case kArchRet: return AssemblerReturnLatency(); - case kArchStackPointer: case kArchFramePointer: return 1; case kArchParentFramePointer: diff --git a/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc b/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc index 95f11ebed1..9c717ab1e9 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc @@ -422,7 +422,8 @@ void InstructionSelector::VisitStore(Node* node) { MachineRepresentation rep = store_rep.representation(); // TODO(mips): I guess this could be done in a better way. 
- if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedPointer(rep)); InstructionOperand inputs[3]; size_t input_count = 0; @@ -2090,6 +2091,15 @@ void VisitAtomicBinop(InstructionSelector* selector, Node* node, } // namespace +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + Mips64OperandGenerator g(this); + EmitWithContinuation(opcode, g.UseRegister(value), cont); +} + // Shared routine for word comparisons against zero. void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, FlagsContinuation* cont) { @@ -2199,6 +2209,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, case IrOpcode::kWord32And: case IrOpcode::kWord64And: return VisitWordCompare(this, value, kMips64Tst, cont, true); + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2704,6 +2717,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F32x4AddHoriz, kMips64F32x4AddHoriz) \ V(F32x4Sub, kMips64F32x4Sub) \ V(F32x4Mul, kMips64F32x4Mul) \ + V(F32x4Div, kMips64F32x4Div) \ V(F32x4Max, kMips64F32x4Max) \ V(F32x4Min, kMips64F32x4Min) \ V(F32x4Eq, kMips64F32x4Eq) \ diff --git a/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc b/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc index 5289812cb5..5c69bc34a1 100644 --- a/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc +++ b/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc @@ -1024,13 +1024,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Label start_call; bool isWasmCapiFunction = linkage()->GetIncomingDescriptor()->IsWasmCapiFunction(); - constexpr int offset = 12; + constexpr int offset = 9 * kInstrSize; if (isWasmCapiFunction) { - __ mflr(kScratchReg); + __ mflr(r0); __ bind(&start_call); - __ LoadPC(r0); - __ addi(r0, r0, Operand(offset)); - __ StoreP(r0, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); + __ LoadPC(kScratchReg); + __ addi(kScratchReg, kScratchReg, Operand(offset)); + __ StoreP(kScratchReg, + MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); __ mtlr(r0); } if (instr->InputAt(0)->IsImmediate()) { @@ -1040,11 +1041,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Register func = i.InputRegister(0); __ CallCFunction(func, num_parameters); } - // TODO(miladfar): In the above block, r0 must be populated with the - // strictly-correct PC, which is the return address at this spot. The - // offset is set to 12 right now, which is counted from where we are - // binding to the label and ends at this spot. If failed, replace it it - // with the correct offset suggested. More info on f5ab7d3. + // TODO(miladfar): In the above block, kScratchReg must be populated with + // the strictly-correct PC, which is the return address at this spot. The + // offset is set to 36 (9 * kInstrSize) right now, which is counted from + // where we are binding to the label and ends at this spot. If failed, + // replace it with the correct offset suggested. More info on f5ab7d3. 
if (isWasmCapiFunction) CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call)); @@ -1104,10 +1105,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(LeaveRC, i.OutputRCBit()); break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; break; } @@ -1115,10 +1115,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( AssembleReturn(instr->InputAt(0)); DCHECK_EQ(LeaveRC, i.OutputRCBit()); break; - case kArchStackPointer: - __ mr(i.OutputRegister(), sp); - DCHECK_EQ(LeaveRC, i.OutputRCBit()); - break; case kArchFramePointer: __ mr(i.OutputRegister(), fp); DCHECK_EQ(LeaveRC, i.OutputRCBit()); @@ -1130,6 +1126,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mr(i.OutputRegister(), fp); } break; + case kArchStackPointerGreaterThan: { + constexpr size_t kValueIndex = 0; + DCHECK(instr->InputAt(kValueIndex)->IsRegister()); + __ cmpl(sp, i.InputRegister(kValueIndex), cr0); + break; + } case kArchTruncateDoubleToI: __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(), i.InputDoubleRegister(0), DetermineStubCallMode()); @@ -2516,6 +2518,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() {} +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { PPCOperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc b/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc index bfc77b9412..ef8490a726 100644 --- a/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc +++ b/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc @@ -65,17 +65,6 @@ class PPCOperandGenerator final : public OperandGenerator { } return false; } - - // Use the stack pointer if the node is LoadStackPointer, otherwise assign a - // register. - InstructionOperand UseRegisterOrStackPointer(Node* node) { - if (node->opcode() == IrOpcode::kLoadStackPointer) { - return LocationOperand(LocationOperand::EXPLICIT, - LocationOperand::REGISTER, - MachineRepresentation::kWord32, sp.code()); - } - return UseRegister(node); - } }; namespace { @@ -267,7 +256,8 @@ void InstructionSelector::VisitStore(Node* node) { rep = store_rep.representation(); } - if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedPointer(rep)); AddressingMode addressing_mode; InstructionOperand inputs[3]; @@ -558,6 +548,15 @@ void InstructionSelector::VisitWord32Xor(Node* node) { } } +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + PPCOperandGenerator g(this); + EmitWithContinuation(opcode, g.UseRegister(value), cont); +} + #if V8_TARGET_ARCH_PPC64 void InstructionSelector::VisitWord64Xor(Node* node) { PPCOperandGenerator g(this); @@ -1456,15 +1455,15 @@ void VisitWordCompare(InstructionSelector* selector, Node* node, // Match immediates on left or right side of comparison. 
if (g.CanBeImmediate(right, immediate_mode)) { - VisitCompare(selector, opcode, g.UseRegisterOrStackPointer(left), - g.UseImmediate(right), cont); + VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right), + cont); } else if (g.CanBeImmediate(left, immediate_mode)) { if (!commutative) cont->Commute(); - VisitCompare(selector, opcode, g.UseRegisterOrStackPointer(right), - g.UseImmediate(left), cont); + VisitCompare(selector, opcode, g.UseRegister(right), g.UseImmediate(left), + cont); } else { - VisitCompare(selector, opcode, g.UseRegisterOrStackPointer(left), - g.UseRegisterOrStackPointer(right), cont); + VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right), + cont); } } @@ -1639,6 +1638,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, // case IrOpcode::kWord64Shr: // case IrOpcode::kWord64Ror: #endif + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2281,6 +2283,8 @@ void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); } + void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); } diff --git a/deps/v8/src/compiler/backend/register-allocator.cc b/deps/v8/src/compiler/backend/register-allocator.cc index 44701f8159..21eef0485c 100644 --- a/deps/v8/src/compiler/backend/register-allocator.cc +++ b/deps/v8/src/compiler/backend/register-allocator.cc @@ -2989,34 +2989,72 @@ LifetimePosition RegisterAllocator::FindOptimalSplitPos(LifetimePosition start, } LifetimePosition RegisterAllocator::FindOptimalSpillingPos( - LiveRange* range, LifetimePosition pos) { + LiveRange* range, LifetimePosition pos, SpillMode spill_mode, + LiveRange** begin_spill_out) { + *begin_spill_out = range; + // TODO(herhut): Be more clever here as long as we do not move pos out of + // deferred code. + if (spill_mode == SpillMode::kSpillDeferred) return pos; const InstructionBlock* block = GetInstructionBlock(code(), pos.Start()); const InstructionBlock* loop_header = block->IsLoopHeader() ? block : GetContainingLoop(code(), block); - if (loop_header == nullptr) return pos; - const UsePosition* prev_use = - range->PreviousUsePositionRegisterIsBeneficial(pos); - - while (loop_header != nullptr) { - // We are going to spill live range inside the loop. - // If possible try to move spilling position backwards to loop header. - // This will reduce number of memory moves on the back edge. - LifetimePosition loop_start = LifetimePosition::GapFromInstructionIndex( - loop_header->first_instruction_index()); - - if (range->Covers(loop_start)) { - if (prev_use == nullptr || prev_use->pos() < loop_start) { + if (data()->is_turbo_control_flow_aware_allocation()) { + while (loop_header != nullptr) { + // We are going to spill live range inside the loop. + // If possible try to move spilling position backwards to loop header. + // This will reduce number of memory moves on the back edge. 
+ LifetimePosition loop_start = LifetimePosition::GapFromInstructionIndex( + loop_header->first_instruction_index()); + auto& loop_header_state = + data()->GetSpillState(loop_header->rpo_number()); + for (LiveRange* live_at_header : loop_header_state) { + if (live_at_header->TopLevel() != range->TopLevel() || + !live_at_header->Covers(loop_start) || live_at_header->spilled()) { + continue; + } + LiveRange* check_use = live_at_header; + for (; check_use != nullptr && check_use->Start() < pos; + check_use = check_use->next()) { + UsePosition* next_use = + check_use->NextUsePositionRegisterIsBeneficial(loop_start); + if (next_use != nullptr && next_use->pos() < pos) { + return pos; + } + } // No register beneficial use inside the loop before the pos. + *begin_spill_out = live_at_header; pos = loop_start; + break; } + + // Try hoisting out to an outer loop. + loop_header = GetContainingLoop(code(), loop_header); } + } else { + const UsePosition* prev_use = + range->PreviousUsePositionRegisterIsBeneficial(pos); + + while (loop_header != nullptr) { + // We are going to spill live range inside the loop. + // If possible try to move spilling position backwards to loop header + // inside the current range. This will reduce number of memory moves on + // the back edge. + LifetimePosition loop_start = LifetimePosition::GapFromInstructionIndex( + loop_header->first_instruction_index()); + + if (range->Covers(loop_start)) { + if (prev_use == nullptr || prev_use->pos() < loop_start) { + // No register beneficial use inside the loop before the pos. + pos = loop_start; + } + } - // Try hoisting out to an outer loop. - loop_header = GetContainingLoop(code(), loop_header); + // Try hoisting out to an outer loop. + loop_header = GetContainingLoop(code(), loop_header); + } } - return pos; } @@ -3064,6 +3102,28 @@ LinearScanAllocator::LinearScanAllocator(RegisterAllocationData* data, inactive_live_ranges().reserve(8); } +void LinearScanAllocator::MaybeSpillPreviousRanges(LiveRange* begin_range, + LifetimePosition begin_pos, + LiveRange* end_range) { + // Spill begin_range after begin_pos, then spill every live range of this + // virtual register until but excluding end_range. + DCHECK(begin_range->Covers(begin_pos)); + DCHECK_EQ(begin_range->TopLevel(), end_range->TopLevel()); + + if (begin_range != end_range) { + DCHECK_LE(begin_range->End(), end_range->Start()); + if (!begin_range->spilled()) { + SpillAfter(begin_range, begin_pos, SpillMode::kSpillAtDefinition); + } + for (LiveRange* range = begin_range->next(); range != end_range; + range = range->next()) { + if (!range->spilled()) { + range->Spill(); + } + } + } +} + void LinearScanAllocator::MaybeUndoPreviousSplit(LiveRange* range) { if (range->next() != nullptr && range->next()->ShouldRecombine()) { LiveRange* to_remove = range->next(); @@ -4407,11 +4467,10 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current, } UsePosition* next_pos = range->NextRegisterPosition(current->Start()); - // TODO(herhut): Be more clever here as long as we do not move split_pos - // out of deferred code. - LifetimePosition spill_pos = spill_mode == SpillMode::kSpillDeferred - ? 
split_pos - : FindOptimalSpillingPos(range, split_pos); + LiveRange* begin_spill = nullptr; + LifetimePosition spill_pos = + FindOptimalSpillingPos(range, split_pos, spill_mode, &begin_spill); + MaybeSpillPreviousRanges(begin_spill, spill_pos, range); if (next_pos == nullptr) { SpillAfter(range, spill_pos, spill_mode); } else { diff --git a/deps/v8/src/compiler/backend/register-allocator.h b/deps/v8/src/compiler/backend/register-allocator.h index 55f8a8dd1f..bc7b09d147 100644 --- a/deps/v8/src/compiler/backend/register-allocator.h +++ b/deps/v8/src/compiler/backend/register-allocator.h @@ -1238,7 +1238,9 @@ class RegisterAllocator : public ZoneObject { // If we are trying to spill a range inside the loop try to // hoist spill position out to the point just before the loop. LifetimePosition FindOptimalSpillingPos(LiveRange* range, - LifetimePosition pos); + LifetimePosition pos, + SpillMode spill_mode, + LiveRange** begin_spill_out); const ZoneVector<TopLevelLiveRange*>& GetFixedRegisters() const; const char* RegisterName(int allocation_index) const; @@ -1292,6 +1294,9 @@ class LinearScanAllocator final : public RegisterAllocator { ZoneUnorderedSet<RangeWithRegister, RangeWithRegister::Hash, RangeWithRegister::Equals>; + void MaybeSpillPreviousRanges(LiveRange* begin_range, + LifetimePosition begin_pos, + LiveRange* end_range); void MaybeUndoPreviousSplit(LiveRange* range); void SpillNotLiveRanges( RangeWithRegisterSet& to_be_live, // NOLINT(runtime/references) diff --git a/deps/v8/src/compiler/backend/s390/code-generator-s390.cc b/deps/v8/src/compiler/backend/s390/code-generator-s390.cc index 6457b7c8b4..4c2d862fc4 100644 --- a/deps/v8/src/compiler/backend/s390/code-generator-s390.cc +++ b/deps/v8/src/compiler/backend/s390/code-generator-s390.cc @@ -1578,19 +1578,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // don't emit code for nops. 
break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; break; } case kArchRet: AssembleReturn(instr->InputAt(0)); break; - case kArchStackPointer: - __ LoadRR(i.OutputRegister(), sp); - break; case kArchFramePointer: __ LoadRR(i.OutputRegister(), fp); break; @@ -1601,6 +1597,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ LoadRR(i.OutputRegister(), fp); } break; + case kArchStackPointerGreaterThan: { + constexpr size_t kValueIndex = 0; + DCHECK(instr->InputAt(kValueIndex)->IsRegister()); + __ CmpLogicalP(sp, i.InputRegister(kValueIndex)); + break; + } case kArchTruncateDoubleToI: __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(), i.InputDoubleRegister(0), DetermineStubCallMode()); @@ -3193,6 +3195,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() {} +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { S390OperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc b/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc index 99d3b0fa0f..7f3277fc68 100644 --- a/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc +++ b/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc @@ -243,17 +243,6 @@ class S390OperandGenerator final : public OperandGenerator { bool Is64BitOperand(Node* node) { return MachineRepresentation::kWord64 == GetRepresentation(node); } - - // Use the stack pointer if the node is LoadStackPointer, otherwise assign a - // register. 
- InstructionOperand UseRegisterOrStackPointer(Node* node) { - if (node->opcode() == IrOpcode::kLoadStackPointer) { - return LocationOperand(LocationOperand::EXPLICIT, - LocationOperand::REGISTER, - MachineRepresentation::kWord32, sp.code()); - } - return UseRegister(node); - } }; namespace { @@ -727,7 +716,8 @@ static void VisitGeneralStore( Node* base = node->InputAt(0); Node* offset = node->InputAt(1); Node* value = node->InputAt(2); - if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedPointer(rep)); AddressingMode addressing_mode; InstructionOperand inputs[3]; @@ -837,6 +827,15 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); } // Architecture supports unaligned access, therefore VisitStore is used instead void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); } +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + S390OperandGenerator g(this); + EmitWithContinuation(opcode, g.UseRegister(value), cont); +} + #if 0 static inline bool IsContiguousMask32(uint32_t value, int* mb, int* me) { int mask_width = base::bits::CountPopulation(value); @@ -1681,7 +1680,7 @@ void VisitWordCompare(InstructionSelector* selector, Node* node, return VisitLoadAndTest(selector, load_and_test, node, left, cont, true); } - inputs[input_count++] = g.UseRegisterOrStackPointer(left); + inputs[input_count++] = g.UseRegister(left); if (g.CanBeMemoryOperand(opcode, node, right, effect_level)) { // generate memory operand AddressingMode addressing_mode = g.GetEffectiveAddressMemoryOperand( @@ -2008,6 +2007,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, // doesn't generate cc, so ignore break; #endif + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2689,6 +2691,8 @@ void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); } + void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); } diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc index a108edeff0..a4f82b153b 100644 --- a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc +++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc @@ -155,10 +155,18 @@ class X64OperandConverter : public InstructionOperandConverter { namespace { +bool HasAddressingMode(Instruction* instr) { + return instr->addressing_mode() != kMode_None; +} + bool HasImmediateInput(Instruction* instr, size_t index) { return instr->InputAt(index)->IsImmediate(); } +bool HasRegisterInput(Instruction* instr, size_t index) { + return instr->InputAt(index)->IsRegister(); +} + class OutOfLineLoadFloat32NaN final : public OutOfLineCode { public: OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result) @@ -210,6 +218,10 @@ class OutOfLineTruncateDoubleToI final : public OutOfLineCode { // Just encode the stub index. This will be patched when the code // is added to the native module and copied into wasm code space. 
__ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); + } else if (tasm()->options().inline_offheap_trampolines) { + // With embedded builtins we do not need the isolate here. This allows + // the call to be generated asynchronously. + __ CallBuiltin(Builtins::kDoubleToI); } else { __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET); } @@ -380,60 +392,60 @@ void EmitWordLoadPoisoningIfNeeded( } \ } while (false) -#define ASSEMBLE_BINOP(asm_instr) \ - do { \ - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ - size_t index = 1; \ - Operand right = i.MemoryOperand(&index); \ - __ asm_instr(i.InputRegister(0), right); \ - } else { \ - if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ - } else { \ - __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ - } \ - } else { \ - if (instr->InputAt(1)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ - } else { \ - __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ - } \ - } \ - } \ +#define ASSEMBLE_BINOP(asm_instr) \ + do { \ + if (HasAddressingMode(instr)) { \ + size_t index = 1; \ + Operand right = i.MemoryOperand(&index); \ + __ asm_instr(i.InputRegister(0), right); \ + } else { \ + if (HasImmediateInput(instr, 1)) { \ + if (HasRegisterInput(instr, 0)) { \ + __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ + } else { \ + __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ + } \ + } else { \ + if (HasRegisterInput(instr, 1)) { \ + __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ + } else { \ + __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ + } \ + } \ + } \ } while (false) -#define ASSEMBLE_COMPARE(asm_instr) \ - do { \ - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ - size_t index = 0; \ - Operand left = i.MemoryOperand(&index); \ - if (HasImmediateInput(instr, index)) { \ - __ asm_instr(left, i.InputImmediate(index)); \ - } else { \ - __ asm_instr(left, i.InputRegister(index)); \ - } \ - } else { \ - if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ - } else { \ - __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ - } \ - } else { \ - if (instr->InputAt(1)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ - } else { \ - __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ - } \ - } \ - } \ +#define ASSEMBLE_COMPARE(asm_instr) \ + do { \ + if (HasAddressingMode(instr)) { \ + size_t index = 0; \ + Operand left = i.MemoryOperand(&index); \ + if (HasImmediateInput(instr, index)) { \ + __ asm_instr(left, i.InputImmediate(index)); \ + } else { \ + __ asm_instr(left, i.InputRegister(index)); \ + } \ + } else { \ + if (HasImmediateInput(instr, 1)) { \ + if (HasRegisterInput(instr, 0)) { \ + __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ + } else { \ + __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ + } \ + } else { \ + if (HasRegisterInput(instr, 1)) { \ + __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ + } else { \ + __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ + } \ + } \ + } \ } while (false) #define ASSEMBLE_MULT(asm_instr) \ do { \ if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ + if (HasRegisterInput(instr, 0)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(0), \ i.InputImmediate(1)); \ } else { \ @@ -441,7 +453,7 @@ void 
EmitWordLoadPoisoningIfNeeded( i.InputImmediate(1)); \ } \ } else { \ - if (instr->InputAt(1)->IsRegister()) { \ + if (HasRegisterInput(instr, 1)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \ } else { \ __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \ @@ -468,9 +480,9 @@ void EmitWordLoadPoisoningIfNeeded( #define ASSEMBLE_MOVX(asm_instr) \ do { \ - if (instr->addressing_mode() != kMode_None) { \ + if (HasAddressingMode(instr)) { \ __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \ - } else if (instr->InputAt(0)->IsRegister()) { \ + } else if (HasRegisterInput(instr, 0)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \ } else { \ __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \ @@ -576,17 +588,18 @@ void EmitWordLoadPoisoningIfNeeded( __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \ } while (false) -#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ - do { \ - CpuFeatureScope sse_scope(tasm(), SSE4_1); \ - Register dst = i.OutputRegister(); \ - Register tmp = i.TempRegister(0); \ - __ movq(tmp, Immediate(1)); \ - __ xorq(dst, dst); \ - __ pxor(kScratchDoubleReg, kScratchDoubleReg); \ - __ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \ - __ ptest(kScratchDoubleReg, kScratchDoubleReg); \ - __ cmovq(zero, dst, tmp); \ +#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ + do { \ + CpuFeatureScope sse_scope(tasm(), SSE4_1); \ + Register dst = i.OutputRegister(); \ + Register tmp1 = i.TempRegister(0); \ + XMMRegister tmp2 = i.TempSimd128Register(1); \ + __ movq(tmp1, Immediate(1)); \ + __ xorq(dst, dst); \ + __ pxor(tmp2, tmp2); \ + __ opcode(tmp2, i.InputSimd128Register(0)); \ + __ ptest(tmp2, tmp2); \ + __ cmovq(zero, dst, tmp1); \ } while (false) void CodeGenerator::AssembleDeconstructFrame() { @@ -989,10 +1002,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // don't emit code for nops. 
break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; unwinding_info_writer_.MarkBlockWillExit(); break; @@ -1000,9 +1012,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArchRet: AssembleReturn(instr->InputAt(0)); break; - case kArchStackPointer: - __ movq(i.OutputRegister(), rsp); - break; case kArchFramePointer: __ movq(i.OutputRegister(), rbp); break; @@ -1013,6 +1022,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ movq(i.OutputRegister(), rbp); } break; + case kArchStackPointerGreaterThan: { + constexpr size_t kValueIndex = 0; + if (HasAddressingMode(instr)) { + __ cmpq(rsp, i.MemoryOperand(kValueIndex)); + } else { + __ cmpq(rsp, i.InputRegister(kValueIndex)); + } + break; + } case kArchTruncateDoubleToI: { auto result = i.OutputRegister(); auto input = i.InputDoubleRegister(0); @@ -1176,14 +1194,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_MULT(imulq); break; case kX64ImulHigh32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ imull(i.InputRegister(1)); } else { __ imull(i.InputOperand(1)); } break; case kX64UmulHigh32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ mull(i.InputRegister(1)); } else { __ mull(i.InputOperand(1)); @@ -1254,42 +1272,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_SHIFT(rorq, 6); break; case kX64Lzcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Lzcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Lzcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Lzcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Lzcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Lzcntl(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Tzcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Tzcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Tzcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Tzcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Tzcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Tzcntl(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Popcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Popcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Popcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Popcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Popcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Popcntl(i.OutputRegister(), i.InputOperand(0)); @@ -1321,16 +1339,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kSSEFloat32Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. 
- __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psrlq(kScratchDoubleReg, 33); - __ Andps(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psrlq(tmp, 33); + __ Andps(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat32Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psllq(kScratchDoubleReg, 31); - __ Xorps(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psllq(tmp, 31); + __ Xorps(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat32Sqrt: @@ -1532,17 +1552,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64F64x2Abs: case kSSEFloat64Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psrlq(kScratchDoubleReg, 1); - __ Andpd(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psrlq(tmp, 1); + __ Andpd(i.OutputDoubleRegister(), tmp); break; } case kX64F64x2Neg: case kSSEFloat64Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psllq(kScratchDoubleReg, 63); - __ Xorpd(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psllq(tmp, 63); + __ Xorpd(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat64Sqrt: @@ -1659,56 +1681,56 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kSSEInt32ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt32ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt64ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt64ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint64ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint64ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint32ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint32ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ 
Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -1729,21 +1751,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kSSEFloat64InsertLowWord32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0); } else { __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0); } break; case kSSEFloat64InsertHighWord32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1); } else { __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1); } break; case kSSEFloat64LoadLowWord32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movd(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -1800,56 +1822,52 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kAVXFloat32Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsrlq(tmp, tmp, 33); if (instr->InputAt(0)->IsFPRegister()) { - __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat32Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsllq(tmp, tmp, 31); if (instr->InputAt(0)->IsFPRegister()) { - __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat64Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsrlq(tmp, tmp, 1); if (instr->InputAt(0)->IsFPRegister()) { - __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat64Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. 
CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsllq(tmp, tmp, 63); if (instr->InputAt(0)->IsFPRegister()) { - __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } @@ -1929,14 +1947,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64Movl: EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); if (instr->HasOutput()) { - if (instr->addressing_mode() == kMode_None) { - if (instr->InputAt(0)->IsRegister()) { + if (HasAddressingMode(instr)) { + __ movl(i.OutputRegister(), i.MemoryOperand()); + } else { + if (HasRegisterInput(instr, 0)) { __ movl(i.OutputRegister(), i.InputRegister(0)); } else { __ movl(i.OutputRegister(), i.InputOperand(0)); } - } else { - __ movl(i.OutputRegister(), i.MemoryOperand()); } __ AssertZeroExtended(i.OutputRegister()); } else { @@ -2002,12 +2020,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; } - case kX64CompressSigned: // Fall through. - case kX64CompressPointer: // Fall through. - case kX64CompressAny: { - ASSEMBLE_MOVX(movl); - break; - } case kX64Movq: EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); if (instr->HasOutput()) { @@ -2082,14 +2094,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kX64BitcastIF: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kX64BitcastLD: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movq(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -2177,7 +2189,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ incl(i.OutputRegister()); break; case kX64Push: - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { + if (HasAddressingMode(instr)) { size_t index = 0; Operand operand = i.MemoryOperand(&index); __ pushq(operand); @@ -2189,7 +2201,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( frame_access_state()->IncreaseSPDelta(1); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), kSystemPointerSize); - } else if (instr->InputAt(0)->IsRegister()) { + } else if (HasRegisterInput(instr, 0)) { __ pushq(i.InputRegister(0)); frame_access_state()->IncreaseSPDelta(1); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), @@ -2256,11 +2268,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64F64x2Splat: { + CpuFeatureScope sse_scope(tasm(), SSE3); XMMRegister dst = i.OutputSimd128Register(); if (instr->InputAt(0)->IsFPRegister()) { - __ pshufd(dst, i.InputDoubleRegister(0), 0x44); + __ movddup(dst, i.InputDoubleRegister(0)); } else { - __ pshufd(dst, i.InputOperand(0), 0x44); + __ movddup(dst, i.InputOperand(0)); } break; } @@ -2280,6 +2293,61 @@ CodeGenerator::CodeGenResult 
CodeGenerator::AssembleArchInstruction( __ movq(i.OutputDoubleRegister(), kScratchRegister); break; } + case kX64F64x2Add: { + ASSEMBLE_SSE_BINOP(addpd); + break; + } + case kX64F64x2Sub: { + ASSEMBLE_SSE_BINOP(subpd); + break; + } + case kX64F64x2Mul: { + ASSEMBLE_SSE_BINOP(mulpd); + break; + } + case kX64F64x2Div: { + ASSEMBLE_SSE_BINOP(divpd); + break; + } + case kX64F64x2Min: { + XMMRegister src1 = i.InputSimd128Register(1), + dst = i.OutputSimd128Register(); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // The minpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform minpd in both orders, merge the results, and adjust. + __ movapd(kScratchDoubleReg, src1); + __ minpd(kScratchDoubleReg, dst); + __ minpd(dst, src1); + // Propagate -0's and NaNs, which may be non-canonical. + __ orpd(kScratchDoubleReg, dst); + // Canonicalize NaNs by quieting and clearing the payload. + __ cmppd(dst, kScratchDoubleReg, 3); + __ orpd(kScratchDoubleReg, dst); + __ psrlq(dst, 13); + __ andnpd(dst, kScratchDoubleReg); + break; + } + case kX64F64x2Max: { + XMMRegister src1 = i.InputSimd128Register(1), + dst = i.OutputSimd128Register(); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // The maxpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform maxpd in both orders, merge the results, and adjust. + __ movapd(kScratchDoubleReg, src1); + __ maxpd(kScratchDoubleReg, dst); + __ maxpd(dst, src1); + // Find discrepancies. + __ xorpd(dst, kScratchDoubleReg); + // Propagate NaNs, which may be non-canonical. + __ orpd(kScratchDoubleReg, dst); + // Propagate sign discrepancy and (subtle) quiet NaNs. + __ subpd(kScratchDoubleReg, dst); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + __ cmppd(dst, kScratchDoubleReg, 3); + __ psrlq(dst, 13); + __ andnpd(dst, kScratchDoubleReg); + break; + } case kX64F64x2Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); @@ -2406,6 +2474,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } + case kX64F32x4Div: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + __ divps(i.OutputSimd128Register(), i.InputSimd128Register(1)); + break; + } case kX64F32x4Min: { XMMRegister src1 = i.InputSimd128Register(1), dst = i.OutputSimd128Register(); @@ -2466,13 +2539,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2Splat: { + CpuFeatureScope sse_scope(tasm(), SSE3); XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movq(dst, i.InputRegister(0)); } else { __ movq(dst, i.InputOperand(0)); } - __ pshufd(dst, dst, 0x44); + __ movddup(dst, dst); break; } case kX64I64x2ExtractLane: { @@ -2482,7 +2556,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I64x2ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrq(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2502,7 +2576,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2Shl: { - __ psllq(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psllq(i.OutputSimd128Register(), tmp); break; }
case kX64I64x2ShrS: { @@ -2511,16 +2587,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // ShrS on each quadword one at a time XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); + Register tmp = i.ToRegister(instr->TempAt(0)); // lower quadword - __ pextrq(kScratchRegister, src, 0x0); - __ sarq(kScratchRegister, Immediate(i.InputInt8(1))); - __ pinsrq(dst, kScratchRegister, 0x0); + __ pextrq(tmp, src, 0x0); + __ sarq_cl(tmp); + __ pinsrq(dst, tmp, 0x0); // upper quadword - __ pextrq(kScratchRegister, src, 0x1); - __ sarq(kScratchRegister, Immediate(i.InputInt8(1))); - __ pinsrq(dst, kScratchRegister, 0x1); + __ pextrq(tmp, src, 0x1); + __ sarq_cl(tmp); + __ pinsrq(dst, tmp, 0x1); break; } case kX64I64x2Add: { @@ -2538,8 +2615,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister left = i.InputSimd128Register(0); XMMRegister right = i.InputSimd128Register(1); - XMMRegister tmp1 = i.ToSimd128Register(instr->TempAt(0)); - XMMRegister tmp2 = i.ToSimd128Register(instr->TempAt(1)); + XMMRegister tmp1 = i.TempSimd128Register(0); + XMMRegister tmp2 = i.TempSimd128Register(1); __ movaps(tmp1, left); __ movaps(tmp2, right); @@ -2559,6 +2636,66 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ paddq(left, tmp2); // left == dst break; } + case kX64I64x2MinS: { + if (CpuFeatures::IsSupported(SSE4_2)) { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(tmp, src); + __ pcmpgtq(src, dst); + __ blendvpd(tmp, dst); // implicit use of xmm0 as mask + __ movaps(dst, tmp); + } else { + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + Register tmp1 = i.TempRegister(1); + Register tmp2 = i.TempRegister(2); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // back up src since we cannot change it + __ movaps(tmp, src); + + // compare the lower quadwords + __ movq(tmp1, dst); + __ movq(tmp2, tmp); + __ cmpq(tmp1, tmp2); + // tmp2 now has the min of lower quadwords + __ cmovq(less_equal, tmp2, tmp1); + // tmp1 now has the higher quadword + // must do this before movq, movq clears top quadword + __ pextrq(tmp1, dst, 1); + // save tmp2 into dst + __ movq(dst, tmp2); + // tmp2 now has the higher quadword + __ pextrq(tmp2, tmp, 1); + // compare higher quadwords + __ cmpq(tmp1, tmp2); + // tmp2 now has the min of higher quadwords + __ cmovq(less_equal, tmp2, tmp1); + __ movq(tmp, tmp2); + // dst = [tmp[0], dst[0]] + __ punpcklqdq(dst, tmp); + } + break; + } + case kX64I64x2MaxS: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(tmp, src); + __ pcmpgtq(src, dst); + __ blendvpd(dst, tmp); // implicit use of xmm0 as mask + break; + } case kX64I64x2Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); @@ -2568,9 +2705,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqq(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqq(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I64x2GtS: { @@ -2584,7 +2722,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ movaps(tmp, src); __ pcmpgtq(tmp, dst); @@ -2593,7 +2731,56 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2ShrU: { - __ psrlq(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrlq(i.OutputSimd128Register(), tmp); + break; + } + case kX64I64x2MinU: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister src_tmp = i.TempSimd128Register(0); + XMMRegister dst_tmp = i.TempSimd128Register(1); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(src_tmp, src); + __ movaps(dst_tmp, dst); + + __ pcmpeqd(src, src); + __ psllq(src, 63); + + __ pxor(dst_tmp, src); + __ pxor(src, src_tmp); + + __ pcmpgtq(src, dst_tmp); + __ blendvpd(src_tmp, dst); // implicit use of xmm0 as mask + __ movaps(dst, src_tmp); + break; + } + case kX64I64x2MaxU: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister src_tmp = i.TempSimd128Register(0); + XMMRegister dst_tmp = i.TempSimd128Register(1); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(src_tmp, src); + __ movaps(dst_tmp, dst); + + __ pcmpeqd(src, src); + __ psllq(src, 63); + + __ pxor(dst_tmp, src); + __ pxor(src, src_tmp); + + __ pcmpgtq(src, dst_tmp); + __ blendvpd(dst, src_tmp); // implicit use of xmm0 as mask break; } case kX64I64x2GtU: { @@ -2601,7 +2788,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 63); @@ -2617,7 +2804,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 63); @@ -2632,7 +2819,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I32x4Splat: { XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ 
movd(dst, i.InputOperand(0)); @@ -2647,7 +2834,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I32x4ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2658,19 +2845,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I32x4SConvertF32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); + XMMRegister tmp = i.TempSimd128Register(0); // NAN->0 - __ movaps(kScratchDoubleReg, dst); - __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg); - __ pand(dst, kScratchDoubleReg); + __ movaps(tmp, dst); + __ cmpeqps(tmp, tmp); + __ pand(dst, tmp); // Set top bit if >= 0 (but not -0.0!) - __ pxor(kScratchDoubleReg, dst); + __ pxor(tmp, dst); // Convert __ cvttps2dq(dst, dst); // Set top bit if >=0 is now < 0 - __ pand(kScratchDoubleReg, dst); - __ psrad(kScratchDoubleReg, 31); + __ pand(tmp, dst); + __ psrad(tmp, 31); // Set positive overflow lanes to 0x7FFFFFFF - __ pxor(dst, kScratchDoubleReg); + __ pxor(dst, tmp); break; } case kX64I32x4SConvertI16x8Low: { @@ -2699,11 +2887,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4Shl: { - __ pslld(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ pslld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4ShrS: { - __ psrad(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrad(i.OutputSimd128Register(), tmp); break; } case kX64I32x4Add: { @@ -2739,9 +2931,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqd(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I32x4GtS: { @@ -2760,24 +2953,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); + XMMRegister tmp2 = i.TempSimd128Register(1); // NAN->0, negative->0 - __ pxor(kScratchDoubleReg, kScratchDoubleReg); - __ maxps(dst, kScratchDoubleReg); + __ pxor(tmp2, tmp2); + __ maxps(dst, tmp2); // scratch: float representation of max_signed - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psrld(kScratchDoubleReg, 1); // 0x7fffffff - __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000 + __ pcmpeqd(tmp2, tmp2); + __ psrld(tmp2, 1); // 0x7fffffff + __ cvtdq2ps(tmp2, tmp2); // 0x4f000000 // tmp: convert (src-max_signed). 
// Positive overflow lanes -> 0x7FFFFFFF // Negative lanes -> 0 __ movaps(tmp, dst); - __ subps(tmp, kScratchDoubleReg); - __ cmpleps(kScratchDoubleReg, tmp); + __ subps(tmp, tmp2); + __ cmpleps(tmp2, tmp); __ cvttps2dq(tmp, tmp); - __ pxor(tmp, kScratchDoubleReg); - __ pxor(kScratchDoubleReg, kScratchDoubleReg); - __ pmaxsd(tmp, kScratchDoubleReg); + __ pxor(tmp, tmp2); + __ pxor(tmp2, tmp2); + __ pmaxsd(tmp, tmp2); // convert. Overflow lanes above max_signed will be 0x80000000 __ cvttps2dq(dst, dst); // Add (src-max_signed) for overflow lanes. @@ -2797,7 +2991,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4ShrU: { - __ psrld(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4MinU: { @@ -2814,10 +3010,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxud(dst, src); __ pcmpeqd(dst, src); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqd(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I32x4GeU: { @@ -2835,7 +3032,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I16x8Splat: { XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ movd(dst, i.InputOperand(0)); @@ -2853,7 +3050,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I16x8ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2887,11 +3084,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8Shl: { - __ psllw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psllw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8ShrS: { - __ psraw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psraw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8SConvertI32x4: { @@ -2940,9 +3141,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqw(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I16x8GtS: { @@ -2970,7 +3172,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8ShrU: { - __ psrlw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrlw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8UConvertI32x4: { @@ -3007,10 +3211,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = 
i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxuw(dst, src); __ pcmpeqw(dst, src); - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqw(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I16x8GeU: { @@ -3024,7 +3229,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I8x16Splat: { CpuFeatureScope sse_scope(tasm(), SSSE3); XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ movd(dst, i.InputOperand(0)); @@ -3042,7 +3247,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I8x16ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -3071,31 +3276,36 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I8x16Shl: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - int8_t shift = i.InputInt8(1) & 0x7; - if (shift < 4) { - // For small shifts, doubling is faster. - for (int i = 0; i < shift; ++i) { - __ paddb(dst, dst); - } - } else { - // Mask off the unwanted bits before word-shifting. - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ psrlw(kScratchDoubleReg, 8 + shift); - __ packuswb(kScratchDoubleReg, kScratchDoubleReg); - __ pand(dst, kScratchDoubleReg); - __ psllw(dst, shift); - } + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + // Mask off the unwanted bits before word-shifting. + __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ packuswb(kScratchDoubleReg, kScratchDoubleReg); + __ pand(dst, kScratchDoubleReg); + __ movq(tmp_simd, i.InputRegister(1)); + __ psllw(dst, tmp_simd); break; } case kX64I8x16ShrS: { XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); // Unpack the bytes into words, do arithmetic shifts, and repack. - __ punpckhbw(kScratchDoubleReg, src); - __ punpcklbw(dst, src); - __ psraw(kScratchDoubleReg, 8 + shift); - __ psraw(dst, 8 + shift); + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + // Prepare shift value + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psraw(kScratchDoubleReg, tmp_simd); + __ psraw(dst, tmp_simd); __ packsswb(dst, kScratchDoubleReg); break; } @@ -3119,7 +3329,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); XMMRegister right = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); // I16x8 view of I8x16 // left = AAaa AAaa ... AAaa AAaa // right= BBbb BBbb ...
BBbb BBbb @@ -3163,9 +3373,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I8x16Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqb(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I8x16GtS: { @@ -3194,13 +3405,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I8x16ShrU: { XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; // Unpack the bytes into words, do logical shifts, and repack. - __ punpckhbw(kScratchDoubleReg, src); - __ punpcklbw(dst, src); - __ psrlw(kScratchDoubleReg, 8 + shift); - __ psrlw(dst, 8 + shift); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + // Prepare shift value + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ psrlw(dst, tmp_simd); __ packuswb(dst, kScratchDoubleReg); break; } @@ -3226,10 +3443,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxub(dst, src); __ pcmpeqb(dst, src); - __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqb(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I8x16GeU: { @@ -3561,9 +3779,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb); break; } - case kX64StackCheck: - __ CompareRoot(rsp, RootIndex::kStackLimit); - break; case kWord32AtomicExchangeInt8: { __ xchgb(i.InputRegister(0), i.MemoryOperand(1)); __ movsxbl(i.InputRegister(0), i.InputRegister(0)); @@ -4167,6 +4382,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); } +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { X64OperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h index d6ac3f43df..8a0a45a916 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h +++ b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h @@ -140,9 +140,6 @@ namespace compiler { V(X64DecompressSigned) \ V(X64DecompressPointer) \ V(X64DecompressAny) \ - V(X64CompressSigned) \ - V(X64CompressPointer) \ - V(X64CompressAny) \ V(X64Movq) \ V(X64Movsd) \ V(X64Movss) \ @@ -158,12 +155,17 @@ namespace compiler { V(X64Push) \ V(X64Poke) \ V(X64Peek) \ - V(X64StackCheck) \ V(X64F64x2Splat) \ V(X64F64x2ExtractLane) \ V(X64F64x2ReplaceLane) \ V(X64F64x2Abs) \ V(X64F64x2Neg) \ + V(X64F64x2Add) \ + V(X64F64x2Sub) \ + V(X64F64x2Mul) \ + V(X64F64x2Div) \ + V(X64F64x2Min) \ + V(X64F64x2Max) \ V(X64F64x2Eq) \ V(X64F64x2Ne) \ V(X64F64x2Lt) \ @@ -181,6 +183,7 @@ namespace compiler { V(X64F32x4AddHoriz) \ V(X64F32x4Sub)
\ V(X64F32x4Mul) \ + V(X64F32x4Div) \ V(X64F32x4Min) \ V(X64F32x4Max) \ V(X64F32x4Eq) \ @@ -196,11 +199,15 @@ namespace compiler { V(X64I64x2Add) \ V(X64I64x2Sub) \ V(X64I64x2Mul) \ + V(X64I64x2MinS) \ + V(X64I64x2MaxS) \ V(X64I64x2Eq) \ V(X64I64x2Ne) \ V(X64I64x2GtS) \ V(X64I64x2GeS) \ V(X64I64x2ShrU) \ + V(X64I64x2MinU) \ + V(X64I64x2MaxU) \ V(X64I64x2GtU) \ V(X64I64x2GeU) \ V(X64I32x4Splat) \ diff --git a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc index 6389ef2e50..e9fa450c38 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc +++ b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc @@ -129,6 +129,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F64x2ReplaceLane: case kX64F64x2Abs: case kX64F64x2Neg: + case kX64F64x2Add: + case kX64F64x2Sub: + case kX64F64x2Mul: + case kX64F64x2Div: + case kX64F64x2Min: + case kX64F64x2Max: case kX64F64x2Eq: case kX64F64x2Ne: case kX64F64x2Lt: @@ -146,6 +152,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F32x4AddHoriz: case kX64F32x4Sub: case kX64F32x4Mul: + case kX64F32x4Div: case kX64F32x4Min: case kX64F32x4Max: case kX64F32x4Eq: @@ -161,11 +168,15 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64I64x2Add: case kX64I64x2Sub: case kX64I64x2Mul: + case kX64I64x2MinS: + case kX64I64x2MaxS: case kX64I64x2Eq: case kX64I64x2Ne: case kX64I64x2GtS: case kX64I64x2GeS: case kX64I64x2ShrU: + case kX64I64x2MinU: + case kX64I64x2MaxU: case kX64I64x2GtU: case kX64I64x2GeU: case kX64I32x4Splat: @@ -295,9 +306,6 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64DecompressSigned: case kX64DecompressPointer: case kX64DecompressAny: - case kX64CompressSigned: - case kX64CompressPointer: - case kX64CompressAny: return (instr->addressing_mode() == kMode_None) ? kNoOpcodeFlags : kIsLoadOperation | kHasSideEffect; @@ -346,7 +354,6 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64Movdqu: return instr->HasOutput() ? 
kIsLoadOperation : kHasSideEffect; - case kX64StackCheck: case kX64Peek: return kIsLoadOperation; diff --git a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc index a4908fb846..5379074bac 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc @@ -170,9 +170,10 @@ class X64OperandGenerator final : public OperandGenerator { AddressingMode GetEffectiveAddressMemoryOperand(Node* operand, InstructionOperand inputs[], size_t* input_count) { - if (selector()->CanAddressRelativeToRootsRegister()) { + { LoadMatcher<ExternalReferenceMatcher> m(operand); - if (m.index().HasValue() && m.object().HasValue()) { + if (m.index().HasValue() && m.object().HasValue() && + selector()->CanAddressRelativeToRootsRegister(m.object().Value())) { ptrdiff_t const delta = m.index().Value() + TurboAssemblerBase::RootRegisterOffsetForExternalReference( @@ -350,7 +351,8 @@ void InstructionSelector::VisitStore(Node* node) { StoreRepresentation store_rep = StoreRepresentationOf(node->op()); WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind(); - if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedOrCompressedPointer(store_rep.representation())); AddressingMode addressing_mode; InstructionOperand inputs[] = { @@ -528,6 +530,35 @@ void InstructionSelector::VisitWord64Xor(Node* node) { } } +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + DCHECK(cont->IsBranch()); + const int effect_level = + GetEffectLevel(cont->true_block()->PredecessorAt(0)->control_input()); + + X64OperandGenerator g(this); + if (g.CanBeMemoryOperand(kX64Cmp, node, value, effect_level)) { + DCHECK_EQ(IrOpcode::kLoad, value->opcode()); + + // GetEffectiveAddressMemoryOperand can create at most 3 inputs. + static constexpr int kMaxInputCount = 3; + + size_t input_count = 0; + InstructionOperand inputs[kMaxInputCount]; + AddressingMode addressing_mode = + g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count); + opcode |= AddressingModeField::encode(addressing_mode); + DCHECK_LE(input_count, kMaxInputCount); + + EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont); + } else { + EmitWithContinuation(opcode, g.UseRegister(value), cont); + } +} + namespace { bool TryMergeTruncateInt64ToInt32IntoLoad(InstructionSelector* selector, @@ -1238,23 +1269,23 @@ void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { } void InstructionSelector::VisitChangeTaggedToCompressed(Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressAny, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + return EmitIdentity(node); } void InstructionSelector::VisitChangeTaggedPointerToCompressedPointer( Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressPointer, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. 
+ return EmitIdentity(node); } void InstructionSelector::VisitChangeTaggedSignedToCompressedSigned( Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressSigned, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + return EmitIdentity(node); } void InstructionSelector::VisitChangeCompressedToTagged(Node* node) { @@ -1338,10 +1369,13 @@ void VisitFloatBinop(InstructionSelector* selector, Node* node, void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input, ArchOpcode avx_opcode, ArchOpcode sse_opcode) { X64OperandGenerator g(selector); + InstructionOperand temps[] = {g.TempDoubleRegister()}; if (selector->IsSupported(AVX)) { - selector->Emit(avx_opcode, g.DefineAsRegister(node), g.Use(input)); + selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input), + arraysize(temps), temps); } else { - selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input)); + selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input), + arraysize(temps), temps); } } @@ -1838,30 +1872,6 @@ void VisitWord64Compare(InstructionSelector* selector, Node* node, g.UseRegister(m.right().node()), cont); } } - if (selector->isolate() != nullptr) { - StackCheckMatcher<Int64BinopMatcher, IrOpcode::kUint64LessThan> m( - selector->isolate(), node); - if (m.Matched()) { - // Compare(Load(js_stack_limit), LoadStackPointer) - if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute(); - InstructionCode opcode = cont->Encode(kX64StackCheck); - CHECK(cont->IsBranch()); - selector->EmitWithContinuation(opcode, cont); - return; - } - } - WasmStackCheckMatcher<Int64BinopMatcher, IrOpcode::kUint64LessThan> wasm_m( - node); - if (wasm_m.Matched()) { - // This is a wasm stack check. By structure, we know that we can use the - // stack pointer directly, as wasm code does not modify the stack at points - // where stack checks are performed. 
- Node* left = node->InputAt(0); - LocationOperand rsp(InstructionOperand::EXPLICIT, LocationOperand::REGISTER, - InstructionSequence::DefaultRepresentation(), - RegisterCode::kRegCode_rsp); - return VisitCompareWithMemoryOperand(selector, kX64Cmp, left, rsp, cont); - } VisitWordCompare(selector, node, kX64Cmp, cont); } @@ -2157,6 +2167,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, return VisitWordCompare(this, value, kX64Cmp32, cont); case IrOpcode::kWord32And: return VisitWordCompare(this, value, kX64Test32, cont); + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2586,6 +2599,12 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16) #define SIMD_BINOP_LIST(V) \ + V(F64x2Add) \ + V(F64x2Sub) \ + V(F64x2Mul) \ + V(F64x2Div) \ + V(F64x2Min) \ + V(F64x2Max) \ V(F64x2Eq) \ V(F64x2Ne) \ V(F64x2Lt) \ @@ -2594,6 +2613,7 @@ VISIT_ATOMIC_BINOP(Xor) V(F32x4AddHoriz) \ V(F32x4Sub) \ V(F32x4Mul) \ + V(F32x4Div) \ V(F32x4Min) \ V(F32x4Max) \ V(F32x4Eq) \ @@ -2603,7 +2623,6 @@ VISIT_ATOMIC_BINOP(Xor) V(I64x2Add) \ V(I64x2Sub) \ V(I64x2Eq) \ - V(I64x2Ne) \ V(I64x2GtS) \ V(I32x4Add) \ V(I32x4AddHoriz) \ @@ -2612,12 +2631,10 @@ VISIT_ATOMIC_BINOP(Xor) V(I32x4MinS) \ V(I32x4MaxS) \ V(I32x4Eq) \ - V(I32x4Ne) \ V(I32x4GtS) \ V(I32x4GeS) \ V(I32x4MinU) \ V(I32x4MaxU) \ - V(I32x4GtU) \ V(I32x4GeU) \ V(I16x8SConvertI32x4) \ V(I16x8Add) \ @@ -2629,14 +2646,12 @@ VISIT_ATOMIC_BINOP(Xor) V(I16x8MinS) \ V(I16x8MaxS) \ V(I16x8Eq) \ - V(I16x8Ne) \ V(I16x8GtS) \ V(I16x8GeS) \ V(I16x8AddSaturateU) \ V(I16x8SubSaturateU) \ V(I16x8MinU) \ V(I16x8MaxU) \ - V(I16x8GtU) \ V(I16x8GeU) \ V(I8x16SConvertI16x8) \ V(I8x16Add) \ @@ -2646,23 +2661,28 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16MinS) \ V(I8x16MaxS) \ V(I8x16Eq) \ - V(I8x16Ne) \ V(I8x16GtS) \ V(I8x16GeS) \ V(I8x16AddSaturateU) \ V(I8x16SubSaturateU) \ V(I8x16MinU) \ V(I8x16MaxU) \ - V(I8x16GtU) \ V(I8x16GeU) \ V(S128And) \ V(S128Or) \ V(S128Xor) #define SIMD_BINOP_ONE_TEMP_LIST(V) \ + V(I64x2Ne) \ V(I64x2GeS) \ V(I64x2GtU) \ - V(I64x2GeU) + V(I64x2GeU) \ + V(I32x4Ne) \ + V(I32x4GtU) \ + V(I16x8Ne) \ + V(I16x8GtU) \ + V(I8x16Ne) \ + V(I8x16GtU) #define SIMD_UNOP_LIST(V) \ V(F32x4SConvertI32x4) \ @@ -2686,16 +2706,17 @@ VISIT_ATOMIC_BINOP(Xor) #define SIMD_SHIFT_OPCODES(V) \ V(I64x2Shl) \ - V(I64x2ShrS) \ V(I64x2ShrU) \ V(I32x4Shl) \ V(I32x4ShrS) \ V(I32x4ShrU) \ V(I16x8Shl) \ V(I16x8ShrS) \ - V(I16x8ShrU) \ - V(I8x16Shl) \ - V(I8x16ShrS) \ + V(I16x8ShrU) + +#define SIMD_NARROW_SHIFT_OPCODES(V) \ + V(I8x16Shl) \ + V(I8x16ShrS) \ V(I8x16ShrU) #define SIMD_ANYTRUE_LIST(V) \ @@ -2745,17 +2766,30 @@ SIMD_TYPES(VISIT_SIMD_EXTRACT_LANE) SIMD_TYPES(VISIT_SIMD_REPLACE_LANE) #undef VISIT_SIMD_REPLACE_LANE -#define VISIT_SIMD_SHIFT(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - X64OperandGenerator g(this); \ - int32_t value = OpParameter<int32_t>(node->op()); \ - Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ - g.UseRegister(node->InputAt(0)), g.UseImmediate(value)); \ +#define VISIT_SIMD_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ + g.UseUniqueRegister(node->InputAt(0)), \ + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ } SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT) #undef VISIT_SIMD_SHIFT #undef SIMD_SHIFT_OPCODES +#define 
VISIT_SIMD_NARROW_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ + g.UseUniqueRegister(node->InputAt(0)), \ + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ + } +SIMD_NARROW_SHIFT_OPCODES(VISIT_SIMD_NARROW_SHIFT) +#undef VISIT_SIMD_NARROW_SHIFT +#undef SIMD_NARROW_SHIFT_OPCODES + #define VISIT_SIMD_UNOP(Opcode) \ void InstructionSelector::Visit##Opcode(Node* node) { \ X64OperandGenerator g(this); \ @@ -2799,12 +2833,12 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE) #undef VISIT_SIMD_ANYTRUE #undef SIMD_ANYTRUE_LIST -#define VISIT_SIMD_ALLTRUE(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - X64OperandGenerator g(this); \ - InstructionOperand temps[] = {g.TempRegister()}; \ - Emit(kX64##Opcode, g.DefineAsRegister(node), \ - g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ +#define VISIT_SIMD_ALLTRUE(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineAsRegister(node), \ + g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ } SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE) #undef VISIT_SIMD_ALLTRUE @@ -2820,14 +2854,16 @@ void InstructionSelector::VisitS128Select(Node* node) { void InstructionSelector::VisitF64x2Abs(Node* node) { X64OperandGenerator g(this); - Emit(kX64F64x2Abs, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + InstructionOperand temps[] = {g.TempDoubleRegister()}; + Emit(kX64F64x2Abs, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), + arraysize(temps), temps); } void InstructionSelector::VisitF64x2Neg(Node* node) { X64OperandGenerator g(this); - Emit(kX64F64x2Neg, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + InstructionOperand temps[] = {g.TempDoubleRegister()}; + Emit(kX64F64x2Neg, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), + arraysize(temps), temps); } void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) { @@ -2836,6 +2872,15 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) { g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitI64x2ShrS(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempRegister()}; + // Use fixed to rcx, to use sarq_cl in codegen. 
+ Emit(kX64I64x2ShrS, g.DefineSameAsFirst(node), + g.UseUniqueRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), rcx), + arraysize(temps), temps); +} + void InstructionSelector::VisitI64x2Mul(Node* node) { X64OperandGenerator g(this); InstructionOperand temps[] = {g.TempSimd128Register(), @@ -2845,15 +2890,59 @@ void InstructionSelector::VisitI64x2Mul(Node* node) { g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); } +void InstructionSelector::VisitI64x2MinS(Node* node) { + X64OperandGenerator g(this); + if (this->IsSupported(SSE4_2)) { + InstructionOperand temps[] = {g.TempSimd128Register()}; + Emit(kX64I64x2MinS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); + } else { + InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister(), + g.TempRegister()}; + Emit(kX64I64x2MinS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + arraysize(temps), temps); + } +} + +void InstructionSelector::VisitI64x2MaxS(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; + Emit(kX64I64x2MaxS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + +void InstructionSelector::VisitI64x2MinU(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; + Emit(kX64I64x2MinU, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + +void InstructionSelector::VisitI64x2MaxU(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; + Emit(kX64I64x2MaxU, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) { X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; Emit(kX64I32x4SConvertF32x4, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + g.UseRegister(node->InputAt(0)), arraysize(temps), temps); } void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) { X64OperandGenerator g(this); - InstructionOperand temps[] = {g.TempSimd128Register()}; + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; Emit(kX64I32x4UConvertF32x4, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), arraysize(temps), temps); } @@ -2997,12 +3086,12 @@ static const ShuffleEntry arch_shuffles[] = { true}, {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kX64S8x8Reverse, - false, - false}, + true, + true}, {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kX64S8x4Reverse, - false, - false}, + true, + true}, {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kX64S8x2Reverse, true, @@ -3060,6 +3149,8 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { SwapShuffleInputs(node); is_swizzle = false; // It's simpler to just handle the general case. no_same_as_first = false; // SSE requires same-as-first. + // TODO(v8:9608): also see v8:9083 + src1_needs_reg = true; opcode = kX64S8x16Alignr; // palignr takes a single imm8 offset. imms[imm_count++] = offset; |