Diffstat (limited to 'chromium/v8/src/wasm/baseline')
12 files changed, 3652 insertions, 284 deletions
diff --git a/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h b/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h index eb91b79ea55..4a9cffb9728 100644 --- a/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h +++ b/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h @@ -332,6 +332,71 @@ inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src, } } +constexpr int MaskFromNeonDataType(NeonDataType dt) { + switch (dt) { + case NeonS8: + case NeonU8: + return 7; + case NeonS16: + case NeonU16: + return 15; + case NeonS32: + case NeonU32: + return 31; + case NeonS64: + case NeonU64: + return 63; + } +} + +enum ShiftDirection { kLeft, kRight }; + +template <ShiftDirection dir = kLeft, NeonDataType dt, NeonSize sz> +inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister lhs, LiftoffRegister rhs) { + constexpr int mask = MaskFromNeonDataType(dt); + UseScratchRegisterScope temps(assm); + QwNeonRegister tmp = temps.AcquireQ(); + Register shift = temps.Acquire(); + assm->and_(shift, rhs.gp(), Operand(mask)); + assm->vdup(sz, tmp, shift); + if (dir == kRight) { + assm->vneg(sz, tmp, tmp); + } + assm->vshl(dt, liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(lhs), tmp); +} + +template <ShiftDirection dir, NeonDataType dt> +inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + // vshr by 0 is not allowed, so check for it, and only move if dst != lhs. + int32_t shift = rhs & MaskFromNeonDataType(dt); + if (shift) { + if (dir == kLeft) { + assm->vshl(dt, liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(lhs), shift); + } else { + assm->vshr(dt, liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(lhs), shift); + } + } else if (dst != lhs) { + assm->vmov(liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(lhs)); + } +} + +inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(assm); + DwVfpRegister scratch = temps.AcquireD(); + assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp()); + assm->vpmax(NeonU32, scratch, scratch, scratch); + assm->ExtractLane(dst.gp(), scratch, NeonS32, 0); + assm->cmp(dst.gp(), Operand(0)); + assm->mov(dst.gp(), Operand(1), LeaveCC, ne); +} + } // namespace liftoff int LiftoffAssembler::PrepareStackFrame() { @@ -437,7 +502,7 @@ void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value, vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed()); break; case ValueType::kF64: { - Register extra_scratch = GetUnusedRegister(kGpReg).gp(); + Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp(); vmov(reg.fp(), Double(value.to_f64_boxed().get_bits()), extra_scratch); break; } @@ -1171,7 +1236,7 @@ void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src, void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset, ValueType type) { DCHECK_NE(dst_offset, src_offset); - LiftoffRegister reg = GetUnusedRegister(reg_class_for(type)); + LiftoffRegister reg = GetUnusedRegister(reg_class_for(type), {}); Fill(reg, src_offset, type); Spill(dst_offset, reg, type); } @@ -1216,7 +1281,7 @@ void LiftoffAssembler::Spill(int offset, WasmValue value) { // The scratch register will be required by str if multiple instructions // are required to encode the offset, and so we cannot use it in that case. 
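For context on the shift helpers added above: wasm SIMD shifts take their count modulo the lane width (that is what MaskFromNeonDataType supplies), and NEON has no right-shift-by-register, so the right-shift case is emitted as vshl by a negated count. A minimal per-lane sketch of the intended semantics, illustrative only (the names below are not part of the patch):

#include <cstdint>

// Scalar model of one i32 lane; the count is masked to the lane width (31),
// mirroring MaskFromNeonDataType(NeonS32).
int32_t I32LaneShl(int32_t lane, int32_t count) {
  return static_cast<int32_t>(static_cast<uint32_t>(lane) << (count & 31));
}
int32_t I32LaneShrS(int32_t lane, int32_t count) {
  return lane >> (count & 31);  // arithmetic shift on two's-complement targets
}
uint32_t I32LaneShrU(uint32_t lane, int32_t count) {
  return lane >> (count & 31);
}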
if (!ImmediateFitsAddrMode2Instruction(dst.offset())) { - src = GetUnusedRegister(kGpReg).gp(); + src = GetUnusedRegister(kGpReg, {}).gp(); } else { src = temps.Acquire(); } @@ -1758,7 +1823,7 @@ void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs, DoubleRegister rhs) { constexpr uint32_t kF32SignBit = uint32_t{1} << 31; UseScratchRegisterScope temps(this); - Register scratch = GetUnusedRegister(kGpReg).gp(); + Register scratch = GetUnusedRegister(kGpReg, {}).gp(); Register scratch2 = temps.Acquire(); VmovLow(scratch, lhs); // Clear sign bit in {scratch}. @@ -1777,7 +1842,7 @@ void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, // On arm, we cannot hold the whole f64 value in a gp register, so we just // operate on the upper half (UH). UseScratchRegisterScope temps(this); - Register scratch = GetUnusedRegister(kGpReg).gp(); + Register scratch = GetUnusedRegister(kGpReg, {}).gp(); Register scratch2 = temps.Acquire(); VmovHigh(scratch, lhs); // Clear sign bit in {scratch}. @@ -1862,6 +1927,38 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, b(trap, ge); return true; } + case kExprI32SConvertSatF32: { + UseScratchRegisterScope temps(this); + SwVfpRegister scratch_f = temps.AcquireS(); + vcvt_s32_f32( + scratch_f, + liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. + vmov(dst.gp(), scratch_f); + return true; + } + case kExprI32UConvertSatF32: { + UseScratchRegisterScope temps(this); + SwVfpRegister scratch_f = temps.AcquireS(); + vcvt_u32_f32( + scratch_f, + liftoff::GetFloatRegister(src.fp())); // f32 -> u32 round to zero. + vmov(dst.gp(), scratch_f); + return true; + } + case kExprI32SConvertSatF64: { + UseScratchRegisterScope temps(this); + SwVfpRegister scratch_f = temps.AcquireS(); + vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. + vmov(dst.gp(), scratch_f); + return true; + } + case kExprI32UConvertSatF64: { + UseScratchRegisterScope temps(this); + SwVfpRegister scratch_f = temps.AcquireS(); + vcvt_u32_f64(scratch_f, src.fp()); // f64 -> u32 round to zero. + vmov(dst.gp(), scratch_f); + return true; + } case kExprI32ReinterpretF32: vmov(dst.gp(), liftoff::GetFloatRegister(src.fp())); return true; @@ -1914,10 +2011,14 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, case kExprF64UConvertI64: case kExprI64SConvertF32: case kExprI64UConvertF32: + case kExprI64SConvertSatF32: + case kExprI64UConvertSatF32: case kExprF32SConvertI64: case kExprF32UConvertI64: case kExprI64SConvertF64: case kExprI64UConvertF64: + case kExprI64SConvertSatF64: + case kExprI64UConvertSatF64: // These cases can be handled by the C fallback function. 
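The new kExprI32[SU]ConvertSatF32/F64 cases lean on ARM vcvt, which already saturates and converts NaN to 0, matching wasm's trunc_sat semantics; the i64 saturating variants still go through the C fallback below. A scalar sketch of those semantics, illustrative only (the helper name is not from the patch):

#include <cmath>
#include <cstdint>
#include <limits>

// wasm i32.trunc_sat_f64_s: NaN becomes 0, out-of-range values clamp to the
// i32 range instead of trapping.
int32_t TruncSatF64ToI32(double x) {
  if (std::isnan(x)) return 0;
  if (x <= static_cast<double>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (x >= static_cast<double>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(x);
}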
return false; default: @@ -2052,6 +2153,79 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, } } +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind transform, + uint32_t* protected_load_pc) { + UseScratchRegisterScope temps(this); + Register actual_src_addr = liftoff::CalculateActualAddress( + this, &temps, src_addr, offset_reg, offset_imm); + *protected_load_pc = pc_offset(); + MachineType memtype = type.mem_type(); + + if (transform == LoadTransformationKind::kExtend) { + if (memtype == MachineType::Int8()) { + vld1(Neon8, NeonListOperand(dst.low_fp()), + NeonMemOperand(actual_src_addr)); + vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp()); + } else if (memtype == MachineType::Uint8()) { + vld1(Neon8, NeonListOperand(dst.low_fp()), + NeonMemOperand(actual_src_addr)); + vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp()); + } else if (memtype == MachineType::Int16()) { + vld1(Neon16, NeonListOperand(dst.low_fp()), + NeonMemOperand(actual_src_addr)); + vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp()); + } else if (memtype == MachineType::Uint16()) { + vld1(Neon16, NeonListOperand(dst.low_fp()), + NeonMemOperand(actual_src_addr)); + vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp()); + } else if (memtype == MachineType::Int32()) { + vld1(Neon32, NeonListOperand(dst.low_fp()), + NeonMemOperand(actual_src_addr)); + vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp()); + } else if (memtype == MachineType::Uint32()) { + vld1(Neon32, NeonListOperand(dst.low_fp()), + NeonMemOperand(actual_src_addr)); + vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp()); + } + } else { + DCHECK_EQ(LoadTransformationKind::kSplat, transform); + if (memtype == MachineType::Int8()) { + vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)), + NeonMemOperand(actual_src_addr)); + } else if (memtype == MachineType::Int16()) { + vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)), + NeonMemOperand(actual_src_addr)); + } else if (memtype == MachineType::Int32()) { + vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)), + NeonMemOperand(actual_src_addr)); + } else if (memtype == MachineType::Int64()) { + vld1(Neon32, NeonListOperand(dst.low_fp()), + NeonMemOperand(actual_src_addr)); + TurboAssembler::Move(dst.high_fp(), dst.low_fp()); + } + } +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + UseScratchRegisterScope temps(this); + + NeonListOperand table(liftoff::GetSimd128Register(lhs)); + if (dst == lhs) { + // dst will be overwritten, so keep the table somewhere else. 
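LoadTransform above covers the two v128 load shapes: a load-extend reads 64 bits and widens each element to twice its size, and a load-splat reads a single element and duplicates it into every lane. A scalar sketch of two of those shapes, illustrative only (helper names are not from the patch):

#include <cstdint>
#include <cstring>

// Model of v128.load8x8_s: read 8 bytes, sign-extend each one to a 16-bit lane.
void Load8x8S(const uint8_t* mem, int16_t out[8]) {
  int8_t bytes[8];
  std::memcpy(bytes, mem, sizeof(bytes));
  for (int i = 0; i < 8; ++i) out[i] = static_cast<int16_t>(bytes[i]);
}

// Model of v128.load32_splat: read one 32-bit value and replicate it 4 times.
void Load32Splat(const uint8_t* mem, uint32_t out[4]) {
  uint32_t v;
  std::memcpy(&v, mem, sizeof(v));
  for (int i = 0; i < 4; ++i) out[i] = v;
}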
+ QwNeonRegister tbl = temps.AcquireQ(); + TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs)); + table = NeonListOperand(tbl); + } + + vtbl(dst.low_fp(), table, rhs.low_fp()); + vtbl(dst.high_fp(), table, rhs.high_fp()); +} + void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src) { TurboAssembler::Move(dst.low_fp(), src.fp()); @@ -2273,12 +2447,37 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst, void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { - bailout(kSimd, "i64x2_shl"); + liftoff::EmitSimdShift<liftoff::kLeft, NeonS64, Neon32>(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { - bailout(kSimd, "i64x2_shli"); + vshl(NeonS64, liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(lhs), rhs & 63); +} + +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::kRight, NeonS64, Neon32>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS64>(this, dst, lhs, + rhs); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::kRight, NeonU64, Neon32>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU64>(this, dst, lhs, + rhs); } void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, @@ -2306,15 +2505,18 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, QwNeonRegister tmp1 = left; QwNeonRegister tmp2 = right; - if (cache_state()->is_used(lhs) && cache_state()->is_used(rhs)) { + LiftoffRegList used_plus_dst = + cache_state()->used_registers | LiftoffRegList::ForRegs(dst); + + if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) { tmp1 = temps.AcquireQ(); // We only have 1 scratch Q register, so acquire another ourselves. 
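The s8x16_swizzle lowering above maps directly onto vtbl, which returns 0 for out-of-range table indices, exactly the wasm swizzle rule. A scalar model of the operation, illustrative only:

#include <cstdint>

// i8x16.swizzle: each output byte selects src[idx] when idx < 16 and is 0
// otherwise (the same behaviour vtbl provides in hardware).
void I8x16Swizzle(const uint8_t src[16], const uint8_t idx[16],
                  uint8_t out[16]) {
  for (int i = 0; i < 16; ++i) {
    out[i] = idx[i] < 16 ? src[idx[i]] : 0;
  }
}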
LiftoffRegList pinned = LiftoffRegList::ForRegs(dst); LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); tmp2 = liftoff::GetSimd128Register(unused_pair); - } else if (cache_state()->is_used(lhs)) { + } else if (used_plus_dst.has(lhs)) { tmp1 = temps.AcquireQ(); - } else if (cache_state()->is_used(rhs)) { + } else if (used_plus_dst.has(rhs)) { tmp2 = temps.AcquireQ(); } @@ -2363,14 +2565,79 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, liftoff::GetSimd128Register(src)); } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + DwVfpRegister scratch = temps.AcquireD(); + vpmin(NeonU32, scratch, src.low_fp(), src.high_fp()); + vpmin(NeonU32, scratch, scratch, scratch); + ExtractLane(dst.gp(), scratch, NeonS32, 0); + cmp(dst.gp(), Operand(0)); + mov(dst.gp(), Operand(1), LeaveCC, ne); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + Simd128Register tmp = liftoff::GetSimd128Register(src); + Simd128Register mask = temps.AcquireQ(); + + if (cache_state()->is_used(src)) { + // We only have 1 scratch Q register, so try and reuse src. + LiftoffRegList pinned = LiftoffRegList::ForRegs(src); + LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); + mask = liftoff::GetSimd128Register(unused_pair); + } + + vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31); + // Set i-th bit of each lane i. When AND with tmp, the lanes that + // are signed will have i-th bit set, unsigned will be 0. + vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001)); + vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004)); + vand(tmp, mask, tmp); + vpadd(Neon32, tmp.low(), tmp.low(), tmp.high()); + vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero); + VmovLow(dst.gp(), tmp.low()); +} + void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { - bailout(kSimd, "i32x4_shl"); + liftoff::EmitSimdShift<liftoff::kLeft, NeonS32, Neon32>(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { - bailout(kSimd, "i32x4_shli"); + vshl(NeonS32, liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(lhs), rhs & 31); +} + +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::kRight, NeonS32, Neon32>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS32>(this, dst, lhs, + rhs); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::kRight, NeonU32, Neon32>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU32>(this, dst, lhs, + rhs); } void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, @@ -2430,14 +2697,81 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, liftoff::GetSimd128Register(src)); } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + 
liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + DwVfpRegister scratch = temps.AcquireD(); + vpmin(NeonU16, scratch, src.low_fp(), src.high_fp()); + vpmin(NeonU16, scratch, scratch, scratch); + vpmin(NeonU16, scratch, scratch, scratch); + ExtractLane(dst.gp(), scratch, NeonS16, 0); + cmp(dst.gp(), Operand(0)); + mov(dst.gp(), Operand(1), LeaveCC, ne); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + Simd128Register tmp = liftoff::GetSimd128Register(src); + Simd128Register mask = temps.AcquireQ(); + + if (cache_state()->is_used(src)) { + // We only have 1 scratch Q register, so try and reuse src. + LiftoffRegList pinned = LiftoffRegList::ForRegs(src); + LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); + mask = liftoff::GetSimd128Register(unused_pair); + } + + vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15); + // Set i-th bit of each lane i. When AND with tmp, the lanes that + // are signed will have i-th bit set, unsigned will be 0. + vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001)); + vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010)); + vand(tmp, mask, tmp); + vpadd(Neon16, tmp.low(), tmp.low(), tmp.high()); + vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); + vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); + vmov(NeonU16, dst.gp(), tmp.low(), 0); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { - bailout(kSimd, "i16x8_shl"); + liftoff::EmitSimdShift<liftoff::kLeft, NeonS16, Neon16>(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { - bailout(kSimd, "i16x8_shli"); + vshl(NeonS16, liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(lhs), rhs & 15); +} + +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::kRight, NeonS16, Neon16>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS16>(this, dst, lhs, + rhs); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::kRight, NeonU16, Neon16>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU16>(this, dst, lhs, + rhs); } void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, @@ -2537,6 +2871,60 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst, imm_lane_idx); } +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + Simd128Register dest = liftoff::GetSimd128Register(dst); + Simd128Register src1 = liftoff::GetSimd128Register(lhs); + Simd128Register src2 = liftoff::GetSimd128Register(rhs); + UseScratchRegisterScope temps(this); + Simd128Register scratch = temps.AcquireQ(); + if ((src1 != src2) && src1.code() + 1 != src2.code()) { + // vtbl requires the operands to be consecutive or the same. + // If they are the same, we build a smaller list operand (table_size = 2). 
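The anytrue/alltrue reductions above collapse a vector into a scalar boolean: anytrue asks whether any lane is non-zero, alltrue whether every lane is, which is why the code folds the vector with pairwise max respectively min and then compares against zero. A scalar sketch for the eight 16-bit lanes case, illustrative only:

#include <cstdint>

// Models of v16x8.any_true and v16x8.all_true over eight 16-bit lanes.
int AnyTrueI16x8(const uint16_t lanes[8]) {
  for (int i = 0; i < 8; ++i) {
    if (lanes[i] != 0) return 1;
  }
  return 0;
}
int AllTrueI16x8(const uint16_t lanes[8]) {
  for (int i = 0; i < 8; ++i) {
    if (lanes[i] == 0) return 0;
  }
  return 1;
}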
+ // If they are not the same, and not consecutive, we move the src1 and src2 + // to q14 and q15, which will be unused since they are not allocatable in + // Liftoff. If the operands are the same, then we build a smaller list + // operand below. + static_assert(!(kLiftoffAssemblerFpCacheRegs & + (d28.bit() | d29.bit() | d30.bit() | d31.bit())), + "This only works if q14-q15 (d28-d31) are not used."); + vmov(q14, src1); + src1 = q14; + vmov(q15, src2); + src2 = q15; + } + + int table_size = src1 == src2 ? 2 : 4; + uint32_t mask = table_size == 2 ? 0x0F0F0F0F : 0x1F1F1F1F; + + int scratch_s_base = scratch.code() * 4; + for (int j = 0; j < 4; j++) { + uint32_t imm = 0; + for (int i = 3; i >= 0; i--) { + imm = (imm << 8) | shuffle[j * 4 + i]; + } + uint32_t four_lanes = imm; + // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4. + four_lanes &= mask; + vmov(SwVfpRegister::from_code(scratch_s_base + j), + Float32::FromBits(four_lanes)); + } + + DwVfpRegister table_base = src1.low(); + NeonListOperand table(table_base, table_size); + + if (dest != src1 && dest != src2) { + vtbl(dest.low(), table, scratch.low()); + vtbl(dest.high(), table, scratch.high()); + } else { + vtbl(scratch.low(), table, scratch.low()); + vtbl(scratch.high(), table, scratch.high()); + vmov(dest, scratch); + } +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp()); @@ -2569,14 +2957,82 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, liftoff::GetSimd128Register(src)); } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + DwVfpRegister scratch = temps.AcquireD(); + vpmin(NeonU8, scratch, src.low_fp(), src.high_fp()); + vpmin(NeonU8, scratch, scratch, scratch); + vpmin(NeonU8, scratch, scratch, scratch); + vpmin(NeonU8, scratch, scratch, scratch); + ExtractLane(dst.gp(), scratch, NeonS8, 0); + cmp(dst.gp(), Operand(0)); + mov(dst.gp(), Operand(1), LeaveCC, ne); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + Simd128Register tmp = liftoff::GetSimd128Register(src); + Simd128Register mask = temps.AcquireQ(); + + if (cache_state()->is_used(src)) { + // We only have 1 scratch Q register, so try and reuse src. + LiftoffRegList pinned = LiftoffRegList::ForRegs(src); + LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); + mask = liftoff::GetSimd128Register(unused_pair); + } + + vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7); + // Set i-th bit of each lane i. When AND with tmp, the lanes that + // are signed will have i-th bit set, unsigned will be 0. 
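The bitmask lowerings (i32x4 and i16x8 above, i8x16 continuing just below) collect the sign bit of every lane into the low bits of a general-purpose register; the constants loaded into the mask register give lane i the weight 2^i, so the horizontal adds produce the packed bitmask. A scalar model of i8x16.bitmask, illustrative only:

#include <cstdint>

// i8x16.bitmask: bit i of the result is the sign bit of lane i.
uint32_t I8x16BitMask(const int8_t lanes[16]) {
  uint32_t mask = 0;
  for (int i = 0; i < 16; ++i) {
    if (lanes[i] < 0) mask |= uint32_t{1} << i;
  }
  return mask;
}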
+ vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201)); + vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201)); + vand(tmp, mask, tmp); + vext(mask, tmp, tmp, 8); + vzip(Neon8, mask, tmp); + vpadd(Neon16, tmp.low(), tmp.low(), tmp.high()); + vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); + vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); + vmov(NeonU16, dst.gp(), tmp.low(), 0); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { - bailout(kSimd, "i8x16_shl"); + liftoff::EmitSimdShift<liftoff::kLeft, NeonS8, Neon8>(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { - bailout(kSimd, "i8x16_shli"); + vshl(NeonS8, liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(lhs), rhs & 7); +} + +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::kRight, NeonS8, Neon8>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS8>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::kRight, NeonU8, Neon8>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU8>(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, @@ -2842,6 +3298,30 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst, liftoff::GetSimd128Register(src2)); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + vcvt_s32_f32(liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(src)); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + vcvt_u32_f32(liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(src)); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + vcvt_f32_s32(liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(src)); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + vcvt_f32_u32(liftoff::GetSimd128Register(dst), + liftoff::GetSimd128Register(src)); +} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { diff --git a/chromium/v8/src/wasm/baseline/arm64/liftoff-assembler-arm64.h b/chromium/v8/src/wasm/baseline/arm64/liftoff-assembler-arm64.h index 9c142e4ad0f..03643c6edd7 100644 --- a/chromium/v8/src/wasm/baseline/arm64/liftoff-assembler-arm64.h +++ b/chromium/v8/src/wasm/baseline/arm64/liftoff-assembler-arm64.h @@ -104,6 +104,76 @@ inline MemOperand GetMemOp(LiftoffAssembler* assm, return MemOperand(addr.X(), offset_imm); } +enum class ShiftDirection : bool { kLeft, kRight }; + +enum class ShiftSign : bool { kSigned, kUnsigned }; + +template <ShiftDirection dir, ShiftSign sign = ShiftSign::kSigned> +inline void EmitSimdShift(LiftoffAssembler* assm, VRegister dst, VRegister lhs, + Register rhs, VectorFormat format) { + DCHECK_IMPLIES(dir == ShiftDirection::kLeft, sign == ShiftSign::kSigned); + DCHECK(dst.IsSameFormat(lhs)); + DCHECK_EQ(dst.LaneCount(), 
LaneCountFromFormat(format)); + + UseScratchRegisterScope temps(assm); + VRegister tmp = temps.AcquireV(format); + Register shift = dst.Is2D() ? temps.AcquireX() : temps.AcquireW(); + int mask = LaneSizeInBitsFromFormat(format) - 1; + assm->And(shift, rhs, mask); + assm->Dup(tmp, shift); + + if (dir == ShiftDirection::kRight) { + assm->Neg(tmp, tmp); + } + + if (sign == ShiftSign::kSigned) { + assm->Sshl(dst, lhs, tmp); + } else { + assm->Ushl(dst, lhs, tmp); + } +} + +template <VectorFormat format, ShiftSign sign> +inline void EmitSimdShiftRightImmediate(LiftoffAssembler* assm, VRegister dst, + VRegister lhs, int32_t rhs) { + // Sshr and Ushr does not allow shifts to be 0, so check for that here. + int mask = LaneSizeInBitsFromFormat(format) - 1; + int32_t shift = rhs & mask; + if (!shift) { + if (dst != lhs) { + assm->Mov(dst, lhs); + } + return; + } + + if (sign == ShiftSign::kSigned) { + assm->Sshr(dst, lhs, rhs & mask); + } else { + assm->Ushr(dst, lhs, rhs & mask); + } +} + +inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + // AnyTrue does not depend on the number of lanes, so we can use V4S for all. + UseScratchRegisterScope scope(assm); + VRegister temp = scope.AcquireV(kFormatS); + assm->Umaxv(temp, src.fp().V4S()); + assm->Umov(dst.gp().W(), temp, 0); + assm->Cmp(dst.gp().W(), 0); + assm->Cset(dst.gp().W(), ne); +} + +inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src, VectorFormat format) { + UseScratchRegisterScope scope(assm); + VRegister temp = scope.AcquireV(ScalarFormatFromFormat(format)); + assm->Uminv(temp, VRegister::Create(src.fp().code(), format)); + assm->Umov(dst.gp().W(), temp, 0); + assm->Cmp(dst.gp().W(), 0); + assm->Cset(dst.gp().W(), ne); +} + } // namespace liftoff int LiftoffAssembler::PrepareStackFrame() { @@ -299,8 +369,6 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, case LoadType::kS128Load: Ldr(dst.fp().Q(), src_op); break; - default: - UNREACHABLE(); } } @@ -337,65 +405,280 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, case StoreType::kS128Store: Str(src.fp().Q(), dst_op); break; + } +} + +namespace liftoff { +#define __ lasm-> + +inline Register CalculateActualAddress(LiftoffAssembler* lasm, + Register addr_reg, Register offset_reg, + int32_t offset_imm, + Register result_reg) { + DCHECK_NE(offset_reg, no_reg); + DCHECK_NE(addr_reg, no_reg); + __ Add(result_reg, addr_reg, Operand(offset_reg)); + if (offset_imm != 0) { + __ Add(result_reg, result_reg, Operand(offset_imm)); + } + return result_reg; +} + +enum class Binop { kAdd, kSub, kAnd, kOr, kXor, kExchange }; + +inline void AtomicBinop(LiftoffAssembler* lasm, Register dst_addr, + Register offset_reg, uint32_t offset_imm, + LiftoffRegister value, LiftoffRegister result, + StoreType type, Binop op) { + LiftoffRegList pinned = + LiftoffRegList::ForRegs(dst_addr, offset_reg, value, result); + Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp(); + + // Make sure that {result} is unique. + Register result_reg = result.gp(); + if (result_reg == value.gp() || result_reg == dst_addr || + result_reg == offset_reg) { + result_reg = __ GetUnusedRegister(kGpReg, pinned).gp(); + } + + UseScratchRegisterScope temps(lasm); + Register actual_addr = liftoff::CalculateActualAddress( + lasm, dst_addr, offset_reg, offset_imm, temps.AcquireX()); + + // Allocate an additional {temp} register to hold the result that should be + // stored to memory. 
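The loop that follows is the usual load-exclusive/store-exclusive pattern: ldaxr reads the old value, the requested operation is applied into {temp}, stlxr tries to publish it, and the sequence retries until the exclusive store succeeds; the old value is what the wasm atomic RMW instruction returns. In portable C++ the same contract looks roughly like this (a sketch only, not the emitted code):

#include <atomic>
#include <cstdint>

// 32-bit atomic rmw.add semantics: returns the value observed before the add.
uint32_t AtomicRmwAdd(std::atomic<uint32_t>* addr, uint32_t operand) {
  uint32_t old = addr->load(std::memory_order_acquire);
  while (!addr->compare_exchange_weak(old, old + operand,
                                      std::memory_order_acq_rel)) {
    // compare_exchange_weak refreshes 'old' on failure; retry, as the
    // stlxr/cbnz loop does.
  }
  return old;
}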
Note that {temp} and {store_result} are not allowed to be + // the same register. + Register temp = temps.AcquireX(); + + Label retry; + __ Bind(&retry); + switch (type.value()) { + case StoreType::kI64Store8: + case StoreType::kI32Store8: + __ ldaxrb(result_reg.W(), actual_addr); + break; + case StoreType::kI64Store16: + case StoreType::kI32Store16: + __ ldaxrh(result_reg.W(), actual_addr); + break; + case StoreType::kI64Store32: + case StoreType::kI32Store: + __ ldaxr(result_reg.W(), actual_addr); + break; + case StoreType::kI64Store: + __ ldaxr(result_reg.X(), actual_addr); + break; default: UNREACHABLE(); } + + switch (op) { + case Binop::kAdd: + __ add(temp, result_reg, value.gp()); + break; + case Binop::kSub: + __ sub(temp, result_reg, value.gp()); + break; + case Binop::kAnd: + __ and_(temp, result_reg, value.gp()); + break; + case Binop::kOr: + __ orr(temp, result_reg, value.gp()); + break; + case Binop::kXor: + __ eor(temp, result_reg, value.gp()); + break; + case Binop::kExchange: + __ mov(temp, value.gp()); + break; + } + + switch (type.value()) { + case StoreType::kI64Store8: + case StoreType::kI32Store8: + __ stlxrb(store_result.W(), temp.W(), actual_addr); + break; + case StoreType::kI64Store16: + case StoreType::kI32Store16: + __ stlxrh(store_result.W(), temp.W(), actual_addr); + break; + case StoreType::kI64Store32: + case StoreType::kI32Store: + __ stlxr(store_result.W(), temp.W(), actual_addr); + break; + case StoreType::kI64Store: + __ stlxr(store_result.W(), temp.X(), actual_addr); + break; + default: + UNREACHABLE(); + } + + __ Cbnz(store_result.W(), &retry); + + if (result_reg != result.gp()) { + __ mov(result.gp(), result_reg); + } } +#undef __ +} // namespace liftoff + void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr, Register offset_reg, uint32_t offset_imm, LoadType type, LiftoffRegList pinned) { - bailout(kAtomics, "AtomicLoad"); + UseScratchRegisterScope temps(this); + Register src_reg = liftoff::CalculateActualAddress( + this, src_addr, offset_reg, offset_imm, temps.AcquireX()); + switch (type.value()) { + case LoadType::kI32Load8U: + case LoadType::kI64Load8U: + Ldarb(dst.gp().W(), src_reg); + return; + case LoadType::kI32Load16U: + case LoadType::kI64Load16U: + Ldarh(dst.gp().W(), src_reg); + return; + case LoadType::kI32Load: + case LoadType::kI64Load32U: + Ldar(dst.gp().W(), src_reg); + return; + case LoadType::kI64Load: + Ldar(dst.gp().X(), src_reg); + return; + default: + UNREACHABLE(); + } } void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister src, StoreType type, LiftoffRegList pinned) { - bailout(kAtomics, "AtomicStore"); + UseScratchRegisterScope temps(this); + Register dst_reg = liftoff::CalculateActualAddress( + this, dst_addr, offset_reg, offset_imm, temps.AcquireX()); + switch (type.value()) { + case StoreType::kI64Store8: + case StoreType::kI32Store8: + Stlrb(src.gp().W(), dst_reg); + return; + case StoreType::kI64Store16: + case StoreType::kI32Store16: + Stlrh(src.gp().W(), dst_reg); + return; + case StoreType::kI64Store32: + case StoreType::kI32Store: + Stlr(src.gp().W(), dst_reg); + return; + case StoreType::kI64Store: + Stlr(src.gp().X(), dst_reg); + return; + default: + UNREACHABLE(); + } } void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicAdd"); + liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result, + 
type, liftoff::Binop::kAdd); } void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicSub"); + liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result, + type, liftoff::Binop::kSub); } void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicAnd"); + liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result, + type, liftoff::Binop::kAnd); } void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicOr"); + liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result, + type, liftoff::Binop::kOr); } void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicXor"); + liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result, + type, liftoff::Binop::kXor); } void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicExchange"); + liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result, + type, liftoff::Binop::kExchange); } void LiftoffAssembler::AtomicCompareExchange( Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicCompareExchange"); + LiftoffRegList pinned = + LiftoffRegList::ForRegs(dst_addr, offset_reg, expected, new_value); + + Register result_reg = result.gp(); + if (pinned.has(result)) { + result_reg = GetUnusedRegister(kGpReg, pinned).gp(); + } + + UseScratchRegisterScope temps(this); + Register store_result = temps.AcquireW(); + + Register actual_addr = liftoff::CalculateActualAddress( + this, dst_addr, offset_reg, offset_imm, temps.AcquireX()); + + Label retry; + Label done; + Bind(&retry); + switch (type.value()) { + case StoreType::kI64Store8: + case StoreType::kI32Store8: + ldaxrb(result_reg.W(), actual_addr); + Cmp(result.gp().W(), Operand(expected.gp().W(), UXTB)); + B(ne, &done); + stlxrb(store_result.W(), new_value.gp().W(), actual_addr); + break; + case StoreType::kI64Store16: + case StoreType::kI32Store16: + ldaxrh(result_reg.W(), actual_addr); + Cmp(result.gp().W(), Operand(expected.gp().W(), UXTH)); + B(ne, &done); + stlxrh(store_result.W(), new_value.gp().W(), actual_addr); + break; + case StoreType::kI64Store32: + case StoreType::kI32Store: + ldaxr(result_reg.W(), actual_addr); + Cmp(result.gp().W(), Operand(expected.gp().W(), UXTW)); + B(ne, &done); + stlxr(store_result.W(), new_value.gp().W(), actual_addr); + break; + case StoreType::kI64Store: + ldaxr(result_reg.X(), actual_addr); + Cmp(result.gp().X(), Operand(expected.gp().X(), UXTX)); + B(ne, &done); + stlxr(store_result.W(), new_value.gp().X(), actual_addr); + break; + default: + UNREACHABLE(); + } + + Cbnz(store_result.W(), &retry); + Bind(&done); + + if (result_reg != result.gp()) { + mov(result.gp(), result_reg); + } } void LiftoffAssembler::AtomicFence() { Dmb(InnerShareable, BarrierAll); } @@ -439,7 +722,7 @@ void LiftoffAssembler::Move(DoubleRegister dst, 
DoubleRegister src, Fmov(dst.D(), src.D()); } else { DCHECK_EQ(kWasmS128, type); - Fmov(dst.Q(), src.Q()); + Mov(dst.Q(), src.Q()); } } @@ -921,6 +1204,30 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, B(trap, ne); return true; } + case kExprI32SConvertSatF32: + Fcvtzs(dst.gp().W(), src.fp().S()); + return true; + case kExprI32UConvertSatF32: + Fcvtzu(dst.gp().W(), src.fp().S()); + return true; + case kExprI32SConvertSatF64: + Fcvtzs(dst.gp().W(), src.fp().D()); + return true; + case kExprI32UConvertSatF64: + Fcvtzu(dst.gp().W(), src.fp().D()); + return true; + case kExprI64SConvertSatF32: + Fcvtzs(dst.gp().X(), src.fp().S()); + return true; + case kExprI64UConvertSatF32: + Fcvtzu(dst.gp().X(), src.fp().S()); + return true; + case kExprI64SConvertSatF64: + Fcvtzs(dst.gp().X(), src.fp().D()); + return true; + case kExprI64UConvertSatF64: + Fcvtzu(dst.gp().X(), src.fp().D()); + return true; case kExprI32ReinterpretF32: Fmov(dst.gp().W(), src.fp().S()); return true; @@ -1102,6 +1409,70 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, } } +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind transform, + uint32_t* protected_load_pc) { + UseScratchRegisterScope temps(this); + MemOperand src_op = + liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm); + *protected_load_pc = pc_offset(); + MachineType memtype = type.mem_type(); + + if (transform == LoadTransformationKind::kExtend) { + if (memtype == MachineType::Int8()) { + Ldr(dst.fp().D(), src_op); + Sxtl(dst.fp().V8H(), dst.fp().V8B()); + } else if (memtype == MachineType::Uint8()) { + Ldr(dst.fp().D(), src_op); + Uxtl(dst.fp().V8H(), dst.fp().V8B()); + } else if (memtype == MachineType::Int16()) { + Ldr(dst.fp().D(), src_op); + Sxtl(dst.fp().V4S(), dst.fp().V4H()); + } else if (memtype == MachineType::Uint16()) { + Ldr(dst.fp().D(), src_op); + Uxtl(dst.fp().V4S(), dst.fp().V4H()); + } else if (memtype == MachineType::Int32()) { + Ldr(dst.fp().D(), src_op); + Sxtl(dst.fp().V2D(), dst.fp().V2S()); + } else if (memtype == MachineType::Uint32()) { + Ldr(dst.fp().D(), src_op); + Uxtl(dst.fp().V2D(), dst.fp().V2S()); + } + } else { + // ld1r only allows no offset or post-index, so emit an add. + DCHECK_EQ(LoadTransformationKind::kSplat, transform); + if (src_op.IsRegisterOffset()) { + // We have 2 tmp gps, so it's okay to acquire 1 more here, and actually + // doesn't matter if we acquire the same one. 
+ Register tmp = temps.AcquireX(); + Add(tmp, src_op.base(), src_op.regoffset().X()); + src_op = MemOperand(tmp.X(), 0); + } else if (src_op.IsImmediateOffset() && src_op.offset() != 0) { + Register tmp = temps.AcquireX(); + Add(tmp, src_op.base(), src_op.offset()); + src_op = MemOperand(tmp.X(), 0); + } + + if (memtype == MachineType::Int8()) { + ld1r(dst.fp().V16B(), src_op); + } else if (memtype == MachineType::Int16()) { + ld1r(dst.fp().V8H(), src_op); + } else if (memtype == MachineType::Int32()) { + ld1r(dst.fp().V4S(), src_op); + } else if (memtype == MachineType::Int64()) { + ld1r(dst.fp().V2D(), src_op); + } + } +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + Tbl(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B()); +} + void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src) { Dup(dst.fp().V2D(), src.fp().D(), 0); @@ -1262,12 +1633,42 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst, void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { - bailout(kSimd, "i64x2_shl"); + liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>( + this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D); } void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { - bailout(kSimd, "i64x2_shli"); + Shl(dst.fp().V2D(), lhs.fp().V2D(), rhs & 63); +} + +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight, + liftoff::ShiftSign::kSigned>( + this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftRightImmediate<kFormat2D, liftoff::ShiftSign::kSigned>( + this, dst.fp().V2D(), lhs.fp().V2D(), rhs); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight, + liftoff::ShiftSign::kUnsigned>( + this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftRightImmediate<kFormat2D, + liftoff::ShiftSign::kUnsigned>( + this, dst.fp().V2D(), lhs.fp().V2D(), rhs); } void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, @@ -1327,14 +1728,69 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, Neg(dst.fp().V4S(), src.fp().V4S()); } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue(this, dst, src, kFormat4S); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + VRegister tmp = temps.AcquireQ(); + VRegister mask = temps.AcquireQ(); + + Sshr(tmp.V4S(), src.fp().V4S(), 31); + // Set i-th bit of each lane i. When AND with tmp, the lanes that + // are signed will have i-th bit set, unsigned will be 0. 
+ Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001); + And(tmp.V16B(), mask.V16B(), tmp.V16B()); + Addv(tmp.S(), tmp.V4S()); + Mov(dst.gp().W(), tmp.V4S(), 0); +} + void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { - bailout(kSimd, "i32x4_shl"); + liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>( + this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S); } void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { - bailout(kSimd, "i32x4_shli"); + Shl(dst.fp().V4S(), lhs.fp().V4S(), rhs & 31); +} + +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight, + liftoff::ShiftSign::kSigned>( + this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftRightImmediate<kFormat4S, liftoff::ShiftSign::kSigned>( + this, dst.fp().V4S(), lhs.fp().V4S(), rhs); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight, + liftoff::ShiftSign::kUnsigned>( + this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftRightImmediate<kFormat4S, + liftoff::ShiftSign::kUnsigned>( + this, dst.fp().V4S(), lhs.fp().V4S(), rhs); } void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, @@ -1408,14 +1864,69 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, Neg(dst.fp().V8H(), src.fp().V8H()); } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue(this, dst, src, kFormat8H); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + VRegister tmp = temps.AcquireQ(); + VRegister mask = temps.AcquireQ(); + + Sshr(tmp.V8H(), src.fp().V8H(), 15); + // Set i-th bit of each lane i. When AND with tmp, the lanes that + // are signed will have i-th bit set, unsigned will be 0. 
+ Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001); + And(tmp.V16B(), mask.V16B(), tmp.V16B()); + Addv(tmp.H(), tmp.V8H()); + Mov(dst.gp().W(), tmp.V8H(), 0); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { - bailout(kSimd, "i16x8_shl"); + liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>( + this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H); } void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { - bailout(kSimd, "i16x8_shli"); + Shl(dst.fp().V8H(), lhs.fp().V8H(), rhs & 15); +} + +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight, + liftoff::ShiftSign::kSigned>( + this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftRightImmediate<kFormat8H, liftoff::ShiftSign::kSigned>( + this, dst.fp().V8H(), lhs.fp().V8H(), rhs); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight, + liftoff::ShiftSign::kUnsigned>( + this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftRightImmediate<kFormat8H, + liftoff::ShiftSign::kUnsigned>( + this, dst.fp().V8H(), lhs.fp().V8H(), rhs); } void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, @@ -1481,6 +1992,45 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst, Umax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H()); } +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + VRegister src1 = lhs.fp(); + VRegister src2 = rhs.fp(); + VRegister temp = dst.fp(); + if (dst == lhs || dst == rhs) { + // dst overlaps with lhs or rhs, so we need a temporary. + temp = GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp(); + } + + UseScratchRegisterScope scope(this); + + if (src1 != src2 && !AreConsecutive(src1, src2)) { + // Tbl needs consecutive registers, which our scratch registers are. + src1 = scope.AcquireV(kFormat16B); + src2 = scope.AcquireV(kFormat16B); + DCHECK(AreConsecutive(src1, src2)); + Mov(src1.Q(), lhs.fp().Q()); + Mov(src2.Q(), rhs.fp().Q()); + } + + uint8_t mask = lhs == rhs ? 
0x0F : 0x1F; + int64_t imms[2] = {0, 0}; + for (int i = 7; i >= 0; i--) { + imms[0] = (imms[0] << 8) | (shuffle[i] & mask); + imms[1] = (imms[1] << 8) | (shuffle[i + 8] & mask); + } + + Movi(temp.V16B(), imms[1], imms[0]); + + if (src1 == src2) { + Tbl(dst.fp().V16B(), src1.V16B(), temp.V16B()); + } else { + Tbl(dst.fp().V16B(), src1.V16B(), src2.V16B(), temp.V16B()); + } +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { Dup(dst.fp().V16B(), src.gp().W()); @@ -1513,14 +2063,71 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, Neg(dst.fp().V16B(), src.fp().V16B()); } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue(this, dst, src, kFormat16B); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + UseScratchRegisterScope temps(this); + VRegister tmp = temps.AcquireQ(); + VRegister mask = temps.AcquireQ(); + + // Set i-th bit of each lane i. When AND with tmp, the lanes that + // are signed will have i-th bit set, unsigned will be 0. + Sshr(tmp.V16B(), src.fp().V16B(), 7); + Movi(mask.V2D(), 0x8040'2010'0804'0201); + And(tmp.V16B(), mask.V16B(), tmp.V16B()); + Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8); + Zip1(tmp.V16B(), tmp.V16B(), mask.V16B()); + Addv(tmp.H(), tmp.V8H()); + Mov(dst.gp().W(), tmp.V8H(), 0); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { - bailout(kSimd, "i8x16_shl"); + liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>( + this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B); } void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { - bailout(kSimd, "i8x16_shli"); + Shl(dst.fp().V16B(), lhs.fp().V16B(), rhs & 7); +} + +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight, + liftoff::ShiftSign::kSigned>( + this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftRightImmediate<kFormat16B, liftoff::ShiftSign::kSigned>( + this, dst.fp().V16B(), lhs.fp().V16B(), rhs); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight, + liftoff::ShiftSign::kUnsigned>( + this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftRightImmediate<kFormat16B, + liftoff::ShiftSign::kUnsigned>( + this, dst.fp().V16B(), lhs.fp().V16B(), rhs); } void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, @@ -1750,6 +2357,26 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst, Bsl(dst.fp().V16B(), src1.fp().V16B(), src2.fp().V16B()); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + Fcvtzs(dst.fp().V4S(), src.fp().V4S()); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + Fcvtzu(dst.fp().V4S(), src.fp().V4S()); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister 
dst, + LiftoffRegister src) { + Scvtf(dst.fp().V4S(), src.fp().V4S()); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + Ucvtf(dst.fp().V4S(), src.fp().V4S()); +} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { diff --git a/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h b/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h index 7a1d629bf2d..468450aef66 100644 --- a/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h +++ b/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h @@ -130,7 +130,7 @@ inline Register GetTmpByteRegister(LiftoffAssembler* assm, Register candidate) { if (candidate.is_byte_register()) return candidate; // {GetUnusedRegister()} may insert move instructions to spill registers to // the stack. This is OK because {mov} does not change the status flags. - return assm->GetUnusedRegister(liftoff::kByteRegs).gp(); + return assm->GetUnusedRegister(liftoff::kByteRegs, {}).gp(); } inline void MoveStackValue(LiftoffAssembler* assm, const Operand& src, @@ -336,8 +336,6 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, case LoadType::kS128Load: movdqu(dst.fp(), src_op); break; - default: - UNREACHABLE(); } } @@ -405,8 +403,6 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, case StoreType::kS128Store: Movdqu(dst_op, src.fp()); break; - default: - UNREACHABLE(); } } @@ -494,7 +490,56 @@ void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg, void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicAdd"); + if (type.value() == StoreType::kI64Store) { + bailout(kAtomics, "AtomicAdd"); + return; + } + + DCHECK_EQ(value, result); + DCHECK(!cache_state()->is_used(result)); + bool is_64_bit_op = type.value_type() == kWasmI64; + + Register value_reg = is_64_bit_op ? value.low_gp() : value.gp(); + Register result_reg = is_64_bit_op ? result.low_gp() : result.gp(); + + bool is_byte_store = type.size() == 1; + LiftoffRegList pinned = + LiftoffRegList::ForRegs(dst_addr, value_reg, offset_reg); + + // Ensure that {value_reg} is a valid register. + if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) { + Register safe_value_reg = + GetUnusedRegister(liftoff::kByteRegs, pinned).gp(); + mov(safe_value_reg, value_reg); + value_reg = safe_value_reg; + } + + Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm); + lock(); + switch (type.value()) { + case StoreType::kI64Store8: + case StoreType::kI32Store8: + xadd_b(dst_op, value_reg); + movzx_b(result_reg, value_reg); + break; + case StoreType::kI64Store16: + case StoreType::kI32Store16: + xadd_w(dst_op, value_reg); + movzx_w(result_reg, value_reg); + break; + case StoreType::kI64Store32: + case StoreType::kI32Store: + xadd(dst_op, value_reg); + if (value_reg != result_reg) { + mov(result_reg, value_reg); + } + break; + default: + UNREACHABLE(); + } + if (is_64_bit_op) { + xor_(result.high_gp(), result.high_gp()); + } } void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg, @@ -1349,7 +1394,7 @@ inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst, // We need one tmp register to extract the sign bit. Get it right at the // beginning, such that the spilling code is not accidentially jumped over. 
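The ia32 AtomicAdd above is built on lock xadd, which atomically adds the register into memory and leaves the previous memory value in that register, so a movzx or mov of that register yields the result wasm expects; for the narrow i64 forms the high word of the result is cleared because the old value is zero-extended. A sketch of the observable semantics for the 8-bit i64 variant, illustrative only:

#include <atomic>
#include <cstdint>

// i64.atomic.rmw8.add_u: the 8-bit value read before the add is zero-extended
// into the 64-bit result, hence the xor of result.high_gp() above.
uint64_t AtomicRmw8AddU(std::atomic<uint8_t>* addr, uint64_t operand) {
  uint8_t old = addr->fetch_add(static_cast<uint8_t>(operand));
  return static_cast<uint64_t>(old);  // high 32 bits are always zero
}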
- Register tmp = assm->GetUnusedRegister(kGpReg).gp(); + Register tmp = assm->GetUnusedRegister(kGpReg, {}).gp(); #define dop(name, ...) \ do { \ @@ -1412,9 +1457,9 @@ void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs, void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs, DoubleRegister rhs) { static constexpr int kF32SignBit = 1 << 31; - Register scratch = GetUnusedRegister(kGpReg).gp(); - Register scratch2 = - GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(scratch)).gp(); + LiftoffRegList pinned; + Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp(); + Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp(); Movd(scratch, lhs); // move {lhs} into {scratch}. and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}. Movd(scratch2, rhs); // move {rhs} into {scratch2}. @@ -1541,9 +1586,9 @@ void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, static constexpr int kF32SignBit = 1 << 31; // On ia32, we cannot hold the whole f64 value in a gp register, so we just // operate on the upper half (UH). - Register scratch = GetUnusedRegister(kGpReg).gp(); - Register scratch2 = - GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(scratch)).gp(); + LiftoffRegList pinned; + Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp(); + Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp(); Pextrd(scratch, lhs, 1); // move UH of {lhs} into {scratch}. and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}. @@ -1612,6 +1657,7 @@ void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) { } namespace liftoff { +#define __ assm-> // Used for float to int conversions. If the value in {converted_back} equals // {src} afterwards, the conversion succeeded. 
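The conversion helpers here implement the trapping truncations by converting, converting the integer back, and comparing against the rounded input; a mismatch or an unordered compare (NaN) sends execution to the trap label. The acceptance condition they enforce can be written as a plain range check, sketched below for f64 to signed i32 (illustrative only, not the emitted instruction sequence):

#include <cmath>
#include <cstdint>

// Trapping i32.trunc_f64_s: succeed only for non-NaN inputs whose truncation
// fits in an i32; everything else takes the trap path.
bool TryTruncF64ToI32(double src, int32_t* out) {
  if (std::isnan(src)) return false;            // unordered compare -> trap
  double rounded = std::trunc(src);             // kRoundToZero
  if (rounded < -2147483648.0 || rounded > 2147483647.0) return false;
  *out = static_cast<int32_t>(rounded);
  return true;
}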
template <typename dst_type, typename src_type> @@ -1621,21 +1667,21 @@ inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst, LiftoffRegList pinned) { if (std::is_same<double, src_type>::value) { // f64 if (std::is_signed<dst_type>::value) { // f64 -> i32 - assm->cvttsd2si(dst, src); - assm->Cvtsi2sd(converted_back, dst); + __ cvttsd2si(dst, src); + __ Cvtsi2sd(converted_back, dst); } else { // f64 -> u32 - assm->Cvttsd2ui(dst, src, liftoff::kScratchDoubleReg); - assm->Cvtui2sd(converted_back, dst, - assm->GetUnusedRegister(kGpReg, pinned).gp()); + __ Cvttsd2ui(dst, src, liftoff::kScratchDoubleReg); + __ Cvtui2sd(converted_back, dst, + __ GetUnusedRegister(kGpReg, pinned).gp()); } } else { // f32 if (std::is_signed<dst_type>::value) { // f32 -> i32 - assm->cvttss2si(dst, src); - assm->Cvtsi2ss(converted_back, dst); + __ cvttss2si(dst, src); + __ Cvtsi2ss(converted_back, dst); } else { // f32 -> u32 - assm->Cvttss2ui(dst, src, liftoff::kScratchDoubleReg); - assm->Cvtui2ss(converted_back, dst, - assm->GetUnusedRegister(kGpReg, pinned).gp()); + __ Cvttss2ui(dst, src, liftoff::kScratchDoubleReg); + __ Cvtui2ss(converted_back, dst, + __ GetUnusedRegister(kGpReg, pinned).gp()); } } } @@ -1644,36 +1690,101 @@ template <typename dst_type, typename src_type> inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst, DoubleRegister src, Label* trap) { if (!CpuFeatures::IsSupported(SSE4_1)) { - assm->bailout(kMissingCPUFeature, "no SSE4.1"); + __ bailout(kMissingCPUFeature, "no SSE4.1"); return true; } CpuFeatureScope feature(assm, SSE4_1); LiftoffRegList pinned = LiftoffRegList::ForRegs(src, dst); DoubleRegister rounded = - pinned.set(assm->GetUnusedRegister(kFpReg, pinned)).fp(); + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); DoubleRegister converted_back = - pinned.set(assm->GetUnusedRegister(kFpReg, pinned)).fp(); + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); if (std::is_same<double, src_type>::value) { // f64 - assm->roundsd(rounded, src, kRoundToZero); + __ roundsd(rounded, src, kRoundToZero); } else { // f32 - assm->roundss(rounded, src, kRoundToZero); + __ roundss(rounded, src, kRoundToZero); } ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded, converted_back, pinned); if (std::is_same<double, src_type>::value) { // f64 - assm->ucomisd(converted_back, rounded); + __ ucomisd(converted_back, rounded); } else { // f32 - assm->ucomiss(converted_back, rounded); + __ ucomiss(converted_back, rounded); } // Jump to trap if PF is 0 (one of the operands was NaN) or they are not // equal. 
- assm->j(parity_even, trap); - assm->j(not_equal, trap); + __ j(parity_even, trap); + __ j(not_equal, trap); return true; } + +template <typename dst_type, typename src_type> +inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst, + DoubleRegister src) { + if (!CpuFeatures::IsSupported(SSE4_1)) { + __ bailout(kMissingCPUFeature, "no SSE4.1"); + return true; + } + CpuFeatureScope feature(assm, SSE4_1); + + Label done; + Label not_nan; + Label src_positive; + + LiftoffRegList pinned = LiftoffRegList::ForRegs(src, dst); + DoubleRegister rounded = + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); + DoubleRegister converted_back = + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); + DoubleRegister zero_reg = + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); + + if (std::is_same<double, src_type>::value) { // f64 + __ roundsd(rounded, src, kRoundToZero); + } else { // f32 + __ roundss(rounded, src, kRoundToZero); + } + + ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded, + converted_back, pinned); + if (std::is_same<double, src_type>::value) { // f64 + __ ucomisd(converted_back, rounded); + } else { // f32 + __ ucomiss(converted_back, rounded); + } + + // Return 0 if PF is 0 (one of the operands was NaN) + __ j(parity_odd, &not_nan); + __ xor_(dst, dst); + __ jmp(&done); + + __ bind(&not_nan); + // If rounding is as expected, return result + __ j(equal, &done); + + __ Xorpd(zero_reg, zero_reg); + + // if out-of-bounds, check if src is positive + if (std::is_same<double, src_type>::value) { // f64 + __ ucomisd(src, zero_reg); + } else { // f32 + __ ucomiss(src, zero_reg); + } + __ j(above, &src_positive); + __ mov(dst, Immediate(std::numeric_limits<dst_type>::min())); + __ jmp(&done); + + __ bind(&src_positive); + + __ mov(dst, Immediate(std::numeric_limits<dst_type>::max())); + + __ bind(&done); + return true; +} +#undef __ } // namespace liftoff bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, @@ -1695,6 +1806,18 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, case kExprI32UConvertF64: return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(), src.fp(), trap); + case kExprI32SConvertSatF32: + return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(), + src.fp()); + case kExprI32UConvertSatF32: + return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(), + src.fp()); + case kExprI32SConvertSatF64: + return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(), + src.fp()); + case kExprI32UConvertSatF64: + return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>( + this, dst.gp(), src.fp()); case kExprI32ReinterpretF32: Movd(dst.gp(), src.fp()); return true; @@ -2017,8 +2140,164 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst, (assm->*sse_op)(dst.fp(), shift); } } + +enum class ShiftSignedness { kSigned, kUnsigned }; + +template <bool is_signed> +void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister lhs, LiftoffRegister rhs) { + // Same algorithm is used for both signed and unsigned shifts, the only + // difference is the actual shift and pack in the end. This is the same + // algorithm as used in code-generator-ia32.cc + Register tmp = + assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(rhs)).gp(); + XMMRegister tmp_simd = + assm->GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs)).fp(); + + // Unpack the bytes into words, do logical shifts, and repack.
+ assm->Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp()); + assm->Punpcklbw(dst.fp(), lhs.fp()); + assm->mov(tmp, rhs.gp()); + // Take shift value modulo 8. + assm->and_(tmp, 7); + assm->add(tmp, Immediate(8)); + assm->Movd(tmp_simd, tmp); + if (is_signed) { + assm->Psraw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, + tmp_simd); + assm->Psraw(dst.fp(), dst.fp(), tmp_simd); + assm->Packsswb(dst.fp(), liftoff::kScratchDoubleReg); + } else { + assm->Psrlw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, + tmp_simd); + assm->Psrlw(dst.fp(), dst.fp(), tmp_simd); + assm->Packuswb(dst.fp(), liftoff::kScratchDoubleReg); + } +} + +inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + Register tmp = + assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(dst)).gp(); + assm->xor_(tmp, tmp); + assm->mov(dst.gp(), Immediate(1)); + assm->Ptest(src.fp(), src.fp()); + assm->cmov(zero, dst.gp(), tmp); +} + +template <void (TurboAssembler::*pcmp)(XMMRegister, XMMRegister)> +inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + Register tmp = + assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(dst)).gp(); + XMMRegister tmp_simd = liftoff::kScratchDoubleReg; + assm->mov(tmp, Immediate(1)); + assm->xor_(dst.gp(), dst.gp()); + assm->Pxor(tmp_simd, tmp_simd); + (assm->*pcmp)(tmp_simd, src.fp()); + assm->Ptest(tmp_simd, tmp_simd); + assm->cmov(zero, dst.gp(), tmp); +} + } // namespace liftoff +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind transform, + uint32_t* protected_load_pc) { + DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max()); + Operand src_op{src_addr, offset_reg, times_1, + static_cast<int32_t>(offset_imm)}; + *protected_load_pc = pc_offset(); + + MachineType memtype = type.mem_type(); + if (transform == LoadTransformationKind::kExtend) { + if (memtype == MachineType::Int8()) { + Pmovsxbw(dst.fp(), src_op); + } else if (memtype == MachineType::Uint8()) { + Pmovzxbw(dst.fp(), src_op); + } else if (memtype == MachineType::Int16()) { + Pmovsxwd(dst.fp(), src_op); + } else if (memtype == MachineType::Uint16()) { + Pmovzxwd(dst.fp(), src_op); + } else if (memtype == MachineType::Int32()) { + Pmovsxdq(dst.fp(), src_op); + } else if (memtype == MachineType::Uint32()) { + Pmovzxdq(dst.fp(), src_op); + } + } else { + DCHECK_EQ(LoadTransformationKind::kSplat, transform); + if (memtype == MachineType::Int8()) { + Pinsrb(dst.fp(), src_op, 0); + Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + Pshufb(dst.fp(), liftoff::kScratchDoubleReg); + } else if (memtype == MachineType::Int16()) { + Pinsrw(dst.fp(), src_op, 0); + Pshuflw(dst.fp(), dst.fp(), uint8_t{0}); + Punpcklqdq(dst.fp(), dst.fp()); + } else if (memtype == MachineType::Int32()) { + Vbroadcastss(dst.fp(), src_op); + } else if (memtype == MachineType::Int64()) { + Movddup(dst.fp(), src_op); + } + } +} + +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + LiftoffRegister tmp = GetUnusedRegister(kGpReg, {}); + // Prepare 16 byte aligned buffer for shuffle control mask. + mov(tmp.gp(), esp); + and_(esp, -16); + movups(liftoff::kScratchDoubleReg, lhs.fp()); + + for (int i = 3; i >= 0; i--) { + uint32_t mask = 0; + for (int j = 3; j >= 0; j--) { + uint8_t lane = shuffle[i * 4 + j]; + mask <<= 8; + mask |= lane < kSimd128Size ? 
lane : 0x80; + } + push(Immediate(mask)); + } + Pshufb(liftoff::kScratchDoubleReg, Operand(esp, 0)); + + for (int i = 3; i >= 0; i--) { + uint32_t mask = 0; + for (int j = 3; j >= 0; j--) { + uint8_t lane = shuffle[i * 4 + j]; + mask <<= 8; + mask |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80; + } + push(Immediate(mask)); + } + if (dst.fp() != rhs.fp()) { + movups(dst.fp(), rhs.fp()); + } + Pshufb(dst.fp(), Operand(esp, 0)); + Por(dst.fp(), liftoff::kScratchDoubleReg); + mov(esp, tmp.gp()); +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + XMMRegister mask = + GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp(); + // Out-of-range indices should return 0, add 112 (0x70) so that any value > 15 + // saturates to 128 (top bit set), so pshufb will zero that lane. + TurboAssembler::Move(mask, uint32_t{0x70707070}); + Pshufd(mask, mask, uint8_t{0x0}); + Paddusb(mask, rhs.fp()); + if (lhs != dst) { + Movaps(dst.fp(), lhs.fp()); + } + Pshufb(dst.fp(), mask); +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { Movd(dst.fp(), src.gp()); @@ -2350,6 +2629,21 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + Pmovmskb(dst.gp(), src.fp()); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32); @@ -2381,7 +2675,7 @@ void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32); - LiftoffRegister tmp = GetUnusedRegister(tmp_rc); + LiftoffRegister tmp = GetUnusedRegister(tmp_rc, {}); byte shift = static_cast<byte>(rhs & 0x7); if (CpuFeatures::IsSupported(AVX)) { CpuFeatureScope scope(this, AVX); @@ -2399,6 +2693,43 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, Pand(dst.fp(), liftoff::kScratchDoubleReg); } +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp()); + Punpcklbw(dst.fp(), lhs.fp()); + uint8_t shift = (rhs & 7) + 8; + Psraw(liftoff::kScratchDoubleReg, shift); + Psraw(dst.fp(), shift); + Packsswb(dst.fp(), liftoff::kScratchDoubleReg); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + Register tmp = GetUnusedRegister(kGpReg, {}).gp(); + // Perform 16-bit shift, then mask away high bits. 
+ uint8_t shift = rhs & 7; + Psrlw(dst.fp(), lhs.fp(), byte{shift}); + + uint8_t bmask = 0xff >> shift; + uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask; + mov(tmp, mask); + Movd(liftoff::kScratchDoubleReg, tmp); + Pshufd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 0); + Pand(dst.fp(), liftoff::kScratchDoubleReg); +} + void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>( @@ -2541,6 +2872,24 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + XMMRegister tmp = liftoff::kScratchDoubleReg; + Packsswb(tmp, src.fp()); + Pmovmskb(dst.gp(), tmp); + shr(dst.gp(), 8); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst, @@ -2553,6 +2902,32 @@ void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>( + this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>( @@ -2639,6 +3014,21 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + Movmskps(dst.gp(), src.fp()); +} + void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst, @@ -2651,6 +3041,32 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { 
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>( + this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>( @@ -2723,6 +3139,56 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + XMMRegister shift = liftoff::kScratchDoubleReg; + XMMRegister tmp = + GetUnusedRegister(RegClass::kFpReg, LiftoffRegList::ForRegs(dst, lhs)) + .fp(); + + // Take shift value modulo 64. + and_(rhs.gp(), Immediate(63)); + Movd(shift, rhs.gp()); + + // Set up a mask [0x80000000,0,0x80000000,0]. + Pcmpeqb(tmp, tmp); + Psllq(tmp, tmp, 63); + + Psrlq(tmp, tmp, shift); + Psrlq(dst.fp(), lhs.fp(), shift); + Pxor(dst.fp(), tmp); + Psubq(dst.fp(), tmp); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + XMMRegister tmp = liftoff::kScratchDoubleReg; + int32_t shift = rhs & 63; + + // Set up a mask [0x80000000,0,0x80000000,0]. + Pcmpeqb(tmp, tmp); + Psllq(tmp, tmp, 63); + + Psrlq(tmp, tmp, shift); + Psrlq(dst.fp(), lhs.fp(), shift); + Pxor(dst.fp(), tmp); + Psubq(dst.fp(), tmp); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>( @@ -2990,6 +3456,97 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, Andnpd(dst.fp(), liftoff::kScratchDoubleReg); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + // NAN->0 + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vcmpeqps(liftoff::kScratchDoubleReg, src.fp(), src.fp()); + vpand(dst.fp(), src.fp(), liftoff::kScratchDoubleReg); + } else { + movaps(liftoff::kScratchDoubleReg, src.fp()); + cmpeqps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + pand(dst.fp(), liftoff::kScratchDoubleReg); + } + // Set top bit if >= 0 (but not -0.0!). + Pxor(liftoff::kScratchDoubleReg, dst.fp()); + // Convert to int. + Cvttps2dq(dst.fp(), dst.fp()); + // Set top bit if >=0 is now < 0. + Pand(liftoff::kScratchDoubleReg, dst.fp()); + Psrad(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{31}); + // Set positive overflow lanes to 0x7FFFFFFF. 
+ Pxor(dst.fp(), liftoff::kScratchDoubleReg); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128); + DoubleRegister tmp = + GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, src)).fp(); + // NAN->0, negative->0. + Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vmaxps(dst.fp(), src.fp(), liftoff::kScratchDoubleReg); + } else { + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + maxps(dst.fp(), liftoff::kScratchDoubleReg); + } + // scratch: float representation of max_signed. + Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, + uint8_t{1}); // 0x7fffffff + Cvtdq2ps(liftoff::kScratchDoubleReg, + liftoff::kScratchDoubleReg); // 0x4f000000 + // tmp: convert (src-max_signed). + // Set positive overflow lanes to 0x7FFFFFFF. + // Set negative lanes to 0. + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vsubps(tmp, dst.fp(), liftoff::kScratchDoubleReg); + } else { + movaps(tmp, dst.fp()); + subps(tmp, liftoff::kScratchDoubleReg); + } + Cmpleps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, tmp); + Cvttps2dq(tmp, tmp); + Pxor(tmp, liftoff::kScratchDoubleReg); + Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + Pmaxsd(tmp, liftoff::kScratchDoubleReg); + // Convert to int. Overflow lanes above max_signed will be 0x80000000. + Cvttps2dq(dst.fp(), dst.fp()); + // Add (src-max_signed) for overflow lanes. + Paddd(dst.fp(), dst.fp(), tmp); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + Cvtdq2ps(dst.fp(), src.fp()); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); // Zeros. + Pblendw(liftoff::kScratchDoubleReg, src.fp(), + uint8_t{0x55}); // Get lo 16 bits. + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vpsubd(dst.fp(), src.fp(), liftoff::kScratchDoubleReg); // Get hi 16 bits. + } else { + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + psubd(dst.fp(), liftoff::kScratchDoubleReg); + } + Cvtdq2ps(liftoff::kScratchDoubleReg, + liftoff::kScratchDoubleReg); // Convert lo exactly. + Psrld(dst.fp(), dst.fp(), byte{1}); // Divide by 2 to get in unsigned range. + Cvtdq2ps(dst.fp(), dst.fp()); // Convert hi, exactly. + Addps(dst.fp(), dst.fp(), dst.fp()); // Double hi, exactly. + Addps(dst.fp(), dst.fp(), + liftoff::kScratchDoubleReg); // Add hi and lo, may round. 
+} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { @@ -3270,7 +3827,7 @@ void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { } void LiftoffAssembler::CallTrapCallbackForTesting() { - PrepareCallCFunction(0, GetUnusedRegister(kGpReg).gp()); + PrepareCallCFunction(0, GetUnusedRegister(kGpReg, {}).gp()); CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0); } diff --git a/chromium/v8/src/wasm/baseline/liftoff-assembler.cc b/chromium/v8/src/wasm/baseline/liftoff-assembler.cc index 923d375064c..a8b40a7b462 100644 --- a/chromium/v8/src/wasm/baseline/liftoff-assembler.cc +++ b/chromium/v8/src/wasm/baseline/liftoff-assembler.cc @@ -82,35 +82,35 @@ class StackTransferRecipe { DCHECK(load_dst_regs_.is_empty()); } - void TransferStackSlot(const VarState& dst, const VarState& src) { + V8_INLINE void TransferStackSlot(const VarState& dst, const VarState& src) { DCHECK_EQ(dst.type(), src.type()); - switch (dst.loc()) { + if (dst.is_reg()) { + LoadIntoRegister(dst.reg(), src, src.offset()); + return; + } + if (dst.is_const()) { + DCHECK_EQ(dst.i32_const(), src.i32_const()); + return; + } + DCHECK(dst.is_stack()); + switch (src.loc()) { case VarState::kStack: - switch (src.loc()) { - case VarState::kStack: - if (src.offset() == dst.offset()) break; - asm_->MoveStackValue(dst.offset(), src.offset(), src.type()); - break; - case VarState::kRegister: - asm_->Spill(dst.offset(), src.reg(), src.type()); - break; - case VarState::kIntConst: - asm_->Spill(dst.offset(), src.constant()); - break; + if (src.offset() != dst.offset()) { + asm_->MoveStackValue(dst.offset(), src.offset(), src.type()); } break; case VarState::kRegister: - LoadIntoRegister(dst.reg(), src, src.offset()); + asm_->Spill(dst.offset(), src.reg(), src.type()); break; case VarState::kIntConst: - DCHECK_EQ(dst, src); + asm_->Spill(dst.offset(), src.constant()); break; } } - void LoadIntoRegister(LiftoffRegister dst, - const LiftoffAssembler::VarState& src, - uint32_t src_offset) { + V8_INLINE void LoadIntoRegister(LiftoffRegister dst, + const LiftoffAssembler::VarState& src, + uint32_t src_offset) { switch (src.loc()) { case VarState::kStack: LoadStackSlot(dst, src_offset, src.type()); @@ -466,7 +466,7 @@ void LiftoffAssembler::CacheState::InitMerge(const CacheState& source, // they do not move). Try to keep register in registers, but avoid duplicates. InitMergeRegion(this, source_begin, target_begin, num_locals, kKeepStackSlots, kConstantsNotAllowed, kNoReuseRegisters, used_regs); - // Sanity check: All the {used_regs} are really in use now. + // Consistency check: All the {used_regs} are really in use now. DCHECK_EQ(used_regs, used_registers & used_regs); // Last, initialize the section in between. Here, constants are allowed, but @@ -510,24 +510,15 @@ LiftoffAssembler::~LiftoffAssembler() { LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot, LiftoffRegList pinned) { - switch (slot.loc()) { - case VarState::kStack: { - LiftoffRegister reg = - GetUnusedRegister(reg_class_for(slot.type()), pinned); - Fill(reg, slot.offset(), slot.type()); - return reg; - } - case VarState::kRegister: - return slot.reg(); - case VarState::kIntConst: { - RegClass rc = - kNeedI64RegPair && slot.type() == kWasmI64 ? 
kGpRegPair : kGpReg; - LiftoffRegister reg = GetUnusedRegister(rc, pinned); - LoadConstant(reg, slot.constant()); - return reg; - } + if (slot.is_reg()) return slot.reg(); + LiftoffRegister reg = GetUnusedRegister(reg_class_for(slot.type()), pinned); + if (slot.is_const()) { + LoadConstant(reg, slot.constant()); + } else { + DCHECK(slot.is_stack()); + Fill(reg, slot.offset(), slot.type()); } - UNREACHABLE(); + return reg; } LiftoffRegister LiftoffAssembler::LoadI64HalfIntoRegister(VarState slot, @@ -535,7 +526,7 @@ LiftoffRegister LiftoffAssembler::LoadI64HalfIntoRegister(VarState slot, if (slot.is_reg()) { return half == kLowWord ? slot.reg().low() : slot.reg().high(); } - LiftoffRegister dst = GetUnusedRegister(kGpReg); + LiftoffRegister dst = GetUnusedRegister(kGpReg, {}); if (slot.is_stack()) { FillI64Half(dst.gp(), slot.offset(), half); return dst; @@ -548,33 +539,39 @@ LiftoffRegister LiftoffAssembler::LoadI64HalfIntoRegister(VarState slot, return dst; } -LiftoffRegister LiftoffAssembler::PopToRegister(LiftoffRegList pinned) { - DCHECK(!cache_state_.stack_state.empty()); - VarState slot = cache_state_.stack_state.back(); - if (slot.is_reg()) cache_state_.dec_used(slot.reg()); - cache_state_.stack_state.pop_back(); - return LoadToRegister(slot, pinned); -} - LiftoffRegister LiftoffAssembler::PeekToRegister(int index, LiftoffRegList pinned) { DCHECK_LT(index, cache_state_.stack_state.size()); VarState& slot = cache_state_.stack_state.end()[-1 - index]; - if (slot.is_reg()) cache_state_.dec_used(slot.reg()); - LiftoffRegister reg = LoadToRegister(slot, pinned); - if (!slot.is_reg()) { - slot.MakeRegister(reg); + if (slot.is_reg()) { + cache_state_.dec_used(slot.reg()); + return slot.reg(); } + LiftoffRegister reg = LoadToRegister(slot, pinned); + slot.MakeRegister(reg); return reg; } void LiftoffAssembler::PrepareLoopArgs(int num) { for (int i = 0; i < num; ++i) { VarState& slot = cache_state_.stack_state.end()[-1 - i]; - if (!slot.is_const()) continue; - RegClass rc = - kNeedI64RegPair && slot.type() == kWasmI64 ? kGpRegPair : kGpReg; - LiftoffRegister reg = GetUnusedRegister(rc); + if (slot.is_stack()) continue; + RegClass rc = reg_class_for(slot.type()); + if (slot.is_reg()) { + if (cache_state_.get_use_count(slot.reg()) > 1) { + // If the register is used more than once, we cannot use it for the + // merge. Move it to an unused register instead. + LiftoffRegList pinned; + pinned.set(slot.reg()); + LiftoffRegister dst_reg = GetUnusedRegister(rc, pinned); + Move(dst_reg, slot.reg(), slot.type()); + cache_state_.dec_used(slot.reg()); + cache_state_.inc_used(dst_reg); + slot.MakeRegister(dst_reg); + } + continue; + } + LiftoffRegister reg = GetUnusedRegister(rc, {}); LoadConstant(reg, slot.constant()); slot.MakeRegister(reg); cache_state_.inc_used(reg); @@ -724,6 +721,8 @@ void LiftoffAssembler::PrepareBuiltinCall( PrepareStackTransfers(sig, call_descriptor, params.begin(), &stack_slots, &stack_transfers, &param_regs); // Create all the slots. + // Builtin stack parameters are pushed in reversed order. + stack_slots.Reverse(); stack_slots.Construct(); // Execute the stack transfers before filling the instance register. stack_transfers.Execute(); @@ -742,13 +741,14 @@ void LiftoffAssembler::PrepareCall(const FunctionSig* sig, constexpr size_t kInputShift = 1; // Spill all cache slots which are not being used as parameters. - // Don't update any register use counters, they will be reset later anyway.
- for (uint32_t idx = 0, end = cache_state_.stack_height() - num_params; - idx < end; ++idx) { - VarState& slot = cache_state_.stack_state[idx]; - if (!slot.is_reg()) continue; - Spill(slot.offset(), slot.reg(), slot.type()); - slot.MakeStack(); + for (VarState* it = cache_state_.stack_state.end() - 1 - num_params; + it >= cache_state_.stack_state.begin() && + !cache_state_.used_registers.is_empty(); + --it) { + if (!it->is_reg()) continue; + Spill(it->offset(), it->reg(), it->type()); + cache_state_.dec_used(it->reg()); + it->MakeStack(); } LiftoffStackSlots stack_slots(this); @@ -859,6 +859,10 @@ void LiftoffAssembler::Move(LiftoffRegister dst, LiftoffRegister src, // Use the {StackTransferRecipe} to move pairs, as the registers in the // pairs might overlap. StackTransferRecipe(this).MoveRegister(dst, src, type); + } else if (kNeedS128RegPair && dst.is_fp_pair()) { + // Calling low_fp is fine, Move will automatically check the type and + // convert this FP to its SIMD register, and use a SIMD move. + Move(dst.low_fp(), src.low_fp(), type); } else if (dst.is_gp()) { Move(dst.gp(), src.gp(), type); } else { diff --git a/chromium/v8/src/wasm/baseline/liftoff-assembler.h b/chromium/v8/src/wasm/baseline/liftoff-assembler.h index 3377990496f..aad75b18597 100644 --- a/chromium/v8/src/wasm/baseline/liftoff-assembler.h +++ b/chromium/v8/src/wasm/baseline/liftoff-assembler.h @@ -56,20 +56,6 @@ class LiftoffAssembler : public TurboAssembler { DCHECK(type_ == kWasmI32 || type_ == kWasmI64); } - bool operator==(const VarState& other) const { - if (loc_ != other.loc_) return false; - if (type_ != other.type_) return false; - switch (loc_) { - case kStack: - return true; - case kRegister: - return reg_ == other.reg_; - case kIntConst: - return i32_const_ == other.i32_const_; - } - UNREACHABLE(); - } - bool is_stack() const { return loc_ == kStack; } bool is_gp_reg() const { return loc_ == kRegister && reg_.is_gp(); } bool is_fp_reg() const { return loc_ == kRegister && reg_.is_fp(); } @@ -140,6 +126,8 @@ class LiftoffAssembler : public TurboAssembler { CacheState() = default; CacheState(CacheState&&) V8_NOEXCEPT = default; CacheState& operator=(CacheState&&) V8_NOEXCEPT = default; + // Disallow copy construction. + CacheState(const CacheState&) = delete; base::SmallVector<VarState, 8> stack_state; LiftoffRegList used_registers; @@ -277,14 +265,23 @@ class LiftoffAssembler : public TurboAssembler { private: // Make the copy assignment operator private (to be used from {Split()}). CacheState& operator=(const CacheState&) V8_NOEXCEPT = default; - // Disallow copy construction. - CacheState(const CacheState&) = delete; }; explicit LiftoffAssembler(std::unique_ptr<AssemblerBuffer>); ~LiftoffAssembler() override; - LiftoffRegister PopToRegister(LiftoffRegList pinned = {}); + LiftoffRegister LoadToRegister(VarState slot, LiftoffRegList pinned); + + LiftoffRegister PopToRegister(LiftoffRegList pinned = {}) { + DCHECK(!cache_state_.stack_state.empty()); + VarState slot = cache_state_.stack_state.back(); + cache_state_.stack_state.pop_back(); + if (slot.is_reg()) { + cache_state_.dec_used(slot.reg()); + return slot.reg(); + } + return LoadToRegister(slot, pinned); + } // Returns the register which holds the value of stack slot {index}. If the // value is not stored in a register yet, a register is allocated for it. The @@ -340,7 +337,7 @@ class LiftoffAssembler : public TurboAssembler { // possible. 
LiftoffRegister GetUnusedRegister( RegClass rc, std::initializer_list<LiftoffRegister> try_first, - LiftoffRegList pinned = {}) { + LiftoffRegList pinned) { for (LiftoffRegister reg : try_first) { DCHECK_EQ(reg.reg_class(), rc); if (cache_state_.is_free(reg)) return reg; @@ -349,7 +346,7 @@ class LiftoffAssembler : public TurboAssembler { } // Get an unused register for class {rc}, potentially spilling to free one. - LiftoffRegister GetUnusedRegister(RegClass rc, LiftoffRegList pinned = {}) { + LiftoffRegister GetUnusedRegister(RegClass rc, LiftoffRegList pinned) { if (kNeedI64RegPair && rc == kGpRegPair) { LiftoffRegList candidates = kGpCacheRegList; Register low = pinned.set(GetUnusedRegister(candidates, pinned)).gp(); @@ -733,6 +730,15 @@ class LiftoffAssembler : public TurboAssembler { inline void emit_f64_set_cond(Condition condition, Register dst, DoubleRegister lhs, DoubleRegister rhs); + inline void LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, LoadTransformationKind transform, + uint32_t* protected_load_pc); + inline void emit_s8x16_shuffle(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]); + inline void emit_s8x16_swizzle(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src); @@ -801,10 +807,21 @@ class LiftoffAssembler : public TurboAssembler { inline void emit_s128_select(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, LiftoffRegister mask); inline void emit_i8x16_neg(LiftoffRegister dst, LiftoffRegister src); + inline void emit_v8x16_anytrue(LiftoffRegister dst, LiftoffRegister src); + inline void emit_v8x16_alltrue(LiftoffRegister dst, LiftoffRegister src); + inline void emit_i8x16_bitmask(LiftoffRegister dst, LiftoffRegister src); inline void emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs); + inline void emit_i8x16_shr_s(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); + inline void emit_i8x16_shri_s(LiftoffRegister dst, LiftoffRegister lhs, + int32_t rhs); + inline void emit_i8x16_shr_u(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); + inline void emit_i8x16_shri_u(LiftoffRegister dst, LiftoffRegister lhs, + int32_t rhs); inline void emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i8x16_add_saturate_s(LiftoffRegister dst, @@ -832,10 +849,21 @@ class LiftoffAssembler : public TurboAssembler { inline void emit_i8x16_max_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i16x8_neg(LiftoffRegister dst, LiftoffRegister src); + inline void emit_v16x8_anytrue(LiftoffRegister dst, LiftoffRegister src); + inline void emit_v16x8_alltrue(LiftoffRegister dst, LiftoffRegister src); + inline void emit_i16x8_bitmask(LiftoffRegister dst, LiftoffRegister src); inline void emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs); + inline void emit_i16x8_shr_s(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); + inline void emit_i16x8_shri_s(LiftoffRegister dst, LiftoffRegister lhs, + int32_t rhs); 
+ inline void emit_i16x8_shr_u(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); + inline void emit_i16x8_shri_u(LiftoffRegister dst, LiftoffRegister lhs, + int32_t rhs); inline void emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i16x8_add_saturate_s(LiftoffRegister dst, @@ -863,10 +891,21 @@ class LiftoffAssembler : public TurboAssembler { inline void emit_i16x8_max_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src); + inline void emit_v32x4_anytrue(LiftoffRegister dst, LiftoffRegister src); + inline void emit_v32x4_alltrue(LiftoffRegister dst, LiftoffRegister src); + inline void emit_i32x4_bitmask(LiftoffRegister dst, LiftoffRegister src); inline void emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs); + inline void emit_i32x4_shr_s(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); + inline void emit_i32x4_shri_s(LiftoffRegister dst, LiftoffRegister lhs, + int32_t rhs); + inline void emit_i32x4_shr_u(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); + inline void emit_i32x4_shri_u(LiftoffRegister dst, LiftoffRegister lhs, + int32_t rhs); inline void emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, @@ -886,6 +925,14 @@ class LiftoffAssembler : public TurboAssembler { LiftoffRegister rhs); inline void emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs); + inline void emit_i64x2_shr_s(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); + inline void emit_i64x2_shri_s(LiftoffRegister dst, LiftoffRegister lhs, + int32_t rhs); + inline void emit_i64x2_shr_u(LiftoffRegister dst, LiftoffRegister lhs, + LiftoffRegister rhs); + inline void emit_i64x2_shri_u(LiftoffRegister dst, LiftoffRegister lhs, + int32_t rhs); inline void emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); inline void emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, @@ -922,6 +969,14 @@ class LiftoffAssembler : public TurboAssembler { LiftoffRegister rhs); inline void emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); + inline void emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src); + inline void emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src); + inline void emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src); + inline void emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src); inline void emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs); @@ -1074,7 +1129,6 @@ class LiftoffAssembler : public TurboAssembler { } private: - LiftoffRegister LoadToRegister(VarState slot, LiftoffRegList pinned); LiftoffRegister LoadI64HalfIntoRegister(VarState slot, RegPairHalf half); uint32_t num_locals_ = 0; @@ -1090,8 +1144,8 @@ class LiftoffAssembler : public TurboAssembler { LiftoffBailoutReason bailout_reason_ = kSuccess; const char* bailout_detail_ = nullptr; - LiftoffRegister SpillOneRegister(LiftoffRegList candidates, - LiftoffRegList pinned); + V8_NOINLINE LiftoffRegister SpillOneRegister(LiftoffRegList candidates, + LiftoffRegList pinned); // Spill one or two fp registers to get a pair of adjacent fp registers. 
LiftoffRegister SpillAdjacentFpRegisters(LiftoffRegList pinned); }; @@ -1212,19 +1266,19 @@ class LiftoffStackSlots { } void Add(const LiftoffAssembler::VarState& src) { slots_.emplace_back(src); } + void Reverse() { std::reverse(slots_.begin(), slots_.end()); } + inline void Construct(); private: struct Slot { - // Allow move construction. - Slot(Slot&&) V8_NOEXCEPT = default; Slot(const LiftoffAssembler::VarState& src, uint32_t src_offset, RegPairHalf half) : src_(src), src_offset_(src_offset), half_(half) {} explicit Slot(const LiftoffAssembler::VarState& src) : src_(src), half_(kLowWord) {} - const LiftoffAssembler::VarState src_; + LiftoffAssembler::VarState src_; uint32_t src_offset_ = 0; RegPairHalf half_; }; diff --git a/chromium/v8/src/wasm/baseline/liftoff-compiler.cc b/chromium/v8/src/wasm/baseline/liftoff-compiler.cc index 4d0d9dbceca..d2beb398c15 100644 --- a/chromium/v8/src/wasm/baseline/liftoff-compiler.cc +++ b/chromium/v8/src/wasm/baseline/liftoff-compiler.cc @@ -8,6 +8,7 @@ #include "src/codegen/assembler-inl.h" // TODO(clemensb): Remove dependences on compiler stuff. #include "src/codegen/interface-descriptors.h" +#include "src/codegen/machine-type.h" #include "src/codegen/macro-assembler-inl.h" #include "src/compiler/linkage.h" #include "src/compiler/wasm-compiler.h" @@ -26,7 +27,7 @@ #include "src/wasm/wasm-engine.h" #include "src/wasm/wasm-linkage.h" #include "src/wasm/wasm-objects.h" -#include "src/wasm/wasm-opcodes.h" +#include "src/wasm/wasm-opcodes-inl.h" namespace v8 { namespace internal { @@ -280,13 +281,15 @@ class LiftoffCompiler { // For debugging, we need to spill registers before a trap, to be able to // inspect them. - struct SpilledRegistersBeforeTrap { + struct SpilledRegistersBeforeTrap : public ZoneObject { struct Entry { int offset; LiftoffRegister reg; ValueType type; }; - std::vector<Entry> entries; + ZoneVector<Entry> entries; + + explicit SpilledRegistersBeforeTrap(Zone* zone) : entries(zone) {} }; struct OutOfLineCode { @@ -298,13 +301,13 @@ class LiftoffCompiler { uint32_t pc; // for trap handler. 
// These two pointers will only be used for debug code: DebugSideTableBuilder::EntryBuilder* debug_sidetable_entry_builder; - std::unique_ptr<SpilledRegistersBeforeTrap> spilled_registers; + SpilledRegistersBeforeTrap* spilled_registers; // Named constructors: static OutOfLineCode Trap( WasmCode::RuntimeStubId s, WasmCodePosition pos, uint32_t pc, DebugSideTableBuilder::EntryBuilder* debug_sidetable_entry_builder, - std::unique_ptr<SpilledRegistersBeforeTrap> spilled_registers) { + SpilledRegistersBeforeTrap* spilled_registers) { DCHECK_LT(0, pos); return {{}, {}, @@ -313,13 +316,13 @@ class LiftoffCompiler { {}, pc, debug_sidetable_entry_builder, - std::move(spilled_registers)}; + spilled_registers}; } static OutOfLineCode StackCheck( WasmCodePosition pos, LiftoffRegList regs, DebugSideTableBuilder::EntryBuilder* debug_sidetable_entry_builder) { return {{}, {}, WasmCode::kWasmStackGuard, pos, - regs, 0, debug_sidetable_entry_builder, {}}; + regs, 0, debug_sidetable_entry_builder, nullptr}; } }; @@ -335,6 +338,9 @@ class LiftoffCompiler { env_(env), debug_sidetable_builder_(debug_sidetable_builder), for_debugging_(for_debugging), + out_of_line_code_(compilation_zone), + source_position_table_builder_(compilation_zone), + protected_instructions_(compilation_zone), compilation_zone_(compilation_zone), safepoint_table_builder_(compilation_zone_), next_breakpoint_ptr_(breakpoints.begin()), @@ -391,12 +397,10 @@ class LiftoffCompiler { switch (type.kind()) { case ValueType::kS128: return kSimd; - case ValueType::kAnyRef: - case ValueType::kFuncRef: - case ValueType::kNullRef: - return kAnyRef; - case ValueType::kExnRef: - return kExceptionHandling; + case ValueType::kOptRef: + case ValueType::kRef: + // TODO(7748): Refine this. + return kRefTypes; case ValueType::kBottom: return kMultiValue; default: @@ -418,7 +422,7 @@ class LiftoffCompiler { } LiftoffBailoutReason bailout_reason = BailoutReasonForType(type); EmbeddedVector<char, 128> buffer; - SNPrintF(buffer, "%s %s", type.type_name(), context); + SNPrintF(buffer, "%s %s", type.type_name().c_str(), context); unsupported(decoder, bailout_reason, buffer.begin()); return false; } @@ -495,7 +499,7 @@ class LiftoffCompiler { position, __ cache_state()->used_registers, RegisterDebugSideTableEntry(DebugSideTableBuilder::kAssumeSpilling))); OutOfLineCode& ool = out_of_line_code_.back(); - Register limit_address = __ GetUnusedRegister(kGpReg).gp(); + Register limit_address = __ GetUnusedRegister(kGpReg, {}).gp(); LOAD_INSTANCE_FIELD(limit_address, StackLimitAddress, kSystemPointerSize); __ StackCheck(ool.label.get(), limit_address); __ bind(ool.continuation.get()); @@ -519,6 +523,15 @@ class LiftoffCompiler { return false; } + void TraceFunctionEntry(FullDecoder* decoder) { + DEBUG_CODE_COMMENT("trace function entry"); + __ SpillAllRegisters(); + source_position_table_builder_.AddPosition( + __ pc_offset(), SourcePosition(decoder->position()), false); + __ CallRuntimeStub(WasmCode::kWasmTraceEnter); + safepoint_table_builder_.DefineSafepoint(&asm_, Safepoint::kNoLazyDeopt); + } + void StartFunctionBody(FullDecoder* decoder, Control* block) { for (uint32_t i = 0; i < __ num_locals(); ++i) { if (!CheckSupportedType(decoder, kSupportedTypes, __ local_type(i), @@ -593,6 +606,8 @@ class LiftoffCompiler { // is never a position of any instruction in the function. StackCheck(0); + if (FLAG_trace_wasm) TraceFunctionEntry(decoder); + // If we are generating debug code, do check the "hook on function call" // flag. If set, trigger a break. 
if (V8_UNLIKELY(for_debugging_)) { @@ -604,7 +619,7 @@ class LiftoffCompiler { *next_breakpoint_ptr_ == decoder->position()); if (!has_breakpoint) { DEBUG_CODE_COMMENT("check hook on function call"); - Register flag = __ GetUnusedRegister(kGpReg).gp(); + Register flag = __ GetUnusedRegister(kGpReg, {}).gp(); LOAD_INSTANCE_FIELD(flag, HookOnFunctionCallAddress, kSystemPointerSize); Label no_break; @@ -693,9 +708,10 @@ class LiftoffCompiler { asm_.AbortCompilation(); } - void NextInstruction(FullDecoder* decoder, WasmOpcode opcode) { + V8_NOINLINE void EmitDebuggingInfo(FullDecoder* decoder, WasmOpcode opcode) { + DCHECK(V8_UNLIKELY(for_debugging_)); bool breakpoint = false; - if (V8_UNLIKELY(next_breakpoint_ptr_)) { + if (next_breakpoint_ptr_) { if (*next_breakpoint_ptr_ == 0) { // A single breakpoint at offset 0 indicates stepping. DCHECK_EQ(next_breakpoint_ptr_ + 1, next_breakpoint_end_); @@ -720,6 +736,12 @@ class LiftoffCompiler { } // Potentially generate the source position to OSR to this instruction. MaybeGenerateExtraSourcePos(decoder, !breakpoint); + } + + void NextInstruction(FullDecoder* decoder, WasmOpcode opcode) { + // Add a single check, so that the fast path can be inlined while + // {EmitDebuggingInfo} stays outlined. + if (V8_UNLIKELY(for_debugging_)) EmitDebuggingInfo(decoder, opcode); TraceCacheState(decoder); #ifdef DEBUG SLOW_DCHECK(__ ValidateCacheState()); @@ -923,10 +945,10 @@ class LiftoffCompiler { constexpr RegClass result_rc = reg_class_for(result_type); LiftoffRegister src = __ PopToRegister(); LiftoffRegister dst = src_rc == result_rc - ? __ GetUnusedRegister(result_rc, {src}) - : __ GetUnusedRegister(result_rc); + ? __ GetUnusedRegister(result_rc, {src}, {}) + : __ GetUnusedRegister(result_rc, {}); CallEmitFn(fn, dst, src); - __ PushRegister(ValueType(result_type), dst); + __ PushRegister(ValueType::Primitive(result_type), dst); } template <ValueType::Kind type> @@ -936,9 +958,9 @@ class LiftoffCompiler { auto emit_with_c_fallback = [=](LiftoffRegister dst, LiftoffRegister src) { if ((asm_.*emit_fn)(dst.fp(), src.fp())) return; ExternalReference ext_ref = fallback_fn(); - ValueType sig_reps[] = {ValueType(type)}; + ValueType sig_reps[] = {ValueType::Primitive(type)}; FunctionSig sig(0, 1, sig_reps); - GenerateCCall(&dst, &sig, ValueType(type), &src, ext_ref); + GenerateCCall(&dst, &sig, ValueType::Primitive(type), &src, ext_ref); }; EmitUnOp<type, type>(emit_with_c_fallback); } @@ -951,8 +973,9 @@ class LiftoffCompiler { static constexpr RegClass src_rc = reg_class_for(src_type); static constexpr RegClass dst_rc = reg_class_for(dst_type); LiftoffRegister src = __ PopToRegister(); - LiftoffRegister dst = src_rc == dst_rc ? __ GetUnusedRegister(dst_rc, {src}) - : __ GetUnusedRegister(dst_rc); + LiftoffRegister dst = src_rc == dst_rc + ? __ GetUnusedRegister(dst_rc, {src}, {}) + : __ GetUnusedRegister(dst_rc, {}); DCHECK_EQ(!!can_trap, trap_position > 0); Label* trap = can_trap ? AddOutOfLineTrap( trap_position, @@ -963,20 +986,22 @@ class LiftoffCompiler { ExternalReference ext_ref = fallback_fn(); if (can_trap) { // External references for potentially trapping conversions return int. 
- ValueType sig_reps[] = {kWasmI32, ValueType(src_type)}; + ValueType sig_reps[] = {kWasmI32, ValueType::Primitive(src_type)}; FunctionSig sig(1, 1, sig_reps); LiftoffRegister ret_reg = __ GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(dst)); LiftoffRegister dst_regs[] = {ret_reg, dst}; - GenerateCCall(dst_regs, &sig, ValueType(dst_type), &src, ext_ref); + GenerateCCall(dst_regs, &sig, ValueType::Primitive(dst_type), &src, + ext_ref); __ emit_cond_jump(kEqual, trap, kWasmI32, ret_reg.gp()); } else { - ValueType sig_reps[] = {ValueType(src_type)}; + ValueType sig_reps[] = {ValueType::Primitive(src_type)}; FunctionSig sig(0, 1, sig_reps); - GenerateCCall(&dst, &sig, ValueType(dst_type), &src, ext_ref); + GenerateCCall(&dst, &sig, ValueType::Primitive(dst_type), &src, + ext_ref); } } - __ PushRegister(ValueType(dst_type), dst); + __ PushRegister(ValueType::Primitive(dst_type), dst); } void UnOp(FullDecoder* decoder, WasmOpcode opcode, const Value& value, @@ -1088,14 +1113,22 @@ class LiftoffCompiler { __ emit_type_conversion(kExprI64UConvertI32, dst, c_call_dst, nullptr); }); - case kExprI32SConvertSatF32: - case kExprI32UConvertSatF32: - case kExprI32SConvertSatF64: - case kExprI32UConvertSatF64: - case kExprI64SConvertSatF32: - case kExprI64UConvertSatF32: - case kExprI64SConvertSatF64: - case kExprI64UConvertSatF64: + CASE_TYPE_CONVERSION(I32SConvertSatF32, I32, F32, nullptr, kNoTrap) + CASE_TYPE_CONVERSION(I32UConvertSatF32, I32, F32, nullptr, kNoTrap) + CASE_TYPE_CONVERSION(I32SConvertSatF64, I32, F64, nullptr, kNoTrap) + CASE_TYPE_CONVERSION(I32UConvertSatF64, I32, F64, nullptr, kNoTrap) + CASE_TYPE_CONVERSION(I64SConvertSatF32, I64, F32, + &ExternalReference::wasm_float32_to_int64_sat, + kNoTrap) + CASE_TYPE_CONVERSION(I64UConvertSatF32, I64, F32, + &ExternalReference::wasm_float32_to_uint64_sat, + kNoTrap) + CASE_TYPE_CONVERSION(I64SConvertSatF64, I64, F64, + &ExternalReference::wasm_float64_to_int64_sat, + kNoTrap) + CASE_TYPE_CONVERSION(I64UConvertSatF64, I64, F64, + &ExternalReference::wasm_float64_to_uint64_sat, + kNoTrap) return unsupported(decoder, kNonTrappingFloatToInt, WasmOpcodes::OpcodeName(opcode)); default: @@ -1122,11 +1155,11 @@ class LiftoffCompiler { LiftoffRegister lhs = __ PopToRegister(); LiftoffRegister dst = src_rc == result_rc - ? __ GetUnusedRegister(result_rc, {lhs}) - : __ GetUnusedRegister(result_rc); + ? __ GetUnusedRegister(result_rc, {lhs}, {}) + : __ GetUnusedRegister(result_rc, {}); CallEmitFn(fnImm, dst, lhs, imm); - __ PushRegister(ValueType(result_type), dst); + __ PushRegister(ValueType::Primitive(result_type), dst); } else { // The RHS was not an immediate. EmitBinOp<src_type, result_type>(fn); @@ -1141,13 +1174,13 @@ class LiftoffCompiler { LiftoffRegister rhs = __ PopToRegister(); LiftoffRegister lhs = __ PopToRegister(LiftoffRegList::ForRegs(rhs)); LiftoffRegister dst = src_rc == result_rc - ? __ GetUnusedRegister(result_rc, {lhs, rhs}) - : __ GetUnusedRegister(result_rc); + ? 
__ GetUnusedRegister(result_rc, {lhs, rhs}, {}) + : __ GetUnusedRegister(result_rc, {}); if (swap_lhs_rhs) std::swap(lhs, rhs); CallEmitFn(fn, dst, lhs, rhs); - __ PushRegister(ValueType(result_type), dst); + __ PushRegister(ValueType::Primitive(result_type), dst); } void EmitDivOrRem64CCall(LiftoffRegister dst, LiftoffRegister lhs, @@ -1483,34 +1516,34 @@ class LiftoffCompiler { if (value_i32 == value) { __ PushConstant(kWasmI64, value_i32); } else { - LiftoffRegister reg = __ GetUnusedRegister(reg_class_for(kWasmI64)); + LiftoffRegister reg = __ GetUnusedRegister(reg_class_for(kWasmI64), {}); __ LoadConstant(reg, WasmValue(value)); __ PushRegister(kWasmI64, reg); } } void F32Const(FullDecoder* decoder, Value* result, float value) { - LiftoffRegister reg = __ GetUnusedRegister(kFpReg); + LiftoffRegister reg = __ GetUnusedRegister(kFpReg, {}); __ LoadConstant(reg, WasmValue(value)); __ PushRegister(kWasmF32, reg); } void F64Const(FullDecoder* decoder, Value* result, double value) { - LiftoffRegister reg = __ GetUnusedRegister(kFpReg); + LiftoffRegister reg = __ GetUnusedRegister(kFpReg, {}); __ LoadConstant(reg, WasmValue(value)); __ PushRegister(kWasmF64, reg); } void RefNull(FullDecoder* decoder, Value* result) { - unsupported(decoder, kAnyRef, "ref_null"); + unsupported(decoder, kRefTypes, "ref_null"); } void RefFunc(FullDecoder* decoder, uint32_t function_index, Value* result) { - unsupported(decoder, kAnyRef, "func"); + unsupported(decoder, kRefTypes, "func"); } void RefAsNonNull(FullDecoder* decoder, const Value& arg, Value* result) { - unsupported(decoder, kAnyRef, "ref.as_non_null"); + unsupported(decoder, kRefTypes, "ref.as_non_null"); } void Drop(FullDecoder* decoder, const Value& value) { @@ -1520,7 +1553,44 @@ class LiftoffCompiler { __ cache_state()->stack_state.pop_back(); } + void TraceFunctionExit(FullDecoder* decoder) { + DEBUG_CODE_COMMENT("trace function exit"); + // Before making the runtime call, spill all cache registers. + __ SpillAllRegisters(); + LiftoffRegList pinned; + // Get a register to hold the stack slot for the return value. + LiftoffRegister info = pinned.set(__ GetUnusedRegister(kGpReg, pinned)); + __ AllocateStackSlot(info.gp(), sizeof(int64_t)); + + // Store the return value if there is exactly one. Multiple return values + // are not handled yet. + size_t num_returns = decoder->sig_->return_count(); + if (num_returns == 1) { + ValueType return_type = decoder->sig_->GetReturn(0); + LiftoffRegister return_reg = + __ LoadToRegister(__ cache_state()->stack_state.back(), pinned); + __ Store(info.gp(), no_reg, 0, return_reg, + StoreType::ForValueType(return_type), pinned); + } + // Put the parameter in its place. 
+ WasmTraceExitDescriptor descriptor; + DCHECK_EQ(0, descriptor.GetStackParameterCount()); + DCHECK_EQ(1, descriptor.GetRegisterParameterCount()); + Register param_reg = descriptor.GetRegisterParameter(0); + if (info.gp() != param_reg) { + __ Move(param_reg, info.gp(), LiftoffAssembler::kWasmIntPtr); + } + + source_position_table_builder_.AddPosition( + __ pc_offset(), SourcePosition(decoder->position()), false); + __ CallRuntimeStub(WasmCode::kWasmTraceExit); + safepoint_table_builder_.DefineSafepoint(&asm_, Safepoint::kNoLazyDeopt); + + __ DeallocateStackSlot(sizeof(int64_t)); + } + void ReturnImpl(FullDecoder* decoder) { + if (FLAG_trace_wasm) TraceFunctionExit(decoder); size_t num_returns = decoder->sig_->return_count(); if (num_returns > 0) __ MoveToReturnLocations(decoder->sig_, descriptor_); DEBUG_CODE_COMMENT("leave frame"); @@ -1546,7 +1616,7 @@ class LiftoffCompiler { break; case kStack: { auto rc = reg_class_for(imm.type); - LiftoffRegister reg = __ GetUnusedRegister(rc); + LiftoffRegister reg = __ GetUnusedRegister(rc, {}); __ Fill(reg, slot.offset(), imm.type); __ PushRegister(slot.type(), reg); break; @@ -1570,7 +1640,7 @@ class LiftoffCompiler { } DCHECK_EQ(type, __ local_type(local_index)); RegClass rc = reg_class_for(type); - LiftoffRegister dst_reg = __ GetUnusedRegister(rc); + LiftoffRegister dst_reg = __ GetUnusedRegister(rc, {}); __ Fill(dst_reg, src_slot.offset(), type); *dst_slot = LiftoffAssembler::VarState(type, dst_reg, dst_slot->offset()); __ cache_state()->inc_used(dst_reg); @@ -1607,9 +1677,19 @@ class LiftoffCompiler { LocalSet(imm.index, true); } + void AllocateLocals(FullDecoder* decoder, Vector<Value> local_values) { + // TODO(7748): Introduce typed functions bailout reason + unsupported(decoder, kGC, "let"); + } + + void DeallocateLocals(FullDecoder* decoder, uint32_t count) { + // TODO(7748): Introduce typed functions bailout reason + unsupported(decoder, kGC, "let"); + } + Register GetGlobalBaseAndOffset(const WasmGlobal* global, LiftoffRegList* pinned, uint32_t* offset) { - Register addr = pinned->set(__ GetUnusedRegister(kGpReg)).gp(); + Register addr = pinned->set(__ GetUnusedRegister(kGpReg, {})).gp(); if (global->mutability && global->imported) { LOAD_INSTANCE_FIELD(addr, ImportedMutableGlobals, kSystemPointerSize); __ Load(LiftoffRegister(addr), addr, no_reg, @@ -1652,12 +1732,12 @@ class LiftoffCompiler { void TableGet(FullDecoder* decoder, const Value& index, Value* result, const TableIndexImmediate<validate>& imm) { - unsupported(decoder, kAnyRef, "table_get"); + unsupported(decoder, kRefTypes, "table_get"); } void TableSet(FullDecoder* decoder, const Value& index, const Value& value, const TableIndexImmediate<validate>& imm) { - unsupported(decoder, kAnyRef, "table_set"); + unsupported(decoder, kRefTypes, "table_set"); } void Unreachable(FullDecoder* decoder) { @@ -1675,8 +1755,8 @@ class LiftoffCompiler { DCHECK_EQ(type, __ cache_state()->stack_state.end()[-2].type()); LiftoffRegister false_value = pinned.set(__ PopToRegister(pinned)); LiftoffRegister true_value = __ PopToRegister(pinned); - LiftoffRegister dst = - __ GetUnusedRegister(true_value.reg_class(), {true_value, false_value}); + LiftoffRegister dst = __ GetUnusedRegister(true_value.reg_class(), + {true_value, false_value}, {}); __ PushRegister(type, dst); // Now emit the actual code to move either {true_value} or {false_value} @@ -1819,11 +1899,12 @@ class LiftoffCompiler { __ cache_state()->Steal(c->else_state->state); } - std::unique_ptr<SpilledRegistersBeforeTrap> 
GetSpilledRegistersBeforeTrap() { - if (V8_LIKELY(!for_debugging_)) return nullptr; + SpilledRegistersBeforeTrap* GetSpilledRegistersBeforeTrap() { + DCHECK(for_debugging_); // If we are generating debugging code, we really need to spill all // registers to make them inspectable when stopping at the trap. - auto spilled = std::make_unique<SpilledRegistersBeforeTrap>(); + auto* spilled = + new (compilation_zone_) SpilledRegistersBeforeTrap(compilation_zone_); for (uint32_t i = 0, e = __ cache_state()->stack_height(); i < e; ++i) { auto& slot = __ cache_state()->stack_state[i]; if (!slot.is_reg()) continue; @@ -1840,7 +1921,8 @@ class LiftoffCompiler { out_of_line_code_.push_back(OutOfLineCode::Trap( stub, position, pc, RegisterDebugSideTableEntry(DebugSideTableBuilder::kAssumeSpilling), - GetSpilledRegistersBeforeTrap())); + V8_UNLIKELY(for_debugging_) ? GetSpilledRegistersBeforeTrap() + : nullptr)); return out_of_line_code_.back().label.get(); } @@ -1852,7 +1934,7 @@ class LiftoffCompiler { uint32_t offset, Register index, LiftoffRegList pinned, ForceCheck force_check) { const bool statically_oob = - !base::IsInBounds(offset, access_size, env_->max_memory_size); + !base::IsInBounds<uint64_t>(offset, access_size, env_->max_memory_size); if (!force_check && !statically_oob && (!FLAG_wasm_bounds_checks || env_->use_trap_handler)) { @@ -1868,10 +1950,7 @@ class LiftoffCompiler { if (statically_oob) { __ emit_jump(trap_label); - Control* current_block = decoder->control_at(0); - if (current_block->reachable()) { - current_block->reachability = kSpecOnlyReachable; - } + decoder->SetSucceedingCodeDynamicallyUnreachable(); return true; } @@ -2033,11 +2112,54 @@ class LiftoffCompiler { offset, decoder->position()); } } + void LoadTransform(FullDecoder* decoder, LoadType type, LoadTransformationKind transform, const MemoryAccessImmediate<validate>& imm, const Value& index_val, Value* result) { - unsupported(decoder, kSimd, "simd"); + // LoadTransform requires SIMD support, so check for it here. If + // unsupported, bailout and let TurboFan lower the code. + if (!CheckSupportedType(decoder, kSupportedTypes, kWasmS128, + "LoadTransform")) { + return; + } + + LiftoffRegList pinned; + Register index = pinned.set(__ PopToRegister()).gp(); + // For load splats, LoadType is the size of the load, and for load + // extends, LoadType is the size of the lane, and it always loads 8 bytes. + uint32_t access_size = + transform == LoadTransformationKind::kExtend ? 8 : type.size(); + if (BoundsCheckMem(decoder, access_size, imm.offset, index, pinned, + kDontForceCheck)) { + return; + } + + uint32_t offset = imm.offset; + index = AddMemoryMasking(index, &offset, &pinned); + DEBUG_CODE_COMMENT("LoadTransform from memory"); + Register addr = __ GetUnusedRegister(kGpReg, pinned).gp(); + LOAD_INSTANCE_FIELD(addr, MemoryStart, kSystemPointerSize); + LiftoffRegister value = __ GetUnusedRegister(reg_class_for(kS128), {}); + uint32_t protected_load_pc = 0; + __ LoadTransform(value, addr, index, offset, type, transform, + &protected_load_pc); + + if (env_->use_trap_handler) { + AddOutOfLineTrap(decoder->position(), + WasmCode::kThrowWasmTrapMemOutOfBounds, + protected_load_pc); + } + __ PushRegister(ValueType::Primitive(kS128), value); + + if (FLAG_trace_wasm_memory) { + // Again load extend is different. + MachineRepresentation mem_rep = + transform == LoadTransformationKind::kExtend + ? 
MachineRepresentation::kWord64 + : type.mem_type().representation(); + TraceMemoryOperation(false, mem_rep, index, offset, decoder->position()); + } } void StoreMem(FullDecoder* decoder, StoreType type, @@ -2075,7 +2197,7 @@ class LiftoffCompiler { } void CurrentMemoryPages(FullDecoder* decoder, Value* result) { - Register mem_size = __ GetUnusedRegister(kGpReg).gp(); + Register mem_size = __ GetUnusedRegister(kGpReg, {}).gp(); LOAD_INSTANCE_FIELD(mem_size, MemorySize, kSystemPointerSize); __ emit_ptrsize_shri(mem_size, mem_size, kWasmPageSizeLog2); __ PushRegister(kWasmI32, LiftoffRegister(mem_size)); @@ -2184,7 +2306,7 @@ class LiftoffCompiler { const CallIndirectImmediate<validate>& imm, const Value args[], Value returns[]) { if (imm.table_index != 0) { - return unsupported(decoder, kAnyRef, "table index != 0"); + return unsupported(decoder, kRefTypes, "table index != 0"); } for (ValueType ret : imm.sig->returns()) { if (!CheckSupportedType(decoder, kSupportedTypes, ret, "return")) { @@ -2326,7 +2448,7 @@ class LiftoffCompiler { } void BrOnNull(FullDecoder* decoder, const Value& ref_object, uint32_t depth) { - unsupported(decoder, kAnyRef, "br_on_null"); + unsupported(decoder, kRefTypes, "br_on_null"); } template <ValueType::Kind src_type, ValueType::Kind result_type, @@ -2344,9 +2466,9 @@ class LiftoffCompiler { src_rc == result_rc ? __ GetUnusedRegister(result_rc, {src3}, LiftoffRegList::ForRegs(src1, src2)) - : __ GetUnusedRegister(result_rc); + : __ GetUnusedRegister(result_rc, {}); CallEmitFn(fn, dst, src1, src2, src3); - __ PushRegister(ValueType(result_type), dst); + __ PushRegister(ValueType::Primitive(result_type), dst); } template <typename EmitFn, typename EmitFnImm> @@ -2360,14 +2482,14 @@ class LiftoffCompiler { int32_t imm = rhs_slot.i32_const(); LiftoffRegister operand = __ PopToRegister(); - LiftoffRegister dst = __ GetUnusedRegister(result_rc, {operand}); + LiftoffRegister dst = __ GetUnusedRegister(result_rc, {operand}, {}); CallEmitFn(fnImm, dst, operand, imm); __ PushRegister(kWasmS128, dst); } else { LiftoffRegister count = __ PopToRegister(); LiftoffRegister operand = __ PopToRegister(); - LiftoffRegister dst = __ GetUnusedRegister(result_rc, {operand}); + LiftoffRegister dst = __ GetUnusedRegister(result_rc, {operand}, {}); CallEmitFn(fn, dst, operand, count); __ PushRegister(kWasmS128, dst); @@ -2380,6 +2502,8 @@ class LiftoffCompiler { return unsupported(decoder, kSimd, "simd"); } switch (opcode) { + case wasm::kExprS8x16Swizzle: + return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_s8x16_swizzle); case wasm::kExprI8x16Splat: return EmitUnOp<kI32, kS128>(&LiftoffAssembler::emit_i8x16_splat); case wasm::kExprI16x8Splat: @@ -2500,9 +2624,21 @@ class LiftoffCompiler { return EmitTerOp<kS128, kS128>(&LiftoffAssembler::emit_s128_select); case wasm::kExprI8x16Neg: return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_neg); + case wasm::kExprV8x16AnyTrue: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_v8x16_anytrue); + case wasm::kExprV8x16AllTrue: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_v8x16_alltrue); + case wasm::kExprI8x16BitMask: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_i8x16_bitmask); case wasm::kExprI8x16Shl: return EmitSimdShiftOp(&LiftoffAssembler::emit_i8x16_shl, &LiftoffAssembler::emit_i8x16_shli); + case wasm::kExprI8x16ShrS: + return EmitSimdShiftOp(&LiftoffAssembler::emit_i8x16_shr_s, + &LiftoffAssembler::emit_i8x16_shri_s); + case wasm::kExprI8x16ShrU: + return 
EmitSimdShiftOp(&LiftoffAssembler::emit_i8x16_shr_u, + &LiftoffAssembler::emit_i8x16_shri_u); case wasm::kExprI8x16Add: return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_add); case wasm::kExprI8x16AddSaturateS: @@ -2531,9 +2667,21 @@ class LiftoffCompiler { return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_max_u); case wasm::kExprI16x8Neg: return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_neg); + case wasm::kExprV16x8AnyTrue: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_v16x8_anytrue); + case wasm::kExprV16x8AllTrue: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_v16x8_alltrue); + case wasm::kExprI16x8BitMask: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_i16x8_bitmask); case wasm::kExprI16x8Shl: return EmitSimdShiftOp(&LiftoffAssembler::emit_i16x8_shl, &LiftoffAssembler::emit_i16x8_shli); + case wasm::kExprI16x8ShrS: + return EmitSimdShiftOp(&LiftoffAssembler::emit_i16x8_shr_s, + &LiftoffAssembler::emit_i16x8_shri_s); + case wasm::kExprI16x8ShrU: + return EmitSimdShiftOp(&LiftoffAssembler::emit_i16x8_shr_u, + &LiftoffAssembler::emit_i16x8_shri_u); case wasm::kExprI16x8Add: return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_add); case wasm::kExprI16x8AddSaturateS: @@ -2562,9 +2710,21 @@ class LiftoffCompiler { return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_max_u); case wasm::kExprI32x4Neg: return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_neg); + case wasm::kExprV32x4AnyTrue: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_v32x4_anytrue); + case wasm::kExprV32x4AllTrue: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_v32x4_alltrue); + case wasm::kExprI32x4BitMask: + return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_i32x4_bitmask); case wasm::kExprI32x4Shl: return EmitSimdShiftOp(&LiftoffAssembler::emit_i32x4_shl, &LiftoffAssembler::emit_i32x4_shli); + case wasm::kExprI32x4ShrS: + return EmitSimdShiftOp(&LiftoffAssembler::emit_i32x4_shr_s, + &LiftoffAssembler::emit_i32x4_shri_s); + case wasm::kExprI32x4ShrU: + return EmitSimdShiftOp(&LiftoffAssembler::emit_i32x4_shr_u, + &LiftoffAssembler::emit_i32x4_shri_u); case wasm::kExprI32x4Add: return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_add); case wasm::kExprI32x4Sub: @@ -2584,6 +2744,12 @@ class LiftoffCompiler { case wasm::kExprI64x2Shl: return EmitSimdShiftOp(&LiftoffAssembler::emit_i64x2_shl, &LiftoffAssembler::emit_i64x2_shli); + case wasm::kExprI64x2ShrS: + return EmitSimdShiftOp(&LiftoffAssembler::emit_i64x2_shr_s, + &LiftoffAssembler::emit_i64x2_shri_s); + case wasm::kExprI64x2ShrU: + return EmitSimdShiftOp(&LiftoffAssembler::emit_i64x2_shr_u, + &LiftoffAssembler::emit_i64x2_shri_u); case wasm::kExprI64x2Add: return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_add); case wasm::kExprI64x2Sub: @@ -2626,6 +2792,18 @@ class LiftoffCompiler { return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_min); case wasm::kExprF64x2Max: return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_max); + case wasm::kExprI32x4SConvertF32x4: + return EmitUnOp<kS128, kS128>( + &LiftoffAssembler::emit_i32x4_sconvert_f32x4); + case wasm::kExprI32x4UConvertF32x4: + return EmitUnOp<kS128, kS128>( + &LiftoffAssembler::emit_i32x4_uconvert_f32x4); + case wasm::kExprF32x4SConvertI32x4: + return EmitUnOp<kS128, kS128>( + &LiftoffAssembler::emit_f32x4_sconvert_i32x4); + case wasm::kExprF32x4UConvertI32x4: + return EmitUnOp<kS128, kS128>( + &LiftoffAssembler::emit_f32x4_uconvert_i32x4); case wasm::kExprI8x16SConvertI16x8: return 
EmitBinOp<kS128, kS128>( &LiftoffAssembler::emit_i8x16_sconvert_i16x8); @@ -2689,10 +2867,10 @@ class LiftoffCompiler { static constexpr RegClass result_rc = reg_class_for(result_type); LiftoffRegister lhs = __ PopToRegister(); LiftoffRegister dst = src_rc == result_rc - ? __ GetUnusedRegister(result_rc, {lhs}) - : __ GetUnusedRegister(result_rc); + ? __ GetUnusedRegister(result_rc, {lhs}, {}) + : __ GetUnusedRegister(result_rc, {}); fn(dst, lhs, imm.lane); - __ PushRegister(ValueType(result_type), dst); + __ PushRegister(ValueType::Primitive(result_type), dst); } template <ValueType::Kind src2_type, typename EmitFn> @@ -2716,7 +2894,7 @@ class LiftoffCompiler { (src2_rc == result_rc || pin_src2) ? __ GetUnusedRegister(result_rc, {src1}, LiftoffRegList::ForRegs(src2)) - : __ GetUnusedRegister(result_rc, {src1}); + : __ GetUnusedRegister(result_rc, {src1}, {}); fn(dst, src1, src2, imm.lane); __ PushRegister(kWasmS128, dst); } @@ -2770,8 +2948,15 @@ class LiftoffCompiler { const Simd8x16ShuffleImmediate<validate>& imm, const Value& input0, const Value& input1, Value* result) { - unsupported(decoder, kSimd, "simd"); + static constexpr RegClass result_rc = reg_class_for(ValueType::kS128); + LiftoffRegister rhs = __ PopToRegister(); + LiftoffRegister lhs = __ PopToRegister(LiftoffRegList::ForRegs(rhs)); + LiftoffRegister dst = __ GetUnusedRegister(result_rc, {lhs, rhs}, {}); + + __ LiftoffAssembler::emit_s8x16_shuffle(dst, lhs, rhs, imm.shuffle); + __ PushRegister(kWasmS128, dst); } + void Throw(FullDecoder* decoder, const ExceptionIndexImmediate<validate>&, const Vector<Value>& args) { unsupported(decoder, kExceptionHandling, "throw"); @@ -3369,17 +3554,17 @@ class LiftoffCompiler { void TableGrow(FullDecoder* decoder, const TableIndexImmediate<validate>& imm, const Value& value, const Value& delta, Value* result) { - unsupported(decoder, kAnyRef, "table.grow"); + unsupported(decoder, kRefTypes, "table.grow"); } void TableSize(FullDecoder* decoder, const TableIndexImmediate<validate>& imm, Value* result) { - unsupported(decoder, kAnyRef, "table.size"); + unsupported(decoder, kRefTypes, "table.size"); } void TableFill(FullDecoder* decoder, const TableIndexImmediate<validate>& imm, const Value& start, const Value& value, const Value& count) { - unsupported(decoder, kAnyRef, "table.fill"); + unsupported(decoder, kRefTypes, "table.fill"); } void StructNew(FullDecoder* decoder, @@ -3389,7 +3574,8 @@ class LiftoffCompiler { unsupported(decoder, kGC, "struct.new"); } void StructGet(FullDecoder* decoder, const Value& struct_obj, - const FieldIndexImmediate<validate>& field, Value* result) { + const FieldIndexImmediate<validate>& field, bool is_signed, + Value* result) { // TODO(7748): Implement. unsupported(decoder, kGC, "struct.get"); } @@ -3408,7 +3594,7 @@ class LiftoffCompiler { } void ArrayGet(FullDecoder* decoder, const Value& array_obj, const ArrayIndexImmediate<validate>& imm, const Value& index, - Value* result) { + bool is_signed, Value* result) { // TODO(7748): Implement. unsupported(decoder, kGC, "array.get"); } @@ -3423,6 +3609,12 @@ class LiftoffCompiler { unsupported(decoder, kGC, "array.len"); } + void RttCanon(FullDecoder* decoder, const TypeIndexImmediate<validate>& imm, + Value* result) { + // TODO(7748): Implement. + unsupported(decoder, kGC, "rtt.canon"); + } + void PassThrough(FullDecoder* decoder, const Value& from, Value* to) { // TODO(7748): Implement. 
unsupported(decoder, kGC, ""); @@ -3484,9 +3676,9 @@ class LiftoffCompiler { DebugSideTableBuilder* const debug_sidetable_builder_; const ForDebugging for_debugging_; LiftoffBailoutReason bailout_reason_ = kSuccess; - std::vector<OutOfLineCode> out_of_line_code_; + ZoneVector<OutOfLineCode> out_of_line_code_; SourcePositionTableBuilder source_position_table_builder_; - std::vector<trap_handler::ProtectedInstructionData> protected_instructions_; + ZoneVector<trap_handler::ProtectedInstructionData> protected_instructions_; // Zone used to store information during compilation. The result will be // stored independently, such that this zone can die together with the // LiftoffCompiler after compilation. @@ -3536,9 +3728,9 @@ WasmCompilationResult ExecuteLiftoffCompilation( std::unique_ptr<DebugSideTable>* debug_sidetable, Vector<int> extra_source_pos) { int func_body_size = static_cast<int>(func_body.end - func_body.start); - TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("v8.wasm"), - "ExecuteLiftoffCompilation", "func_index", func_index, - "body_size", func_body_size); + TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), + "wasm.CompileBaseline", "func_index", func_index, "body_size", + func_body_size); Zone zone(allocator, "LiftoffCompilationZone"); auto call_descriptor = compiler::GetWasmCallDescriptor(&zone, func_body.sig); diff --git a/chromium/v8/src/wasm/baseline/liftoff-compiler.h b/chromium/v8/src/wasm/baseline/liftoff-compiler.h index 434172c4cf1..bb2ddaf050c 100644 --- a/chromium/v8/src/wasm/baseline/liftoff-compiler.h +++ b/chromium/v8/src/wasm/baseline/liftoff-compiler.h @@ -38,7 +38,7 @@ enum LiftoffBailoutReason : int8_t { kComplexOperation = 4, // Unimplemented proposals: kSimd = 5, - kAnyRef = 6, + kRefTypes = 6, kExceptionHandling = 7, kMultiValue = 8, kTailCall = 9, diff --git a/chromium/v8/src/wasm/baseline/mips/liftoff-assembler-mips.h b/chromium/v8/src/wasm/baseline/mips/liftoff-assembler-mips.h index f24c95008c9..0560a66dfe7 100644 --- a/chromium/v8/src/wasm/baseline/mips/liftoff-assembler-mips.h +++ b/chromium/v8/src/wasm/baseline/mips/liftoff-assembler-mips.h @@ -603,7 +603,7 @@ void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src, void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset, ValueType type) { DCHECK_NE(dst_offset, src_offset); - LiftoffRegister reg = GetUnusedRegister(reg_class_for(type)); + LiftoffRegister reg = GetUnusedRegister(reg_class_for(type), {}); Fill(reg, src_offset, type); Spill(dst_offset, reg, type); } @@ -646,13 +646,13 @@ void LiftoffAssembler::Spill(int offset, WasmValue value) { MemOperand dst = liftoff::GetStackSlot(offset); switch (value.type().kind()) { case ValueType::kI32: { - LiftoffRegister tmp = GetUnusedRegister(kGpReg); + LiftoffRegister tmp = GetUnusedRegister(kGpReg, {}); TurboAssembler::li(tmp.gp(), Operand(value.to_i32())); sw(tmp.gp(), dst); break; } case ValueType::kI64: { - LiftoffRegister tmp = GetUnusedRegister(kGpRegPair); + LiftoffRegister tmp = GetUnusedRegister(kGpRegPair, {}); int32_t low_word = value.to_i64(); int32_t high_word = value.to_i64() >> 32; @@ -1269,6 +1269,30 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, bailout(kUnsupportedArchitecture, "kExprI32UConvertF64"); return true; } + case kExprI32SConvertSatF32: + bailout(kNonTrappingFloatToInt, "kExprI32SConvertSatF32"); + return true; + case kExprI32UConvertSatF32: + bailout(kNonTrappingFloatToInt, "kExprI32UConvertSatF32"); + return true; + case kExprI32SConvertSatF64: + 
bailout(kNonTrappingFloatToInt, "kExprI32SConvertSatF64"); + return true; + case kExprI32UConvertSatF64: + bailout(kNonTrappingFloatToInt, "kExprI32UConvertSatF64"); + return true; + case kExprI64SConvertSatF32: + bailout(kNonTrappingFloatToInt, "kExprI64SConvertSatF32"); + return true; + case kExprI64UConvertSatF32: + bailout(kNonTrappingFloatToInt, "kExprI64UConvertSatF32"); + return true; + case kExprI64SConvertSatF64: + bailout(kNonTrappingFloatToInt, "kExprI64SConvertSatF64"); + return true; + case kExprI64UConvertSatF64: + bailout(kNonTrappingFloatToInt, "kExprI64UConvertSatF64"); + return true; case kExprI32ReinterpretF32: mfc1(dst.gp(), src.fp()); return true; @@ -1542,6 +1566,27 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, bind(&cont); } +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind transform, + uint32_t* protected_load_pc) { + bailout(kSimd, "load extend and load splat unimplemented"); +} + +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + bailout(kSimd, "emit_s8x16_shuffle"); +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_s8x16_swizzle"); +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { bailout(kSimd, "emit_i8x16_splat"); @@ -1739,6 +1784,21 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, bailout(kSimd, "emit_i8x16_neg"); } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_v8x16_anytrue"); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_v8x16_alltrue"); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_i8x16_bitmask"); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "emit_i8x16_shl"); @@ -1749,6 +1809,28 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "emit_i8x16_shli"); } +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_i8x16_shr_s"); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "emit_i8x16_shri_s"); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_i8x16_shr_u"); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "emit_i8x16_shri_u"); +} + void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "emit_i8x16_add"); @@ -1817,6 +1899,21 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, bailout(kSimd, "emit_i16x8_neg"); } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_v16x8_anytrue"); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_v16x8_alltrue"); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, 
"emit_i16x8_bitmask"); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "emit_i16x8_shl"); @@ -1827,6 +1924,28 @@ void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "emit_i16x8_shli"); } +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_i16x8_shr_s"); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "emit_i16x8_shri_s"); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_i16x8_shr_u"); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "emit_i16x8_shri_u"); +} + void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "emit_i16x8_add"); @@ -1895,6 +2014,21 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, bailout(kSimd, "emit_i32x4_neg"); } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_v32x4_anytrue"); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_v32x4_alltrue"); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_i32x4_bitmask"); +} + void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "emit_i32x4_shl"); @@ -1905,6 +2039,28 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "emit_i32x4_shli"); } +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_i32x4_shr_s"); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "emit_i32x4_shri_s"); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_i32x4_shr_u"); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "emit_i32x4_shri_u"); +} + void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "emit_i32x4_add"); @@ -1959,6 +2115,28 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "emit_i64x2_shli"); } +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_i64x2_shr_s"); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "emit_i64x2_shri_s"); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "emit_i64x2_shr_u"); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "emit_i64x2_shri_u"); +} + void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "emit_i64x2_add"); @@ -2064,6 +2242,26 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, 
bailout(kSimd, "emit_f64x2_max"); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_i32x4_sconvert_f32x4"); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_i32x4_uconvert_f32x4"); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_f32x4_sconvert_i32x4"); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "emit_f32x4_uconvert_i32x4"); +} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { @@ -2251,7 +2449,7 @@ void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { } void LiftoffAssembler::CallTrapCallbackForTesting() { - PrepareCallCFunction(0, GetUnusedRegister(kGpReg).gp()); + PrepareCallCFunction(0, GetUnusedRegister(kGpReg, {}).gp()); CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0); } diff --git a/chromium/v8/src/wasm/baseline/mips64/liftoff-assembler-mips64.h b/chromium/v8/src/wasm/baseline/mips64/liftoff-assembler-mips64.h index 292f8032b8f..70946d3f6b5 100644 --- a/chromium/v8/src/wasm/baseline/mips64/liftoff-assembler-mips64.h +++ b/chromium/v8/src/wasm/baseline/mips64/liftoff-assembler-mips64.h @@ -532,7 +532,7 @@ void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src, void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset, ValueType type) { DCHECK_NE(dst_offset, src_offset); - LiftoffRegister reg = GetUnusedRegister(reg_class_for(type)); + LiftoffRegister reg = GetUnusedRegister(reg_class_for(type), {}); Fill(reg, src_offset, type); Spill(dst_offset, reg, type); } @@ -582,13 +582,13 @@ void LiftoffAssembler::Spill(int offset, WasmValue value) { MemOperand dst = liftoff::GetStackSlot(offset); switch (value.type().kind()) { case ValueType::kI32: { - LiftoffRegister tmp = GetUnusedRegister(kGpReg); + LiftoffRegister tmp = GetUnusedRegister(kGpReg, {}); TurboAssembler::li(tmp.gp(), Operand(value.to_i32())); sw(tmp.gp(), dst); break; } case ValueType::kI64: { - LiftoffRegister tmp = GetUnusedRegister(kGpReg); + LiftoffRegister tmp = GetUnusedRegister(kGpReg, {}); TurboAssembler::li(tmp.gp(), value.to_i64()); sd(tmp.gp(), dst); break; @@ -1177,6 +1177,30 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, case kExprF64ReinterpretI64: dmtc1(src.gp(), dst.fp()); return true; + case kExprI32SConvertSatF32: + bailout(kNonTrappingFloatToInt, "kExprI32SConvertSatF32"); + return true; + case kExprI32UConvertSatF32: + bailout(kNonTrappingFloatToInt, "kExprI32UConvertSatF32"); + return true; + case kExprI32SConvertSatF64: + bailout(kNonTrappingFloatToInt, "kExprI32SConvertSatF64"); + return true; + case kExprI32UConvertSatF64: + bailout(kNonTrappingFloatToInt, "kExprI32UConvertSatF64"); + return true; + case kExprI64SConvertSatF32: + bailout(kNonTrappingFloatToInt, "kExprI64SConvertSatF32"); + return true; + case kExprI64UConvertSatF32: + bailout(kNonTrappingFloatToInt, "kExprI64UConvertSatF32"); + return true; + case kExprI64SConvertSatF64: + bailout(kNonTrappingFloatToInt, "kExprI64SConvertSatF64"); + return true; + case kExprI64UConvertSatF64: + bailout(kNonTrappingFloatToInt, "kExprI64UConvertSatF64"); + return true; default: return false; } @@ -1297,6 +1321,26 @@ inline FPUCondition ConditionToConditionCmpFPU(Condition condition, UNREACHABLE(); } 
+inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + Label all_false; + assm->BranchMSA(&all_false, MSA_BRANCH_V, all_zero, src.fp().toW(), + USE_DELAY_SLOT); + assm->li(dst.gp(), 0l); + assm->li(dst.gp(), 1); + assm->bind(&all_false); +} + +inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src, MSABranchDF msa_branch_df) { + Label all_true; + assm->BranchMSA(&all_true, msa_branch_df, all_not_zero, src.fp().toW(), + USE_DELAY_SLOT); + assm->li(dst.gp(), 1); + assm->li(dst.gp(), 0l); + assm->bind(&all_true); +} + } // namespace liftoff void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst, @@ -1357,6 +1401,112 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, bind(&cont); } +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind transform, + uint32_t* protected_load_pc) { + UseScratchRegisterScope temps(this); + Register scratch = temps.Acquire(); + Daddu(scratch, src_addr, offset_reg); + MemOperand src_op = MemOperand(scratch, offset_imm); + MSARegister dst_msa = dst.fp().toW(); + *protected_load_pc = pc_offset(); + MachineType memtype = type.mem_type(); + + if (transform == LoadTransformationKind::kExtend) { + Ld(scratch, src_op); + if (memtype == MachineType::Int8()) { + fill_d(dst_msa, scratch); + clti_s_b(kSimd128ScratchReg, dst_msa, 0); + ilvr_b(dst_msa, kSimd128ScratchReg, dst_msa); + } else if (memtype == MachineType::Uint8()) { + xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); + fill_d(dst_msa, scratch); + ilvr_b(dst_msa, kSimd128RegZero, dst_msa); + } else if (memtype == MachineType::Int16()) { + fill_d(dst_msa, scratch); + clti_s_h(kSimd128ScratchReg, dst_msa, 0); + ilvr_h(dst_msa, kSimd128ScratchReg, dst_msa); + } else if (memtype == MachineType::Uint16()) { + xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); + fill_d(dst_msa, scratch); + ilvr_h(dst_msa, kSimd128RegZero, dst_msa); + } else if (memtype == MachineType::Int32()) { + fill_d(dst_msa, scratch); + clti_s_w(kSimd128ScratchReg, dst_msa, 0); + ilvr_w(dst_msa, kSimd128ScratchReg, dst_msa); + } else if (memtype == MachineType::Uint32()) { + xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); + fill_d(dst_msa, scratch); + ilvr_w(dst_msa, kSimd128RegZero, dst_msa); + } + } else { + DCHECK_EQ(LoadTransformationKind::kSplat, transform); + if (memtype == MachineType::Int8()) { + Lb(scratch, src_op); + fill_b(dst_msa, scratch); + } else if (memtype == MachineType::Int16()) { + Lh(scratch, src_op); + fill_h(dst_msa, scratch); + } else if (memtype == MachineType::Int32()) { + Lw(scratch, src_op); + fill_w(dst_msa, scratch); + } else if (memtype == MachineType::Int64()) { + Ld(scratch, src_op); + fill_d(dst_msa, scratch); + } + } +} + +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + MSARegister dst_msa = dst.fp().toW(); + MSARegister lhs_msa = lhs.fp().toW(); + MSARegister rhs_msa = rhs.fp().toW(); + + uint64_t control_hi = 0; + uint64_t control_low = 0; + for (int i = 7; i >= 0; i--) { + control_hi <<= 8; + control_hi |= shuffle[i + 8]; + control_low <<= 8; + control_low |= shuffle[i]; + } + + if (dst_msa == lhs_msa) { + move_v(kSimd128ScratchReg, lhs_msa); + lhs_msa = kSimd128ScratchReg; + } else if (dst_msa == rhs_msa) { + move_v(kSimd128ScratchReg, rhs_msa); + rhs_msa = 
kSimd128ScratchReg; + } + + li(kScratchReg, control_low); + insert_d(dst_msa, 0, kScratchReg); + li(kScratchReg, control_hi); + insert_d(dst_msa, 1, kScratchReg); + vshf_b(dst_msa, rhs_msa, lhs_msa); +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + MSARegister dst_msa = dst.fp().toW(); + MSARegister lhs_msa = lhs.fp().toW(); + MSARegister rhs_msa = rhs.fp().toW(); + + if (dst == lhs) { + move_v(kSimd128ScratchReg, lhs_msa); + lhs_msa = kSimd128ScratchReg; + } + xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); + move_v(dst_msa, rhs_msa); + vshf_b(dst_msa, kSimd128RegZero, lhs_msa); +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { fill_b(dst.fp().toW(), src.gp()); @@ -1567,6 +1717,32 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, subv_b(dst.fp().toW(), kSimd128RegZero, src.fp().toW()); } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue(this, dst, src, MSA_BRANCH_B); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + MSARegister scratch0 = kSimd128RegZero; + MSARegister scratch1 = kSimd128ScratchReg; + srli_b(scratch0, src.fp().toW(), 7); + srli_h(scratch1, scratch0, 7); + or_v(scratch0, scratch0, scratch1); + srli_w(scratch1, scratch0, 14); + or_v(scratch0, scratch0, scratch1); + srli_d(scratch1, scratch0, 28); + or_v(scratch0, scratch0, scratch1); + shf_w(scratch1, scratch0, 0x0E); + ilvev_b(scratch0, scratch1, scratch0); + copy_u_h(dst.gp(), scratch0, 0); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { fill_b(kSimd128ScratchReg, rhs.gp()); @@ -1578,6 +1754,30 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, slli_b(dst.fp().toW(), lhs.fp().toW(), rhs & 7); } +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + fill_b(kSimd128ScratchReg, rhs.gp()); + sra_b(dst.fp().toW(), lhs.fp().toW(), kSimd128ScratchReg); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + srai_b(dst.fp().toW(), lhs.fp().toW(), rhs & 7); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + fill_b(kSimd128ScratchReg, rhs.gp()); + srl_b(dst.fp().toW(), lhs.fp().toW(), kSimd128ScratchReg); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + srli_b(dst.fp().toW(), lhs.fp().toW(), rhs & 7); +} + void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { addv_b(dst.fp().toW(), lhs.fp().toW(), rhs.fp().toW()); @@ -1647,6 +1847,31 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, subv_h(dst.fp().toW(), kSimd128RegZero, src.fp().toW()); } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue(this, dst, src, MSA_BRANCH_H); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + MSARegister scratch0 = kSimd128RegZero; + MSARegister scratch1 = 
kSimd128ScratchReg; + srli_h(scratch0, src.fp().toW(), 15); + srli_w(scratch1, scratch0, 15); + or_v(scratch0, scratch0, scratch1); + srli_d(scratch1, scratch0, 30); + or_v(scratch0, scratch0, scratch1); + shf_w(scratch1, scratch0, 0x0E); + slli_d(scratch1, scratch1, 4); + or_v(scratch0, scratch0, scratch1); + copy_u_b(dst.gp(), scratch0, 0); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { fill_h(kSimd128ScratchReg, rhs.gp()); @@ -1658,6 +1883,30 @@ void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, slli_h(dst.fp().toW(), lhs.fp().toW(), rhs & 15); } +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + fill_h(kSimd128ScratchReg, rhs.gp()); + sra_h(dst.fp().toW(), lhs.fp().toW(), kSimd128ScratchReg); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + srai_h(dst.fp().toW(), lhs.fp().toW(), rhs & 15); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + fill_h(kSimd128ScratchReg, rhs.gp()); + srl_h(dst.fp().toW(), lhs.fp().toW(), kSimd128ScratchReg); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + srli_h(dst.fp().toW(), lhs.fp().toW(), rhs & 15); +} + void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { addv_h(dst.fp().toW(), lhs.fp().toW(), rhs.fp().toW()); @@ -1727,6 +1976,29 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, subv_w(dst.fp().toW(), kSimd128RegZero, src.fp().toW()); } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue(this, dst, src, MSA_BRANCH_W); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + MSARegister scratch0 = kSimd128RegZero; + MSARegister scratch1 = kSimd128ScratchReg; + srli_w(scratch0, src.fp().toW(), 31); + srli_d(scratch1, scratch0, 31); + or_v(scratch0, scratch0, scratch1); + shf_w(scratch1, scratch0, 0x0E); + slli_d(scratch1, scratch1, 2); + or_v(scratch0, scratch0, scratch1); + copy_u_b(dst.gp(), scratch0, 0); +} + void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { fill_w(kSimd128ScratchReg, rhs.gp()); @@ -1738,6 +2010,30 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, slli_w(dst.fp().toW(), lhs.fp().toW(), rhs & 31); } +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + fill_w(kSimd128ScratchReg, rhs.gp()); + sra_w(dst.fp().toW(), lhs.fp().toW(), kSimd128ScratchReg); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + srai_w(dst.fp().toW(), lhs.fp().toW(), rhs & 31); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + fill_w(kSimd128ScratchReg, rhs.gp()); + srl_w(dst.fp().toW(), lhs.fp().toW(), kSimd128ScratchReg); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + srli_w(dst.fp().toW(), lhs.fp().toW(), rhs & 31); +} + void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister 
rhs) { addv_w(dst.fp().toW(), lhs.fp().toW(), rhs.fp().toW()); @@ -1794,6 +2090,30 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, slli_d(dst.fp().toW(), lhs.fp().toW(), rhs & 63); } +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + fill_d(kSimd128ScratchReg, rhs.gp()); + sra_d(dst.fp().toW(), lhs.fp().toW(), kSimd128ScratchReg); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + srai_d(dst.fp().toW(), lhs.fp().toW(), rhs & 63); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + fill_d(kSimd128ScratchReg, rhs.gp()); + srl_d(dst.fp().toW(), lhs.fp().toW(), kSimd128ScratchReg); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + srli_d(dst.fp().toW(), lhs.fp().toW(), rhs & 63); +} + void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { addv_d(dst.fp().toW(), lhs.fp().toW(), rhs.fp().toW()); @@ -1959,6 +2279,26 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, bsel_v(dst_msa, scratch0, scratch1); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + ftrunc_s_w(dst.fp().toW(), src.fp().toW()); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + ftrunc_u_w(dst.fp().toW(), src.fp().toW()); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + ffint_s_w(dst.fp().toW(), src.fp().toW()); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + ffint_u_w(dst.fp().toW(), src.fp().toW()); +} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { @@ -2197,7 +2537,7 @@ void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { } void LiftoffAssembler::CallTrapCallbackForTesting() { - PrepareCallCFunction(0, GetUnusedRegister(kGpReg).gp()); + PrepareCallCFunction(0, GetUnusedRegister(kGpReg, {}).gp()); CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0); } diff --git a/chromium/v8/src/wasm/baseline/ppc/liftoff-assembler-ppc.h b/chromium/v8/src/wasm/baseline/ppc/liftoff-assembler-ppc.h index e02ab95ae4b..920dda4fe6c 100644 --- a/chromium/v8/src/wasm/baseline/ppc/liftoff-assembler-ppc.h +++ b/chromium/v8/src/wasm/baseline/ppc/liftoff-assembler-ppc.h @@ -539,6 +539,20 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, bailout(kUnsupportedArchitecture, "emit_f64_set_cond"); } +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind transform, + uint32_t* protected_load_pc) { + bailout(kSimd, "Load transform unimplemented"); +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kUnsupportedArchitecture, "emit_s8x16_swizzle"); +} + void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_f64x2splat"); @@ -698,6 +712,28 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "i64x2_shli"); } +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + 
LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i64x2_shr_s"); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i64x2_shri_s"); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i64x2_shr_u"); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i64x2_shri_u"); +} + void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kUnsupportedArchitecture, "emit_i64x2add"); @@ -736,6 +772,21 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i32x4neg"); } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v32x4_anytrue"); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v32x4_alltrue"); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i32x4_bitmask"); +} + void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "i32x4_shl"); @@ -746,6 +797,28 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "i32x4_shli"); } +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i32x4_shr_s"); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i32x4_shri_s"); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i32x4_shr_u"); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i32x4_shri_u"); +} + void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kUnsupportedArchitecture, "emit_i32x4add"); @@ -795,6 +868,21 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i16x8neg"); } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v16x8_anytrue"); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v16x8_alltrue"); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i16x8_bitmask"); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "i16x8_shl"); @@ -805,6 +893,28 @@ void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "i16x8_shli"); } +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i16x8_shr_s"); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i16x8_shri_s"); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i16x8_shr_u"); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i16x8_shri_u"); +} + void 
LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kUnsupportedArchitecture, "emit_i16x8add"); @@ -887,6 +997,13 @@ void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i16x8extractlane_s"); } +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + bailout(kSimd, "s8x16_shuffle"); +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_i8x16splat"); @@ -910,6 +1027,21 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i8x16neg"); } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v8x16_anytrue"); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v8x16_alltrue"); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i8x16_bitmask"); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "i8x16_shl"); @@ -920,6 +1052,28 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "i8x16_shli"); } +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i8x16_shr_s"); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i8x16_shri_s"); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i8x16_shr_u"); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i8x16_shri_u"); +} + void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { @@ -1117,6 +1271,26 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_s128select"); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i32x4_sconvert_f32x4"); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i32x4_uconvert_f32x4"); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "f32x4_sconvert_i32x4"); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "f32x4_uconvert_i32x4"); +} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { diff --git a/chromium/v8/src/wasm/baseline/s390/liftoff-assembler-s390.h b/chromium/v8/src/wasm/baseline/s390/liftoff-assembler-s390.h index 704fcb81d74..803358c97e7 100644 --- a/chromium/v8/src/wasm/baseline/s390/liftoff-assembler-s390.h +++ b/chromium/v8/src/wasm/baseline/s390/liftoff-assembler-s390.h @@ -543,6 +543,20 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, bailout(kUnsupportedArchitecture, "emit_f64_set_cond"); } +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind 
transform, + uint32_t* protected_load_pc) { + bailout(kSimd, "Load transform unimplemented"); +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kUnsupportedArchitecture, "emit_s8x16_swizzle"); +} + void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_f64x2splat"); @@ -702,6 +716,28 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "i64x2_shli"); } +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i64x2_shr_s"); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i64x2_shri_s"); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i64x2_shr_u"); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i64x2_shri_u"); +} + void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kUnsupportedArchitecture, "emit_i64x2add"); @@ -740,6 +776,21 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i32x4neg"); } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v32x4_anytrue"); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v32x4_alltrue"); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i32x4_bitmask"); +} + void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "i32x4_shl"); @@ -750,6 +801,28 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "i32x4_shli"); } +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i32x4_shr_s"); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i32x4_shri_s"); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i32x4_shr_u"); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i32x4_shri_u"); +} + void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kUnsupportedArchitecture, "emit_i32x4add"); @@ -799,6 +872,21 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i16x8neg"); } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v16x8_anytrue"); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v16x8_alltrue"); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i16x8_bitmask"); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "i16x8_shl"); @@ -809,6 +897,28 @@ void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, 
bailout(kSimd, "i16x8_shli"); } +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i16x8_shr_s"); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i16x8_shri_s"); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i16x8_shr_u"); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i16x8_shri_u"); +} + void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kUnsupportedArchitecture, "emit_i16x8add"); @@ -891,6 +1001,13 @@ void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i16x8extractlane_s"); } +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + bailout(kSimd, "s8x16_shuffle"); +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_i8x16splat"); @@ -920,6 +1037,21 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i8x16neg"); } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v8x16_anytrue"); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "v8x16_alltrue"); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i8x16_bitmask"); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kSimd, "i8x16_shl"); @@ -930,6 +1062,28 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "i8x16_shli"); } +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i8x16_shr_s"); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i8x16_shri_s"); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + bailout(kSimd, "i8x16_shr_u"); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + bailout(kSimd, "i8x16_shri_u"); +} + void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { bailout(kUnsupportedArchitecture, "emit_i8x16add"); @@ -1149,6 +1303,26 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_s128select"); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i32x4_sconvert_f32x4"); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "i32x4_uconvert_f32x4"); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "f32x4_sconvert_i32x4"); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + bailout(kSimd, "f32x4_uconvert_i32x4"); +} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, 
LiftoffRegister lhs, LiftoffRegister rhs) { diff --git a/chromium/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h b/chromium/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h index 7638c4f9cc0..83571a18f4c 100644 --- a/chromium/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h +++ b/chromium/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h @@ -8,7 +8,6 @@ #include "src/wasm/baseline/liftoff-assembler.h" #include "src/codegen/assembler.h" -#include "src/wasm/value-type.h" namespace v8 { namespace internal { @@ -306,8 +305,6 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, case LoadType::kS128Load: Movdqu(dst.fp(), src_op); break; - default: - UNREACHABLE(); } } @@ -345,8 +342,6 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, case StoreType::kS128Store: Movdqu(dst_op, src.fp()); break; - default: - UNREACHABLE(); } } @@ -1060,10 +1055,10 @@ inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst, void (Assembler::*emit_shift)(Register)) { // If dst is rcx, compute into the scratch register first, then move to rcx. if (dst == rcx) { - assm->Move(kScratchRegister, src, ValueType(type)); - if (amount != rcx) assm->Move(rcx, amount, ValueType(type)); + assm->Move(kScratchRegister, src, ValueType::Primitive(type)); + if (amount != rcx) assm->Move(rcx, amount, ValueType::Primitive(type)); (assm->*emit_shift)(kScratchRegister); - assm->Move(rcx, kScratchRegister, ValueType(type)); + assm->Move(rcx, kScratchRegister, ValueType::Primitive(type)); return; } @@ -1075,11 +1070,11 @@ inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst, src == rcx || assm->cache_state()->is_used(LiftoffRegister(rcx)); if (use_scratch) assm->movq(kScratchRegister, rcx); if (src == rcx) src = kScratchRegister; - assm->Move(rcx, amount, ValueType(type)); + assm->Move(rcx, amount, ValueType::Primitive(type)); } // Do the actual shift. - if (dst != src) assm->Move(dst, src, ValueType(type)); + if (dst != src) assm->Move(dst, src, ValueType::Primitive(type)); (assm->*emit_shift)(dst); // Restore rcx if needed. @@ -1620,6 +1615,7 @@ void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) { } namespace liftoff { +#define __ assm-> // Used for float to int conversions. If the value in {converted_back} equals // {src} afterwards, the conversion succeeded. 
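In other words, both the trapping truncation below and the newly added saturating variants rely on the same round-trip check: truncate toward zero, convert back to float, and accept the result only when the round-tripped value compares equal to the rounded input, with the parity flag catching NaN. A scalar sketch of that check, illustrative only and shown for the f64 -> i32 case (the assembler handles the other type combinations analogously):

#include <cmath>
#include <cstdint>

// Illustrative sketch only: mirrors the Roundsd / Cvttsd2si / Cvtlsi2sd /
// Ucomisd sequence for f64 -> i32. Returns false when the input is NaN or out
// of range, which the generated code turns into a trap (or a clamped value
// for the saturating opcodes).
inline bool TryTruncateF64ToI32(double src, int32_t* dst) {
  if (std::isnan(src)) return false;              // unordered compare (PF set)
  double rounded = std::trunc(src);               // round toward zero
  if (rounded >= 2147483648.0 || rounded < -2147483648.0) return false;
  *dst = static_cast<int32_t>(rounded);           // truncating conversion
  return static_cast<double>(*dst) == rounded;    // convert back and compare
}

The saturating helpers added further down replace the trap with a clamp: NaN becomes 0, and out-of-range values clamp to the destination type's minimum or maximum.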
template <typename dst_type, typename src_type> @@ -1628,29 +1624,29 @@ inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst, DoubleRegister converted_back) { if (std::is_same<double, src_type>::value) { // f64 if (std::is_same<int32_t, dst_type>::value) { // f64 -> i32 - assm->Cvttsd2si(dst, src); - assm->Cvtlsi2sd(converted_back, dst); + __ Cvttsd2si(dst, src); + __ Cvtlsi2sd(converted_back, dst); } else if (std::is_same<uint32_t, dst_type>::value) { // f64 -> u32 - assm->Cvttsd2siq(dst, src); - assm->movl(dst, dst); - assm->Cvtqsi2sd(converted_back, dst); + __ Cvttsd2siq(dst, src); + __ movl(dst, dst); + __ Cvtqsi2sd(converted_back, dst); } else if (std::is_same<int64_t, dst_type>::value) { // f64 -> i64 - assm->Cvttsd2siq(dst, src); - assm->Cvtqsi2sd(converted_back, dst); + __ Cvttsd2siq(dst, src); + __ Cvtqsi2sd(converted_back, dst); } else { UNREACHABLE(); } } else { // f32 if (std::is_same<int32_t, dst_type>::value) { // f32 -> i32 - assm->Cvttss2si(dst, src); - assm->Cvtlsi2ss(converted_back, dst); + __ Cvttss2si(dst, src); + __ Cvtlsi2ss(converted_back, dst); } else if (std::is_same<uint32_t, dst_type>::value) { // f32 -> u32 - assm->Cvttss2siq(dst, src); - assm->movl(dst, dst); - assm->Cvtqsi2ss(converted_back, dst); + __ Cvttss2siq(dst, src); + __ movl(dst, dst); + __ Cvtqsi2ss(converted_back, dst); } else if (std::is_same<int64_t, dst_type>::value) { // f32 -> i64 - assm->Cvttss2siq(dst, src); - assm->Cvtqsi2ss(converted_back, dst); + __ Cvttss2siq(dst, src); + __ Cvtqsi2ss(converted_back, dst); } else { UNREACHABLE(); } @@ -1661,7 +1657,7 @@ template <typename dst_type, typename src_type> inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst, DoubleRegister src, Label* trap) { if (!CpuFeatures::IsSupported(SSE4_1)) { - assm->bailout(kMissingCPUFeature, "no SSE4.1"); + __ bailout(kMissingCPUFeature, "no SSE4.1"); return true; } CpuFeatureScope feature(assm, SSE4_1); @@ -1670,24 +1666,143 @@ inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst, DoubleRegister converted_back = kScratchDoubleReg2; if (std::is_same<double, src_type>::value) { // f64 - assm->Roundsd(rounded, src, kRoundToZero); + __ Roundsd(rounded, src, kRoundToZero); } else { // f32 - assm->Roundss(rounded, src, kRoundToZero); + __ Roundss(rounded, src, kRoundToZero); } ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded, converted_back); if (std::is_same<double, src_type>::value) { // f64 - assm->Ucomisd(converted_back, rounded); + __ Ucomisd(converted_back, rounded); } else { // f32 - assm->Ucomiss(converted_back, rounded); + __ Ucomiss(converted_back, rounded); } // Jump to trap if PF is 0 (one of the operands was NaN) or they are not // equal. 
-  assm->j(parity_even, trap);
-  assm->j(not_equal, trap);
+  __ j(parity_even, trap);
+  __ j(not_equal, trap);
+  return true;
+}
+
+template <typename dst_type, typename src_type>
+inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
+                                      DoubleRegister src) {
+  if (!CpuFeatures::IsSupported(SSE4_1)) {
+    __ bailout(kMissingCPUFeature, "no SSE4.1");
+    return true;
+  }
+  CpuFeatureScope feature(assm, SSE4_1);
+
+  Label done;
+  Label not_nan;
+  Label src_positive;
+
+  DoubleRegister rounded = kScratchDoubleReg;
+  DoubleRegister converted_back = kScratchDoubleReg2;
+  DoubleRegister zero_reg = kScratchDoubleReg;
+
+  if (std::is_same<double, src_type>::value) {  // f64
+    __ Roundsd(rounded, src, kRoundToZero);
+  } else {  // f32
+    __ Roundss(rounded, src, kRoundToZero);
+  }
+
+  ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
+                                               converted_back);
+  if (std::is_same<double, src_type>::value) {  // f64
+    __ Ucomisd(converted_back, rounded);
+  } else {  // f32
+    __ Ucomiss(converted_back, rounded);
+  }
+
+  // Return 0 if PF is 0 (one of the operands was NaN).
+  __ j(parity_odd, &not_nan);
+  __ xorl(dst, dst);
+  __ jmp(&done);
+
+  __ bind(&not_nan);
+  // If rounding is as expected, return the result.
+  __ j(equal, &done);
+
+  __ xorpd(zero_reg, zero_reg);
+
+  // If out of bounds, check whether src is positive.
+  if (std::is_same<double, src_type>::value) {  // f64
+    __ Ucomisd(src, zero_reg);
+  } else {  // f32
+    __ Ucomiss(src, zero_reg);
+  }
+  __ j(above, &src_positive);
+  if (std::is_same<int32_t, dst_type>::value ||
+      std::is_same<uint32_t, dst_type>::value) {  // i32
+    __ movl(
+        dst,
+        Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::min())));
+  } else if (std::is_same<int64_t, dst_type>::value) {  // i64
+    __ movq(dst, Immediate64(std::numeric_limits<dst_type>::min()));
+  } else {
+    UNREACHABLE();
+  }
+  __ jmp(&done);
+
+  __ bind(&src_positive);
+  if (std::is_same<int32_t, dst_type>::value ||
+      std::is_same<uint32_t, dst_type>::value) {  // i32
+    __ movl(
+        dst,
+        Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::max())));
+  } else if (std::is_same<int64_t, dst_type>::value) {  // i64
+    __ movq(dst, Immediate64(std::numeric_limits<dst_type>::max()));
+  } else {
+    UNREACHABLE();
+  }
+
+  __ bind(&done);
+  return true;
+}
+
+template <typename src_type>
+inline bool EmitSatTruncateFloatToUInt64(LiftoffAssembler* assm, Register dst,
+                                         DoubleRegister src) {
+  if (!CpuFeatures::IsSupported(SSE4_1)) {
+    __ bailout(kMissingCPUFeature, "no SSE4.1");
+    return true;
+  }
+  CpuFeatureScope feature(assm, SSE4_1);
+
+  Label done;
+  Label neg_or_nan;
+  Label overflow;
+
+  DoubleRegister zero_reg = kScratchDoubleReg;
+
+  __ xorpd(zero_reg, zero_reg);
+  if (std::is_same<double, src_type>::value) {  // f64
+    __ Ucomisd(src, zero_reg);
+  } else {  // f32
+    __ Ucomiss(src, zero_reg);
+  }
+  // Check for NaN.
+  __ j(parity_even, &neg_or_nan);
+  __ j(below, &neg_or_nan);
+  if (std::is_same<double, src_type>::value) {  // f64
+    __ Cvttsd2uiq(dst, src, &overflow);
+  } else {  // f32
+    __ Cvttss2uiq(dst, src, &overflow);
+  }
+  __ jmp(&done);
+
+  __ bind(&neg_or_nan);
+  __ movq(dst, zero_reg);
+  __ jmp(&done);
+
+  __ bind(&overflow);
+  __ movq(dst, Immediate64(std::numeric_limits<uint64_t>::max()));
+  __ bind(&done);
   return true;
 }
+#undef __
 }  // namespace liftoff

 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
@@ -1709,6 +1824,18 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
     case kExprI32UConvertF64:
       return liftoff::EmitTruncateFloatToInt<uint32_t,
double>(this, dst.gp(), src.fp(), trap); + case kExprI32SConvertSatF32: + return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(), + src.fp()); + case kExprI32UConvertSatF32: + return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(), + src.fp()); + case kExprI32SConvertSatF64: + return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(), + src.fp()); + case kExprI32UConvertSatF64: + return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>( + this, dst.gp(), src.fp()); case kExprI32ReinterpretF32: Movd(dst.gp(), src.fp()); return true; @@ -1731,6 +1858,20 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, Cvttsd2uiq(dst.gp(), src.fp(), trap); return true; } + case kExprI64SConvertSatF32: + return liftoff::EmitSatTruncateFloatToInt<int64_t, float>(this, dst.gp(), + src.fp()); + case kExprI64UConvertSatF32: { + return liftoff::EmitSatTruncateFloatToUInt64<float>(this, dst.gp(), + src.fp()); + } + case kExprI64SConvertSatF64: + return liftoff::EmitSatTruncateFloatToInt<int64_t, double>(this, dst.gp(), + src.fp()); + case kExprI64UConvertSatF64: { + return liftoff::EmitSatTruncateFloatToUInt64<double>(this, dst.gp(), + src.fp()); + } case kExprI64UConvertI32: AssertZeroExtended(src.gp()); if (dst.gp() != src.gp()) movl(dst.gp(), src.gp()); @@ -1975,8 +2116,185 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst, (assm->*sse_op)(dst.fp(), shift); } } + +template <bool is_signed> +void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister lhs, LiftoffRegister rhs) { + // Same algorithm as the one in code-generator-x64.cc. + assm->Punpckhbw(kScratchDoubleReg, lhs.fp()); + assm->Punpcklbw(dst.fp(), lhs.fp()); + // Prepare shift value + assm->movq(kScratchRegister, rhs.gp()); + // Take shift value modulo 8. + assm->andq(kScratchRegister, Immediate(7)); + assm->addq(kScratchRegister, Immediate(8)); + assm->Movq(liftoff::kScratchDoubleReg2, kScratchRegister); + if (is_signed) { + assm->Psraw(kScratchDoubleReg, liftoff::kScratchDoubleReg2); + assm->Psraw(dst.fp(), liftoff::kScratchDoubleReg2); + assm->Packsswb(dst.fp(), kScratchDoubleReg); + } else { + assm->Psrlw(kScratchDoubleReg, liftoff::kScratchDoubleReg2); + assm->Psrlw(dst.fp(), liftoff::kScratchDoubleReg2); + assm->Packuswb(dst.fp(), kScratchDoubleReg); + } +} + +// Can be used by both the immediate and register version of the shifts. psraq +// is only available in AVX512, so we can't use it yet. +template <typename ShiftOperand> +void EmitI64x2ShrS(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister lhs, ShiftOperand rhs, + bool shift_is_rcx = false) { + bool restore_rcx = false; + Register backup = kScratchRegister2; + if (!shift_is_rcx) { + if (assm->cache_state()->is_used(LiftoffRegister(rcx))) { + restore_rcx = true; + assm->movq(backup, rcx); + } + assm->movl(rcx, rhs); + } + + Register tmp = kScratchRegister; + + assm->Pextrq(tmp, lhs.fp(), int8_t{0x0}); + assm->sarq_cl(tmp); + assm->Pinsrq(dst.fp(), tmp, int8_t{0x0}); + + assm->Pextrq(tmp, lhs.fp(), int8_t{0x1}); + assm->sarq_cl(tmp); + assm->Pinsrq(dst.fp(), tmp, int8_t{0x1}); + + // restore rcx. 
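Illustrative sketch (not part of the patch): per value, the EmitSatTruncateFloatToInt helper above implements the usual non-trapping saturating truncation used by the *ConvertSat* opcodes. A scalar C++ model of those semantics (sat_trunc is a hypothetical name):

  #include <cmath>
  #include <cstdint>
  #include <limits>

  // NaN -> 0; values below/above the destination range clamp to min/max;
  // everything else truncates toward zero.
  template <typename Int, typename Float>
  Int sat_trunc(Float src) {
    if (std::isnan(src)) return Int{0};
    Float rounded = std::trunc(src);
    if (rounded <= static_cast<Float>(std::numeric_limits<Int>::min()))
      return std::numeric_limits<Int>::min();
    if (rounded >= static_cast<Float>(std::numeric_limits<Int>::max()))
      return std::numeric_limits<Int>::max();
    return static_cast<Int>(rounded);
  }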
+ if (restore_rcx) { + assm->movq(rcx, backup); + } +} + +inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + assm->xorq(dst.gp(), dst.gp()); + assm->Ptest(src.fp(), src.fp()); + assm->setcc(not_equal, dst.gp()); +} + +template <void (TurboAssembler::*pcmp)(XMMRegister, XMMRegister)> +inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + XMMRegister tmp = kScratchDoubleReg; + assm->xorq(dst.gp(), dst.gp()); + assm->Pxor(tmp, tmp); + (assm->*pcmp)(tmp, src.fp()); + assm->Ptest(tmp, tmp); + assm->setcc(equal, dst.gp()); +} + } // namespace liftoff +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind transform, + uint32_t* protected_load_pc) { + if (emit_debug_code() && offset_reg != no_reg) { + AssertZeroExtended(offset_reg); + } + Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm); + *protected_load_pc = pc_offset(); + MachineType memtype = type.mem_type(); + if (transform == LoadTransformationKind::kExtend) { + if (memtype == MachineType::Int8()) { + Pmovsxbw(dst.fp(), src_op); + } else if (memtype == MachineType::Uint8()) { + Pmovzxbw(dst.fp(), src_op); + } else if (memtype == MachineType::Int16()) { + Pmovsxwd(dst.fp(), src_op); + } else if (memtype == MachineType::Uint16()) { + Pmovzxwd(dst.fp(), src_op); + } else if (memtype == MachineType::Int32()) { + Pmovsxdq(dst.fp(), src_op); + } else if (memtype == MachineType::Uint32()) { + Pmovzxdq(dst.fp(), src_op); + } + } else { + DCHECK_EQ(LoadTransformationKind::kSplat, transform); + if (memtype == MachineType::Int8()) { + Pinsrb(dst.fp(), src_op, 0); + Pxor(kScratchDoubleReg, kScratchDoubleReg); + Pshufb(dst.fp(), kScratchDoubleReg); + } else if (memtype == MachineType::Int16()) { + Pinsrw(dst.fp(), src_op, 0); + Pshuflw(dst.fp(), dst.fp(), uint8_t{0}); + Punpcklqdq(dst.fp(), dst.fp()); + } else if (memtype == MachineType::Int32()) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope avx_scope(this, AVX); + vbroadcastss(dst.fp(), src_op); + } else { + Movss(dst.fp(), src_op); + Shufps(dst.fp(), dst.fp(), byte{0}); + } + } else if (memtype == MachineType::Int64()) { + Movddup(dst.fp(), src_op); + } + } +} + +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + LiftoffRegister tmp_simd = + GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs, rhs)); + Movups(kScratchDoubleReg, lhs.fp()); + + uint64_t mask1[2] = {}; + for (int i = 15; i >= 0; i--) { + uint8_t lane = shuffle[i]; + int j = i >> 3; + mask1[j] <<= 8; + mask1[j] |= lane < kSimd128Size ? lane : 0x80; + } + TurboAssembler::Move(tmp_simd.fp(), mask1[0]); + movq(kScratchRegister, mask1[1]); + Pinsrq(tmp_simd.fp(), kScratchRegister, int8_t{1}); + Pshufb(kScratchDoubleReg, tmp_simd.fp()); + + uint64_t mask2[2] = {}; + for (int i = 15; i >= 0; i--) { + uint8_t lane = shuffle[i]; + int j = i >> 3; + mask2[j] <<= 8; + mask2[j] |= lane >= kSimd128Size ? 
(lane & 0x0F) : 0x80; + } + TurboAssembler::Move(tmp_simd.fp(), mask2[0]); + movq(kScratchRegister, mask2[1]); + Pinsrq(tmp_simd.fp(), kScratchRegister, int8_t{1}); + + if (dst.fp() != rhs.fp()) { + Movups(dst.fp(), rhs.fp()); + } + Pshufb(dst.fp(), tmp_simd.fp()); + Por(dst.fp(), kScratchDoubleReg); +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + XMMRegister mask = + GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp(); + // Out-of-range indices should return 0, add 112 (0x70) so that any value > 15 + // saturates to 128 (top bit set), so pshufb will zero that lane. + TurboAssembler::Move(mask, uint32_t{0x70707070}); + Pshufd(mask, mask, uint8_t{0x0}); + Paddusb(mask, rhs.fp()); + if (lhs != dst) { + Movaps(dst.fp(), lhs.fp()); + } + Pshufb(dst.fp(), mask); +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { Movd(dst.fp(), src.gp()); @@ -2302,6 +2620,21 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + Pmovmskb(dst.gp(), src.fp()); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { static constexpr RegClass tmp_simd_rc = reg_class_for(ValueType::kS128); @@ -2347,6 +2680,48 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, Pand(dst.fp(), kScratchDoubleReg); } +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + Punpckhbw(kScratchDoubleReg, lhs.fp()); + Punpcklbw(dst.fp(), lhs.fp()); + uint8_t shift = (rhs & 7) + 8; + Psraw(kScratchDoubleReg, shift); + Psraw(dst.fp(), shift); + Packsswb(dst.fp(), kScratchDoubleReg); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + // Perform 16-bit shift, then mask away high bits. 
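Illustrative sketch (not part of the patch): in emit_s8x16_swizzle above, the unsigned saturating add of 0x70 pushes every index above 15 into the 0x80..0xFF range, and pshufb zeroes any lane whose index byte has its top bit set. One lane of that behaviour in scalar C++ (swizzle_lane is a hypothetical name):

  #include <cstdint>

  uint8_t swizzle_lane(const uint8_t src[16], uint8_t index) {
    unsigned biased = index + 0x70u;                                 // paddusb
    uint8_t b = biased > 0xFF ? 0xFF : static_cast<uint8_t>(biased);
    // pshufb: a set top bit zeroes the lane; otherwise bits 0..3 select.
    return (b & 0x80) ? 0 : src[b & 0x0F];
  }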
+ uint8_t shift = rhs & 7; // i.InputInt3(1); + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vpsrlw(dst.fp(), lhs.fp(), byte{shift}); + } else if (dst != lhs) { + Movaps(dst.fp(), lhs.fp()); + psrlw(dst.fp(), byte{shift}); + } + + uint8_t bmask = 0xff >> shift; + uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask; + movl(kScratchRegister, Immediate(mask)); + Movd(kScratchDoubleReg, kScratchRegister); + Pshufd(kScratchDoubleReg, kScratchDoubleReg, byte{0}); + Pand(dst.fp(), kScratchDoubleReg); +} + void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>( @@ -2489,6 +2864,24 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + XMMRegister tmp = kScratchDoubleReg; + Packsswb(tmp, src.fp()); + Pmovmskb(dst.gp(), tmp); + shrq(dst.gp(), Immediate(8)); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst, @@ -2501,6 +2894,32 @@ void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>( + this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>( @@ -2587,6 +3006,21 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + Movmskps(dst.gp(), src.fp()); +} + void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst, @@ -2599,6 +3033,32 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { 
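Illustrative sketch (not part of the patch): x64 has no packed 8-bit shift, so the i8x16 arithmetic shift-right above widens each byte into the high half of a 16-bit lane, shifts by (amount & 7) + 8, and re-packs. One lane of that trick in scalar C++ (i8_shr_s is a hypothetical name; >> stands for the arithmetic shift the hardware performs):

  #include <cstdint>

  int8_t i8_shr_s(int8_t lane, int32_t amount) {
    // punpck{l,h}bw places the source byte in the high byte of a 16-bit lane;
    // the low byte is don't-care data, modelled here as zero.
    int16_t widened = static_cast<int16_t>(lane * 256);
    // psraw by (amount & 7) + 8 shifts the don't-care byte out and leaves the
    // sign-correct 8-bit result, which packsswb then narrows unchanged.
    return static_cast<int8_t>(widened >> ((amount & 7) + 8));
  }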
+ liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>( + this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>( @@ -2670,6 +3130,31 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitI64x2ShrS(this, dst, lhs, rhs.gp(), + /*shift_is_rcx=*/rhs.gp() == rcx); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitI64x2ShrS(this, dst, lhs, Immediate(rhs)); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>( @@ -2937,6 +3422,89 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, Andnpd(dst.fp(), kScratchDoubleReg); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + // NAN->0 + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vcmpeqps(kScratchDoubleReg, src.fp(), src.fp()); + vpand(dst.fp(), src.fp(), kScratchDoubleReg); + } else { + movaps(kScratchDoubleReg, src.fp()); + cmpeqps(kScratchDoubleReg, kScratchDoubleReg); + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + pand(dst.fp(), kScratchDoubleReg); + } + // Set top bit if >= 0 (but not -0.0!). + Pxor(kScratchDoubleReg, dst.fp()); + // Convert to int. + Cvttps2dq(dst.fp(), dst.fp()); + // Set top bit if >=0 is now < 0. + Pand(kScratchDoubleReg, dst.fp()); + Psrad(kScratchDoubleReg, byte{31}); + // Set positive overflow lanes to 0x7FFFFFFF. + Pxor(dst.fp(), kScratchDoubleReg); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + // NAN->0, negative->0. + Pxor(kScratchDoubleReg, kScratchDoubleReg); + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vmaxps(dst.fp(), src.fp(), kScratchDoubleReg); + } else { + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + maxps(dst.fp(), kScratchDoubleReg); + } + // scratch: float representation of max_signed. 
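Illustrative sketch (not part of the patch): emit_i32x4_sconvert_f32x4 above masks NaN lanes to zero and fixes up positive overflow, since cvttps2dq reports both as 0x80000000. Per lane, the intended result is roughly the following scalar C++ (sconvert_lane is a hypothetical name):

  #include <cmath>
  #include <cstdint>
  #include <limits>

  int32_t sconvert_lane(float f) {
    if (std::isnan(f)) return 0;                   // cmpeqps + pand
    if (f >= 2147483648.0f)                        // positive overflow
      return std::numeric_limits<int32_t>::max();  // pxor/psrad fixup
    if (f < -2147483648.0f)                        // negative overflow
      return std::numeric_limits<int32_t>::min();  // cvttps2dq already 0x80000000
    return static_cast<int32_t>(f);                // in range: plain truncation
  }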
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); + Psrld(kScratchDoubleReg, uint8_t{1}); // 0x7fffffff + Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000 + // scratch2: convert (src-max_signed). + // Set positive overflow lanes to 0x7FFFFFFF. + // Set negative lanes to 0. + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vsubps(liftoff::kScratchDoubleReg2, dst.fp(), kScratchDoubleReg); + } else { + movaps(liftoff::kScratchDoubleReg2, dst.fp()); + subps(liftoff::kScratchDoubleReg2, kScratchDoubleReg); + } + Cmpleps(kScratchDoubleReg, liftoff::kScratchDoubleReg2); + Cvttps2dq(liftoff::kScratchDoubleReg2, liftoff::kScratchDoubleReg2); + Pxor(liftoff::kScratchDoubleReg2, kScratchDoubleReg); + Pxor(kScratchDoubleReg, kScratchDoubleReg); + Pmaxsd(liftoff::kScratchDoubleReg2, kScratchDoubleReg); + // Convert to int. Overflow lanes above max_signed will be 0x80000000. + Cvttps2dq(dst.fp(), dst.fp()); + // Add (src-max_signed) for overflow lanes. + Paddd(dst.fp(), liftoff::kScratchDoubleReg2); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + Cvtdq2ps(dst.fp(), src.fp()); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + Pxor(kScratchDoubleReg, kScratchDoubleReg); // Zeros. + Pblendw(kScratchDoubleReg, src.fp(), uint8_t{0x55}); // Get lo 16 bits. + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vpsubd(dst.fp(), src.fp(), kScratchDoubleReg); // Get hi 16 bits. + } else { + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + psubd(dst.fp(), kScratchDoubleReg); + } + Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // Convert lo exactly. + Psrld(dst.fp(), byte{1}); // Divide by 2 to get in unsigned range. + Cvtdq2ps(dst.fp(), dst.fp()); // Convert hi, exactly. + Addps(dst.fp(), dst.fp()); // Double hi, exactly. + Addps(dst.fp(), kScratchDoubleReg); // Add hi and lo, may round. +} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { |
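Illustrative sketch (not part of the patch): emit_f32x4_uconvert_i32x4 above converts unsigned 32-bit lanes by splitting each into a low 16-bit part that converts exactly and a halved high part that is converted, doubled, and added back, so only the final addition can round. One lane in scalar C++ (uconvert_lane is a hypothetical name):

  #include <cstdint>

  float uconvert_lane(uint32_t x) {
    uint32_t lo = x & 0xFFFFu;                                       // pblendw
    uint32_t hi = x - lo;                                            // psubd
    float lo_f = static_cast<float>(static_cast<int32_t>(lo));       // exact
    float hi_f = static_cast<float>(static_cast<int32_t>(hi >> 1));  // psrld, exact
    return hi_f + hi_f + lo_f;  // addps twice; only the last add may round
  }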