Diffstat (limited to 'chromium/v8/src/wasm/baseline/ia32')
-rw-r--r-- | chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h | 625 |
1 file changed, 591 insertions, 34 deletions
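The bulk of the insertions in the diff below add ia32 Liftoff support for saturating float-to-int conversions (kExprI32SConvertSatF32 and friends), a lock-prefixed AtomicAdd, and a batch of SIMD shift, shuffle, bitmask, and anytrue/alltrue operations. As a standalone reader aid (not part of the patch), the following portable C++ sketch models the semantics that the new liftoff::EmitSatTruncateFloatToInt helper emits inline with SSE4.1: NaN becomes 0, out-of-range values clamp to the destination type's min/max, and everything else truncates toward zero. The SatTruncate name and the scalar formulation are illustrative assumptions, not code from the patch.

#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

// Scalar model of the saturating truncation the patch lowers to SSE4.1:
// NaN -> 0, out-of-range -> clamp, otherwise round toward zero.
template <typename Dst, typename Src>
Dst SatTruncate(Src value) {
  if (std::isnan(value)) return Dst{0};                       // NaN -> 0
  // Compare in the source floating-point type so the cast below is in range.
  if (value <= static_cast<Src>(std::numeric_limits<Dst>::min()))
    return std::numeric_limits<Dst>::min();                   // clamp low
  if (value >= static_cast<Src>(std::numeric_limits<Dst>::max()))
    return std::numeric_limits<Dst>::max();                   // clamp high
  return static_cast<Dst>(std::trunc(value));                 // round to zero
}

int main() {
  std::cout << SatTruncate<int32_t, float>(-1.9f) << "\n";           // -1
  std::cout << SatTruncate<uint32_t, float>(-1.0f) << "\n";          // 0
  std::cout << SatTruncate<int32_t, double>(1e12) << "\n";           // 2147483647
  std::cout << SatTruncate<int32_t, float>(std::nanf("")) << "\n";   // 0
}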
diff --git a/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h b/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h index 7a1d629bf2d..468450aef66 100644 --- a/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h +++ b/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h @@ -130,7 +130,7 @@ inline Register GetTmpByteRegister(LiftoffAssembler* assm, Register candidate) { if (candidate.is_byte_register()) return candidate; // {GetUnusedRegister()} may insert move instructions to spill registers to // the stack. This is OK because {mov} does not change the status flags. - return assm->GetUnusedRegister(liftoff::kByteRegs).gp(); + return assm->GetUnusedRegister(liftoff::kByteRegs, {}).gp(); } inline void MoveStackValue(LiftoffAssembler* assm, const Operand& src, @@ -336,8 +336,6 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, case LoadType::kS128Load: movdqu(dst.fp(), src_op); break; - default: - UNREACHABLE(); } } @@ -405,8 +403,6 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, case StoreType::kS128Store: Movdqu(dst_op, src.fp()); break; - default: - UNREACHABLE(); } } @@ -494,7 +490,56 @@ void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg, void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type) { - bailout(kAtomics, "AtomicAdd"); + if (type.value() == StoreType::kI64Store) { + bailout(kAtomics, "AtomicAdd"); + return; + } + + DCHECK_EQ(value, result); + DCHECK(!cache_state()->is_used(result)); + bool is_64_bit_op = type.value_type() == kWasmI64; + + Register value_reg = is_64_bit_op ? value.low_gp() : value.gp(); + Register result_reg = is_64_bit_op ? result.low_gp() : result.gp(); + + bool is_byte_store = type.size() == 1; + LiftoffRegList pinned = + LiftoffRegList::ForRegs(dst_addr, value_reg, offset_reg); + + // Ensure that {value_reg} is a valid register. + if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) { + Register safe_value_reg = + GetUnusedRegister(liftoff::kByteRegs, pinned).gp(); + mov(safe_value_reg, value_reg); + value_reg = safe_value_reg; + } + + Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm); + lock(); + switch (type.value()) { + case StoreType::kI64Store8: + case StoreType::kI32Store8: + xadd_b(dst_op, value_reg); + movzx_b(result_reg, value_reg); + break; + case StoreType::kI64Store16: + case StoreType::kI32Store16: + xadd_w(dst_op, value_reg); + movzx_w(result_reg, value_reg); + break; + case StoreType::kI64Store32: + case StoreType::kI32Store: + xadd(dst_op, value_reg); + if (value_reg != result_reg) { + mov(result_reg, value_reg); + } + break; + default: + UNREACHABLE(); + } + if (is_64_bit_op) { + xor_(result.high_gp(), result.high_gp()); + } } void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg, @@ -1349,7 +1394,7 @@ inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst, // We need one tmp register to extract the sign bit. Get it right at the // beginning, such that the spilling code is not accidentially jumped over. - Register tmp = assm->GetUnusedRegister(kGpReg).gp(); + Register tmp = assm->GetUnusedRegister(kGpReg, {}).gp(); #define dop(name, ...) 
\ do { \ @@ -1412,9 +1457,9 @@ void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs, void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs, DoubleRegister rhs) { static constexpr int kF32SignBit = 1 << 31; - Register scratch = GetUnusedRegister(kGpReg).gp(); - Register scratch2 = - GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(scratch)).gp(); + LiftoffRegList pinned; + Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp(); + Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp(); Movd(scratch, lhs); // move {lhs} into {scratch}. and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}. Movd(scratch2, rhs); // move {rhs} into {scratch2}. @@ -1541,9 +1586,9 @@ void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, static constexpr int kF32SignBit = 1 << 31; // On ia32, we cannot hold the whole f64 value in a gp register, so we just // operate on the upper half (UH). - Register scratch = GetUnusedRegister(kGpReg).gp(); - Register scratch2 = - GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(scratch)).gp(); + LiftoffRegList pinned; + Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp(); + Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp(); Pextrd(scratch, lhs, 1); // move UH of {lhs} into {scratch}. and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}. @@ -1612,6 +1657,7 @@ void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) { } namespace liftoff { +#define __ assm-> // Used for float to int conversions. If the value in {converted_back} equals // {src} afterwards, the conversion succeeded. template <typename dst_type, typename src_type> @@ -1621,21 +1667,21 @@ inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst, LiftoffRegList pinned) { if (std::is_same<double, src_type>::value) { // f64 if (std::is_signed<dst_type>::value) { // f64 -> i32 - assm->cvttsd2si(dst, src); - assm->Cvtsi2sd(converted_back, dst); + __ cvttsd2si(dst, src); + __ Cvtsi2sd(converted_back, dst); } else { // f64 -> u32 - assm->Cvttsd2ui(dst, src, liftoff::kScratchDoubleReg); - assm->Cvtui2sd(converted_back, dst, - assm->GetUnusedRegister(kGpReg, pinned).gp()); + __ Cvttsd2ui(dst, src, liftoff::kScratchDoubleReg); + __ Cvtui2sd(converted_back, dst, + __ GetUnusedRegister(kGpReg, pinned).gp()); } } else { // f32 if (std::is_signed<dst_type>::value) { // f32 -> i32 - assm->cvttss2si(dst, src); - assm->Cvtsi2ss(converted_back, dst); + __ cvttss2si(dst, src); + __ Cvtsi2ss(converted_back, dst); } else { // f32 -> u32 - assm->Cvttss2ui(dst, src, liftoff::kScratchDoubleReg); - assm->Cvtui2ss(converted_back, dst, - assm->GetUnusedRegister(kGpReg, pinned).gp()); + __ Cvttss2ui(dst, src, liftoff::kScratchDoubleReg); + __ Cvtui2ss(converted_back, dst, + __ GetUnusedRegister(kGpReg, pinned).gp()); } } } @@ -1644,36 +1690,101 @@ template <typename dst_type, typename src_type> inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst, DoubleRegister src, Label* trap) { if (!CpuFeatures::IsSupported(SSE4_1)) { - assm->bailout(kMissingCPUFeature, "no SSE4.1"); + __ bailout(kMissingCPUFeature, "no SSE4.1"); return true; } CpuFeatureScope feature(assm, SSE4_1); LiftoffRegList pinned = LiftoffRegList::ForRegs(src, dst); DoubleRegister rounded = - pinned.set(assm->GetUnusedRegister(kFpReg, pinned)).fp(); + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); DoubleRegister converted_back = - 
pinned.set(assm->GetUnusedRegister(kFpReg, pinned)).fp(); + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); if (std::is_same<double, src_type>::value) { // f64 - assm->roundsd(rounded, src, kRoundToZero); + __ roundsd(rounded, src, kRoundToZero); } else { // f32 - assm->roundss(rounded, src, kRoundToZero); + __ roundss(rounded, src, kRoundToZero); } ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded, converted_back, pinned); if (std::is_same<double, src_type>::value) { // f64 - assm->ucomisd(converted_back, rounded); + __ ucomisd(converted_back, rounded); } else { // f32 - assm->ucomiss(converted_back, rounded); + __ ucomiss(converted_back, rounded); } // Jump to trap if PF is 0 (one of the operands was NaN) or they are not // equal. - assm->j(parity_even, trap); - assm->j(not_equal, trap); + __ j(parity_even, trap); + __ j(not_equal, trap); return true; } + +template <typename dst_type, typename src_type> +inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst, + DoubleRegister src) { + if (!CpuFeatures::IsSupported(SSE4_1)) { + __ bailout(kMissingCPUFeature, "no SSE4.1"); + return true; + } + CpuFeatureScope feature(assm, SSE4_1); + + Label done; + Label not_nan; + Label src_positive; + + LiftoffRegList pinned = LiftoffRegList::ForRegs(src, dst); + DoubleRegister rounded = + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); + DoubleRegister converted_back = + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); + DoubleRegister zero_reg = + pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp(); + + if (std::is_same<double, src_type>::value) { // f64 + __ roundsd(rounded, src, kRoundToZero); + } else { // f32 + __ roundss(rounded, src, kRoundToZero); + } + + ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded, + converted_back, pinned); + if (std::is_same<double, src_type>::value) { // f64 + __ ucomisd(converted_back, rounded); + } else { // f32 + __ ucomiss(converted_back, rounded); + } + + // Return 0 if PF is 0 (one of the operands was NaN) + __ j(parity_odd, ¬_nan); + __ xor_(dst, dst); + __ jmp(&done); + + __ bind(¬_nan); + // If rounding is as expected, return result + __ j(equal, &done); + + __ Xorpd(zero_reg, zero_reg); + + // if out-of-bounds, check if src is positive + if (std::is_same<double, src_type>::value) { // f64 + __ ucomisd(src, zero_reg); + } else { // f32 + __ ucomiss(src, zero_reg); + } + __ j(above, &src_positive); + __ mov(dst, Immediate(std::numeric_limits<dst_type>::min())); + __ jmp(&done); + + __ bind(&src_positive); + + __ mov(dst, Immediate(std::numeric_limits<dst_type>::max())); + + __ bind(&done); + return true; +} +#undef __ } // namespace liftoff bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, @@ -1695,6 +1806,18 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, case kExprI32UConvertF64: return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(), src.fp(), trap); + case kExprI32SConvertSatF32: + return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(), + src.fp()); + case kExprI32UConvertSatF32: + return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(), + src.fp()); + case kExprI32SConvertSatF64: + return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(), + src.fp()); + case kExprI32UConvertSatF64: + return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>( + this, dst.gp(), src.fp()); case kExprI32ReinterpretF32: Movd(dst.gp(), src.fp()); return true; @@ -2017,8 +2140,164 @@ void 
EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst, (assm->*sse_op)(dst.fp(), shift); } } + +enum class ShiftSignedness { kSigned, kUnsigned }; + +template <bool is_signed> +void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister lhs, LiftoffRegister rhs) { + // Same algorithm is used for both signed and unsigned shifts, the only + // difference is the actual shift and pack in the end. This is the same + // algorithm as used in code-generator-ia32.cc + Register tmp = + assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(rhs)).gp(); + XMMRegister tmp_simd = + assm->GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs)).fp(); + + // Unpack the bytes into words, do logical shifts, and repack. + assm->Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp()); + assm->Punpcklbw(dst.fp(), lhs.fp()); + assm->mov(tmp, rhs.gp()); + // Take shift value modulo 8. + assm->and_(tmp, 7); + assm->add(tmp, Immediate(8)); + assm->Movd(tmp_simd, tmp); + if (is_signed) { + assm->Psraw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, + tmp_simd); + assm->Psraw(dst.fp(), dst.fp(), tmp_simd); + assm->Packsswb(dst.fp(), liftoff::kScratchDoubleReg); + } else { + assm->Psrlw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, + tmp_simd); + assm->Psrlw(dst.fp(), dst.fp(), tmp_simd); + assm->Packuswb(dst.fp(), liftoff::kScratchDoubleReg); + } +} + +inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + Register tmp = + assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(dst)).gp(); + assm->xor_(tmp, tmp); + assm->mov(dst.gp(), Immediate(1)); + assm->Ptest(src.fp(), src.fp()); + assm->cmov(zero, dst.gp(), tmp); +} + +template <void (TurboAssembler::*pcmp)(XMMRegister, XMMRegister)> +inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst, + LiftoffRegister src) { + Register tmp = + assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(dst)).gp(); + XMMRegister tmp_simd = liftoff::kScratchDoubleReg; + assm->mov(tmp, Immediate(1)); + assm->xor_(dst.gp(), dst.gp()); + assm->Pxor(tmp_simd, tmp_simd); + (assm->*pcmp)(tmp_simd, src.fp()); + assm->Ptest(tmp_simd, tmp_simd); + assm->cmov(zero, dst.gp(), tmp); +} + } // namespace liftoff +void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, + Register offset_reg, uint32_t offset_imm, + LoadType type, + LoadTransformationKind transform, + uint32_t* protected_load_pc) { + DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max()); + Operand src_op{src_addr, offset_reg, times_1, + static_cast<int32_t>(offset_imm)}; + *protected_load_pc = pc_offset(); + + MachineType memtype = type.mem_type(); + if (transform == LoadTransformationKind::kExtend) { + if (memtype == MachineType::Int8()) { + Pmovsxbw(dst.fp(), src_op); + } else if (memtype == MachineType::Uint8()) { + Pmovzxbw(dst.fp(), src_op); + } else if (memtype == MachineType::Int16()) { + Pmovsxwd(dst.fp(), src_op); + } else if (memtype == MachineType::Uint16()) { + Pmovzxwd(dst.fp(), src_op); + } else if (memtype == MachineType::Int32()) { + Pmovsxdq(dst.fp(), src_op); + } else if (memtype == MachineType::Uint32()) { + Pmovzxdq(dst.fp(), src_op); + } + } else { + DCHECK_EQ(LoadTransformationKind::kSplat, transform); + if (memtype == MachineType::Int8()) { + Pinsrb(dst.fp(), src_op, 0); + Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + Pshufb(dst.fp(), liftoff::kScratchDoubleReg); + } else if (memtype == MachineType::Int16()) { + Pinsrw(dst.fp(), src_op, 0); + Pshuflw(dst.fp(), 
dst.fp(), uint8_t{0}); + Punpcklqdq(dst.fp(), dst.fp()); + } else if (memtype == MachineType::Int32()) { + Vbroadcastss(dst.fp(), src_op); + } else if (memtype == MachineType::Int64()) { + Movddup(dst.fp(), src_op); + } + } +} + +void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs, + const uint8_t shuffle[16]) { + LiftoffRegister tmp = GetUnusedRegister(kGpReg, {}); + // Prepare 16 byte aligned buffer for shuffle control mask. + mov(tmp.gp(), esp); + and_(esp, -16); + movups(liftoff::kScratchDoubleReg, lhs.fp()); + + for (int i = 3; i >= 0; i--) { + uint32_t mask = 0; + for (int j = 3; j >= 0; j--) { + uint8_t lane = shuffle[i * 4 + j]; + mask <<= 8; + mask |= lane < kSimd128Size ? lane : 0x80; + } + push(Immediate(mask)); + } + Pshufb(liftoff::kScratchDoubleReg, Operand(esp, 0)); + + for (int i = 3; i >= 0; i--) { + uint32_t mask = 0; + for (int j = 3; j >= 0; j--) { + uint8_t lane = shuffle[i * 4 + j]; + mask <<= 8; + mask |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80; + } + push(Immediate(mask)); + } + if (dst.fp() != rhs.fp()) { + movups(dst.fp(), rhs.fp()); + } + Pshufb(dst.fp(), Operand(esp, 0)); + Por(dst.fp(), liftoff::kScratchDoubleReg); + mov(esp, tmp.gp()); +} + +void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + XMMRegister mask = + GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp(); + // Out-of-range indices should return 0, add 112 (0x70) so that any value > 15 + // saturates to 128 (top bit set), so pshufb will zero that lane. + TurboAssembler::Move(mask, uint32_t{0x70707070}); + Pshufd(mask, mask, uint8_t{0x0}); + Paddusb(mask, rhs.fp()); + if (lhs != dst) { + Movaps(dst.fp(), lhs.fp()); + } + Pshufb(dst.fp(), mask); +} + void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { Movd(dst.fp(), src.gp()); @@ -2350,6 +2629,21 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src); +} + +void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + Pmovmskb(dst.gp(), src.fp()); +} + void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32); @@ -2381,7 +2675,7 @@ void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32); - LiftoffRegister tmp = GetUnusedRegister(tmp_rc); + LiftoffRegister tmp = GetUnusedRegister(tmp_rc, {}); byte shift = static_cast<byte>(rhs & 0x7); if (CpuFeatures::IsSupported(AVX)) { CpuFeatureScope scope(this, AVX); @@ -2399,6 +2693,43 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, Pand(dst.fp(), liftoff::kScratchDoubleReg); } +void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp()); 
+ Punpcklbw(dst.fp(), lhs.fp()); + uint8_t shift = (rhs & 7) + 8; + Psraw(liftoff::kScratchDoubleReg, shift); + Psraw(dst.fp(), shift); + Packsswb(dst.fp(), liftoff::kScratchDoubleReg); +} + +void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + Register tmp = GetUnusedRegister(kGpReg, {}).gp(); + // Perform 16-bit shift, then mask away high bits. + uint8_t shift = rhs & 7; + Psrlw(dst.fp(), lhs.fp(), byte{shift}); + + uint8_t bmask = 0xff >> shift; + uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask; + mov(tmp, mask); + Movd(liftoff::kScratchDoubleReg, tmp); + Pshufd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 0); + Pand(dst.fp(), liftoff::kScratchDoubleReg); +} + void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>( @@ -2541,6 +2872,24 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src); +} + +void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + XMMRegister tmp = liftoff::kScratchDoubleReg; + Packsswb(tmp, src.fp()); + Pmovmskb(dst.gp(), tmp); + shr(dst.gp(), 8); +} + void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst, @@ -2553,6 +2902,32 @@ void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>( + this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>( @@ -2639,6 +3014,21 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, } } +void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAnyTrue(this, dst, src); +} + +void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, + LiftoffRegister src) { + liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src); +} + +void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, + LiftoffRegister src) { + Movmskps(dst.gp(), src.fp()); +} + void 
LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst, @@ -2651,6 +3041,32 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>( + this, dst, lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>( @@ -2723,6 +3139,56 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, this, dst, lhs, rhs); } +void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + XMMRegister shift = liftoff::kScratchDoubleReg; + XMMRegister tmp = + GetUnusedRegister(RegClass::kFpReg, LiftoffRegList::ForRegs(dst, lhs)) + .fp(); + + // Take shift value modulo 64. + and_(rhs.gp(), Immediate(63)); + Movd(shift, rhs.gp()); + + // Set up a mask [0x80000000,0,0x80000000,0]. + Pcmpeqb(tmp, tmp); + Psllq(tmp, tmp, 63); + + Psrlq(tmp, tmp, shift); + Psrlq(dst.fp(), lhs.fp(), shift); + Pxor(dst.fp(), tmp); + Psubq(dst.fp(), tmp); +} + +void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + XMMRegister tmp = liftoff::kScratchDoubleReg; + int32_t shift = rhs & 63; + + // Set up a mask [0x80000000,0,0x80000000,0]. 
+ Pcmpeqb(tmp, tmp); + Psllq(tmp, tmp, 63); + + Psrlq(tmp, tmp, shift); + Psrlq(dst.fp(), lhs.fp(), shift); + Pxor(dst.fp(), tmp); + Psubq(dst.fp(), tmp); +} + +void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, + LiftoffRegister lhs, + LiftoffRegister rhs) { + liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst, + lhs, rhs); +} + +void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, + LiftoffRegister lhs, int32_t rhs) { + liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>( + this, dst, lhs, rhs); +} + void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>( @@ -2990,6 +3456,97 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, Andnpd(dst.fp(), liftoff::kScratchDoubleReg); } +void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + // NAN->0 + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vcmpeqps(liftoff::kScratchDoubleReg, src.fp(), src.fp()); + vpand(dst.fp(), src.fp(), liftoff::kScratchDoubleReg); + } else { + movaps(liftoff::kScratchDoubleReg, src.fp()); + cmpeqps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + pand(dst.fp(), liftoff::kScratchDoubleReg); + } + // Set top bit if >= 0 (but not -0.0!). + Pxor(liftoff::kScratchDoubleReg, dst.fp()); + // Convert to int. + Cvttps2dq(dst.fp(), dst.fp()); + // Set top bit if >=0 is now < 0. + Pand(liftoff::kScratchDoubleReg, dst.fp()); + Psrad(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{31}); + // Set positive overflow lanes to 0x7FFFFFFF. + Pxor(dst.fp(), liftoff::kScratchDoubleReg); +} + +void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, + LiftoffRegister src) { + static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128); + DoubleRegister tmp = + GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, src)).fp(); + // NAN->0, negative->0. + Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vmaxps(dst.fp(), src.fp(), liftoff::kScratchDoubleReg); + } else { + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + maxps(dst.fp(), liftoff::kScratchDoubleReg); + } + // scratch: float representation of max_signed. + Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, + uint8_t{1}); // 0x7fffffff + Cvtdq2ps(liftoff::kScratchDoubleReg, + liftoff::kScratchDoubleReg); // 0x4f000000 + // tmp: convert (src-max_signed). + // Set positive overflow lanes to 0x7FFFFFFF. + // Set negative lanes to 0. + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vsubps(tmp, dst.fp(), liftoff::kScratchDoubleReg); + } else { + movaps(tmp, dst.fp()); + subps(tmp, liftoff::kScratchDoubleReg); + } + Cmpleps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, tmp); + Cvttps2dq(tmp, tmp); + Pxor(tmp, liftoff::kScratchDoubleReg); + Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); + Pmaxsd(tmp, liftoff::kScratchDoubleReg); + // Convert to int. Overflow lanes above max_signed will be 0x80000000. + Cvttps2dq(dst.fp(), dst.fp()); + // Add (src-max_signed) for overflow lanes. 
+ Paddd(dst.fp(), dst.fp(), tmp); +} + +void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + Cvtdq2ps(dst.fp(), src.fp()); +} + +void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, + LiftoffRegister src) { + Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); // Zeros. + Pblendw(liftoff::kScratchDoubleReg, src.fp(), + uint8_t{0x55}); // Get lo 16 bits. + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vpsubd(dst.fp(), src.fp(), liftoff::kScratchDoubleReg); // Get hi 16 bits. + } else { + if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp()); + psubd(dst.fp(), liftoff::kScratchDoubleReg); + } + Cvtdq2ps(liftoff::kScratchDoubleReg, + liftoff::kScratchDoubleReg); // Convert lo exactly. + Psrld(dst.fp(), dst.fp(), byte{1}); // Divide by 2 to get in unsigned range. + Cvtdq2ps(dst.fp(), dst.fp()); // Convert hi, exactly. + Addps(dst.fp(), dst.fp(), dst.fp()); // Double hi, exactly. + Addps(dst.fp(), dst.fp(), + liftoff::kScratchDoubleReg); // Add hi and lo, may round. +} + void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { @@ -3270,7 +3827,7 @@ void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { } void LiftoffAssembler::CallTrapCallbackForTesting() { - PrepareCallCFunction(0, GetUnusedRegister(kGpReg).gp()); + PrepareCallCFunction(0, GetUnusedRegister(kGpReg, {}).gp()); CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0); } |
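As a second standalone sketch (again not part of the patch), here is a scalar model of the trick used by the new emit_s8x16_swizzle above: adding 0x70 (112) to each lane index with unsigned byte saturation (Paddusb) pushes any index greater than 15 into the 0x80..0xFF range, and pshufb writes zero to a destination byte whenever the top bit of its index byte is set. The AddSatU8, Pshufb, and Swizzle helpers below are illustrative models, not V8 APIs.

#include <array>
#include <cstdint>
#include <cstdio>

using Bytes16 = std::array<uint8_t, 16>;

// Model of PADDUSB: byte-wise add with unsigned saturation.
Bytes16 AddSatU8(const Bytes16& a, uint8_t b) {
  Bytes16 r;
  for (int i = 0; i < 16; ++i) {
    int sum = a[i] + b;
    r[i] = sum > 0xFF ? 0xFF : static_cast<uint8_t>(sum);
  }
  return r;
}

// Model of PSHUFB: top bit set -> write 0, otherwise select src[index & 0x0F].
Bytes16 Pshufb(const Bytes16& src, const Bytes16& mask) {
  Bytes16 r;
  for (int i = 0; i < 16; ++i)
    r[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0F];
  return r;
}

Bytes16 Swizzle(const Bytes16& lhs, const Bytes16& indices) {
  // Indices 0..15 stay below 0x80 after +0x70; 16..255 saturate to >= 0x80,
  // so pshufb zeroes those lanes, matching the out-of-range semantics.
  return Pshufb(lhs, AddSatU8(indices, 0x70));
}

int main() {
  Bytes16 data{};
  for (int i = 0; i < 16; ++i) data[i] = static_cast<uint8_t>(100 + i);
  Bytes16 idx{0, 15, 16, 255, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0};
  Bytes16 out = Swizzle(data, idx);
  for (uint8_t b : out) std::printf("%d ", b);   // 100 115 0 0 103 ...
  std::printf("\n");
}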