author    Allan Sandfeld Jensen <allan.jensen@qt.io>   2020-10-12 14:27:29 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io>   2020-10-13 09:35:20 +0000
commit    c30a6232df03e1efbd9f3b226777b07e087a1122 (patch)
tree      e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/v8/src/wasm/baseline/ia32
parent    7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff)
BASELINE: Update Chromium to 85.0.4183.140 (85-based)
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/v8/src/wasm/baseline/ia32')
-rw-r--r--  chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h | 625
1 file changed, 591 insertions(+), 34 deletions(-)
diff --git a/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h b/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h
index 7a1d629bf2d..468450aef66 100644
--- a/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h
+++ b/chromium/v8/src/wasm/baseline/ia32/liftoff-assembler-ia32.h
@@ -130,7 +130,7 @@ inline Register GetTmpByteRegister(LiftoffAssembler* assm, Register candidate) {
if (candidate.is_byte_register()) return candidate;
// {GetUnusedRegister()} may insert move instructions to spill registers to
// the stack. This is OK because {mov} does not change the status flags.
- return assm->GetUnusedRegister(liftoff::kByteRegs).gp();
+ return assm->GetUnusedRegister(liftoff::kByteRegs, {}).gp();
}
inline void MoveStackValue(LiftoffAssembler* assm, const Operand& src,
@@ -336,8 +336,6 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
case LoadType::kS128Load:
movdqu(dst.fp(), src_op);
break;
- default:
- UNREACHABLE();
}
}
@@ -405,8 +403,6 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
case StoreType::kS128Store:
Movdqu(dst_op, src.fp());
break;
- default:
- UNREACHABLE();
}
}
@@ -494,7 +490,56 @@ void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
LiftoffRegister result, StoreType type) {
- bailout(kAtomics, "AtomicAdd");
+ if (type.value() == StoreType::kI64Store) {
+ bailout(kAtomics, "AtomicAdd");
+ return;
+ }
+
+ DCHECK_EQ(value, result);
+ DCHECK(!cache_state()->is_used(result));
+ bool is_64_bit_op = type.value_type() == kWasmI64;
+
+ Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();
+ Register result_reg = is_64_bit_op ? result.low_gp() : result.gp();
+
+ bool is_byte_store = type.size() == 1;
+ LiftoffRegList pinned =
+ LiftoffRegList::ForRegs(dst_addr, value_reg, offset_reg);
+
+ // Ensure that {value_reg} is a valid register.
+ if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
+ Register safe_value_reg =
+ GetUnusedRegister(liftoff::kByteRegs, pinned).gp();
+ mov(safe_value_reg, value_reg);
+ value_reg = safe_value_reg;
+ }
+
+ Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm);
+ lock();
+ switch (type.value()) {
+ case StoreType::kI64Store8:
+ case StoreType::kI32Store8:
+ xadd_b(dst_op, value_reg);
+ movzx_b(result_reg, value_reg);
+ break;
+ case StoreType::kI64Store16:
+ case StoreType::kI32Store16:
+ xadd_w(dst_op, value_reg);
+ movzx_w(result_reg, value_reg);
+ break;
+ case StoreType::kI64Store32:
+ case StoreType::kI32Store:
+ xadd(dst_op, value_reg);
+ if (value_reg != result_reg) {
+ mov(result_reg, value_reg);
+ }
+ break;
+ default:
+ UNREACHABLE();
+ }
+ if (is_64_bit_op) {
+ xor_(result.high_gp(), result.high_gp());
+ }
}
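For reference, the lock-prefixed xadd above returns the previous memory value, which is then zero-extended into the result register (and, for narrow 64-bit ops, the high word is cleared). A minimal scalar sketch of those semantics for the 8-bit case, using std::atomic and an invented helper name rather than the assembler:

#include <atomic>
#include <cstdint>

// Conceptual model only: fetch_add returns the old value, like `lock xadd`;
// the cast to uint32_t mirrors the movzx_b on the result register.
uint32_t AtomicAdd8(std::atomic<uint8_t>* addr, uint8_t value) {
  return static_cast<uint32_t>(addr->fetch_add(value));
}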
void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
@@ -1349,7 +1394,7 @@ inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst,
// We need one tmp register to extract the sign bit. Get it right at the
// beginning, such that the spilling code is not accidentally jumped over.
- Register tmp = assm->GetUnusedRegister(kGpReg).gp();
+ Register tmp = assm->GetUnusedRegister(kGpReg, {}).gp();
#define dop(name, ...) \
do { \
@@ -1412,9 +1457,9 @@ void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
DoubleRegister rhs) {
static constexpr int kF32SignBit = 1 << 31;
- Register scratch = GetUnusedRegister(kGpReg).gp();
- Register scratch2 =
- GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(scratch)).gp();
+ LiftoffRegList pinned;
+ Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
+ Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
Movd(scratch, lhs); // move {lhs} into {scratch}.
and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}.
Movd(scratch2, rhs); // move {rhs} into {scratch2}.
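The Movd/and_ sequence above computes copysign with bit operations: clear the sign bit of {lhs}, isolate the sign bit of {rhs}, and combine the two. A bit-level sketch of that computation (illustration only; the helper name is invented):

#include <cstdint>
#include <cstring>

float F32CopySign(float lhs, float rhs) {
  uint32_t l, r;
  std::memcpy(&l, &lhs, sizeof(l));
  std::memcpy(&r, &rhs, sizeof(r));
  // magnitude of lhs, sign of rhs (kF32SignBit == 1 << 31)
  uint32_t bits = (l & ~(1u << 31)) | (r & (1u << 31));
  float out;
  std::memcpy(&out, &bits, sizeof(out));
  return out;
}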
@@ -1541,9 +1586,9 @@ void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
static constexpr int kF32SignBit = 1 << 31;
// On ia32, we cannot hold the whole f64 value in a gp register, so we just
// operate on the upper half (UH).
- Register scratch = GetUnusedRegister(kGpReg).gp();
- Register scratch2 =
- GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(scratch)).gp();
+ LiftoffRegList pinned;
+ Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
+ Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
Pextrd(scratch, lhs, 1); // move UH of {lhs} into {scratch}.
and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}.
@@ -1612,6 +1657,7 @@ void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
}
namespace liftoff {
+#define __ assm->
// Used for float to int conversions. If the value in {converted_back} equals
// {src} afterwards, the conversion succeeded.
template <typename dst_type, typename src_type>
@@ -1621,21 +1667,21 @@ inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst,
LiftoffRegList pinned) {
if (std::is_same<double, src_type>::value) { // f64
if (std::is_signed<dst_type>::value) { // f64 -> i32
- assm->cvttsd2si(dst, src);
- assm->Cvtsi2sd(converted_back, dst);
+ __ cvttsd2si(dst, src);
+ __ Cvtsi2sd(converted_back, dst);
} else { // f64 -> u32
- assm->Cvttsd2ui(dst, src, liftoff::kScratchDoubleReg);
- assm->Cvtui2sd(converted_back, dst,
- assm->GetUnusedRegister(kGpReg, pinned).gp());
+ __ Cvttsd2ui(dst, src, liftoff::kScratchDoubleReg);
+ __ Cvtui2sd(converted_back, dst,
+ __ GetUnusedRegister(kGpReg, pinned).gp());
}
} else { // f32
if (std::is_signed<dst_type>::value) { // f32 -> i32
- assm->cvttss2si(dst, src);
- assm->Cvtsi2ss(converted_back, dst);
+ __ cvttss2si(dst, src);
+ __ Cvtsi2ss(converted_back, dst);
} else { // f32 -> u32
- assm->Cvttss2ui(dst, src, liftoff::kScratchDoubleReg);
- assm->Cvtui2ss(converted_back, dst,
- assm->GetUnusedRegister(kGpReg, pinned).gp());
+ __ Cvttss2ui(dst, src, liftoff::kScratchDoubleReg);
+ __ Cvtui2ss(converted_back, dst,
+ __ GetUnusedRegister(kGpReg, pinned).gp());
}
}
}
@@ -1644,36 +1690,101 @@ template <typename dst_type, typename src_type>
inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
DoubleRegister src, Label* trap) {
if (!CpuFeatures::IsSupported(SSE4_1)) {
- assm->bailout(kMissingCPUFeature, "no SSE4.1");
+ __ bailout(kMissingCPUFeature, "no SSE4.1");
return true;
}
CpuFeatureScope feature(assm, SSE4_1);
LiftoffRegList pinned = LiftoffRegList::ForRegs(src, dst);
DoubleRegister rounded =
- pinned.set(assm->GetUnusedRegister(kFpReg, pinned)).fp();
+ pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
DoubleRegister converted_back =
- pinned.set(assm->GetUnusedRegister(kFpReg, pinned)).fp();
+ pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
if (std::is_same<double, src_type>::value) { // f64
- assm->roundsd(rounded, src, kRoundToZero);
+ __ roundsd(rounded, src, kRoundToZero);
} else { // f32
- assm->roundss(rounded, src, kRoundToZero);
+ __ roundss(rounded, src, kRoundToZero);
}
ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
converted_back, pinned);
if (std::is_same<double, src_type>::value) { // f64
- assm->ucomisd(converted_back, rounded);
+ __ ucomisd(converted_back, rounded);
} else { // f32
- assm->ucomiss(converted_back, rounded);
+ __ ucomiss(converted_back, rounded);
}
// Jump to trap if PF is 0 (one of the operands was NaN) or they are not
// equal.
- assm->j(parity_even, trap);
- assm->j(not_equal, trap);
+ __ j(parity_even, trap);
+ __ j(not_equal, trap);
return true;
}
+
+template <typename dst_type, typename src_type>
+inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
+ DoubleRegister src) {
+ if (!CpuFeatures::IsSupported(SSE4_1)) {
+ __ bailout(kMissingCPUFeature, "no SSE4.1");
+ return true;
+ }
+ CpuFeatureScope feature(assm, SSE4_1);
+
+ Label done;
+ Label not_nan;
+ Label src_positive;
+
+ LiftoffRegList pinned = LiftoffRegList::ForRegs(src, dst);
+ DoubleRegister rounded =
+ pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
+ DoubleRegister converted_back =
+ pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
+ DoubleRegister zero_reg =
+ pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
+
+ if (std::is_same<double, src_type>::value) { // f64
+ __ roundsd(rounded, src, kRoundToZero);
+ } else { // f32
+ __ roundss(rounded, src, kRoundToZero);
+ }
+
+ ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
+ converted_back, pinned);
+ if (std::is_same<double, src_type>::value) { // f64
+ __ ucomisd(converted_back, rounded);
+ } else { // f32
+ __ ucomiss(converted_back, rounded);
+ }
+
+ // Return 0 if PF is 0 (one of the operands was NaN)
+ __ j(parity_odd, &not_nan);
+ __ xor_(dst, dst);
+ __ jmp(&done);
+
+ __ bind(&not_nan);
+ // If rounding is as expected, return result
+ __ j(equal, &done);
+
+ __ Xorpd(zero_reg, zero_reg);
+
+ // if out-of-bounds, check if src is positive
+ if (std::is_same<double, src_type>::value) { // f64
+ __ ucomisd(src, zero_reg);
+ } else { // f32
+ __ ucomiss(src, zero_reg);
+ }
+ __ j(above, &src_positive);
+ __ mov(dst, Immediate(std::numeric_limits<dst_type>::min()));
+ __ jmp(&done);
+
+ __ bind(&src_positive);
+
+ __ mov(dst, Immediate(std::numeric_limits<dst_type>::max()));
+
+ __ bind(&done);
+ return true;
+}
+#undef __
} // namespace liftoff
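EmitSatTruncateFloatToInt above implements the saturating truncations: NaN becomes 0, out-of-range values clamp to the destination type's min or max, and everything else truncates toward zero. A scalar reference for one of the four cases, i32.trunc_sat_f64_s (the function name is invented for illustration):

#include <cmath>
#include <cstdint>
#include <limits>

int32_t I32SConvertSatF64(double src) {
  if (std::isnan(src)) return 0;                // NaN -> 0
  double truncated = std::trunc(src);           // kRoundToZero
  if (truncated < -2147483648.0) return std::numeric_limits<int32_t>::min();
  if (truncated > 2147483647.0) return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(truncated);       // in range: exact
}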
bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
@@ -1695,6 +1806,18 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
case kExprI32UConvertF64:
return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
src.fp(), trap);
+ case kExprI32SConvertSatF32:
+ return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),
+ src.fp());
+ case kExprI32UConvertSatF32:
+ return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
+ src.fp());
+ case kExprI32SConvertSatF64:
+ return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),
+ src.fp());
+ case kExprI32UConvertSatF64:
+ return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(
+ this, dst.gp(), src.fp());
case kExprI32ReinterpretF32:
Movd(dst.gp(), src.fp());
return true;
@@ -2017,8 +2140,164 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
(assm->*sse_op)(dst.fp(), shift);
}
}
+
+enum class ShiftSignedness { kSigned, kUnsigned };
+
+template <bool is_signed>
+void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst,
+ LiftoffRegister lhs, LiftoffRegister rhs) {
+ // Same algorithm is used for both signed and unsigned shifts, the only
+ // difference is the actual shift and pack in the end. This is the same
+ // algorithm as used in code-generator-ia32.cc
+ Register tmp =
+ assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(rhs)).gp();
+ XMMRegister tmp_simd =
+ assm->GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs)).fp();
+
+ // Unpack the bytes into words, do logical shifts, and repack.
+ assm->Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp());
+ assm->Punpcklbw(dst.fp(), lhs.fp());
+ assm->mov(tmp, rhs.gp());
+ // Take shift value modulo 8.
+ assm->and_(tmp, 7);
+ assm->add(tmp, Immediate(8));
+ assm->Movd(tmp_simd, tmp);
+ if (is_signed) {
+ assm->Psraw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
+ tmp_simd);
+ assm->Psraw(dst.fp(), dst.fp(), tmp_simd);
+ assm->Packsswb(dst.fp(), liftoff::kScratchDoubleReg);
+ } else {
+ assm->Psrlw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
+ tmp_simd);
+ assm->Psrlw(dst.fp(), dst.fp(), tmp_simd);
+ assm->Packuswb(dst.fp(), liftoff::kScratchDoubleReg);
+ }
+}
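Per lane, the unpack/shift/pack sequence above is roughly equivalent to widening each byte into the high half of a 16-bit lane, shifting by 8 + (shift & 7), and narrowing again. A scalar sketch of one lane for the signed case (assuming the compiler's arithmetic right shift for negative values; the helper name is invented):

#include <cstdint>

int8_t I8x16ShrSLane(int8_t lane, int shift) {
  // punpck places the byte in the high half of a 16-bit lane (low half is don't-care).
  int16_t widened =
      static_cast<int16_t>(static_cast<uint16_t>(static_cast<uint8_t>(lane)) << 8);
  // psraw by 8 + (shift & 7) discards the low byte and sign-extends;
  // packsswb then re-narrows the in-range result.
  return static_cast<int8_t>(widened >> (8 + (shift & 7)));
}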
+
+inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
+ LiftoffRegister src) {
+ Register tmp =
+ assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(dst)).gp();
+ assm->xor_(tmp, tmp);
+ assm->mov(dst.gp(), Immediate(1));
+ assm->Ptest(src.fp(), src.fp());
+ assm->cmov(zero, dst.gp(), tmp);
+}
+
+template <void (TurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
+inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
+ LiftoffRegister src) {
+ Register tmp =
+ assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(dst)).gp();
+ XMMRegister tmp_simd = liftoff::kScratchDoubleReg;
+ assm->mov(tmp, Immediate(1));
+ assm->xor_(dst.gp(), dst.gp());
+ assm->Pxor(tmp_simd, tmp_simd);
+ (assm->*pcmp)(tmp_simd, src.fp());
+ assm->Ptest(tmp_simd, tmp_simd);
+ assm->cmov(zero, dst.gp(), tmp);
+}
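EmitAnyTrue tests whether any bit of the vector is set (ptest plus cmov), and EmitAllTrue tests whether every lane is non-zero (compare against zero, then ptest the mask). Scalar equivalents for the i8x16 case, with invented names:

#include <cstdint>

bool V128AnyTrue(const uint8_t v[16]) {
  for (int i = 0; i < 16; ++i) {
    if (v[i] != 0) return true;  // ptest: ZF is clear if any bit is set
  }
  return false;
}

bool I8x16AllTrue(const uint8_t v[16]) {
  for (int i = 0; i < 16; ++i) {
    if (v[i] == 0) return false;  // pcmpeqb against zero flags zero lanes
  }
  return true;
}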
+
} // namespace liftoff
+void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
+ Register offset_reg, uint32_t offset_imm,
+ LoadType type,
+ LoadTransformationKind transform,
+ uint32_t* protected_load_pc) {
+ DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
+ Operand src_op{src_addr, offset_reg, times_1,
+ static_cast<int32_t>(offset_imm)};
+ *protected_load_pc = pc_offset();
+
+ MachineType memtype = type.mem_type();
+ if (transform == LoadTransformationKind::kExtend) {
+ if (memtype == MachineType::Int8()) {
+ Pmovsxbw(dst.fp(), src_op);
+ } else if (memtype == MachineType::Uint8()) {
+ Pmovzxbw(dst.fp(), src_op);
+ } else if (memtype == MachineType::Int16()) {
+ Pmovsxwd(dst.fp(), src_op);
+ } else if (memtype == MachineType::Uint16()) {
+ Pmovzxwd(dst.fp(), src_op);
+ } else if (memtype == MachineType::Int32()) {
+ Pmovsxdq(dst.fp(), src_op);
+ } else if (memtype == MachineType::Uint32()) {
+ Pmovzxdq(dst.fp(), src_op);
+ }
+ } else {
+ DCHECK_EQ(LoadTransformationKind::kSplat, transform);
+ if (memtype == MachineType::Int8()) {
+ Pinsrb(dst.fp(), src_op, 0);
+ Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
+ Pshufb(dst.fp(), liftoff::kScratchDoubleReg);
+ } else if (memtype == MachineType::Int16()) {
+ Pinsrw(dst.fp(), src_op, 0);
+ Pshuflw(dst.fp(), dst.fp(), uint8_t{0});
+ Punpcklqdq(dst.fp(), dst.fp());
+ } else if (memtype == MachineType::Int32()) {
+ Vbroadcastss(dst.fp(), src_op);
+ } else if (memtype == MachineType::Int64()) {
+ Movddup(dst.fp(), src_op);
+ }
+ }
+}
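For the kExtend transforms above, half a vector is loaded and each element is widened in place; the kSplat transforms load one element and broadcast it to every lane. A scalar model of the Int8 extend case (pmovsxbw), with an invented name:

#include <cstdint>

void I16x8Load8x8S(const int8_t src[8], int16_t dst[8]) {
  for (int i = 0; i < 8; ++i) {
    dst[i] = static_cast<int16_t>(src[i]);  // sign-extend each byte to a 16-bit lane
  }
}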
+
+void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs,
+ const uint8_t shuffle[16]) {
+ LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
+ // Prepare 16 byte aligned buffer for shuffle control mask.
+ mov(tmp.gp(), esp);
+ and_(esp, -16);
+ movups(liftoff::kScratchDoubleReg, lhs.fp());
+
+ for (int i = 3; i >= 0; i--) {
+ uint32_t mask = 0;
+ for (int j = 3; j >= 0; j--) {
+ uint8_t lane = shuffle[i * 4 + j];
+ mask <<= 8;
+ mask |= lane < kSimd128Size ? lane : 0x80;
+ }
+ push(Immediate(mask));
+ }
+ Pshufb(liftoff::kScratchDoubleReg, Operand(esp, 0));
+
+ for (int i = 3; i >= 0; i--) {
+ uint32_t mask = 0;
+ for (int j = 3; j >= 0; j--) {
+ uint8_t lane = shuffle[i * 4 + j];
+ mask <<= 8;
+ mask |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
+ }
+ push(Immediate(mask));
+ }
+ if (dst.fp() != rhs.fp()) {
+ movups(dst.fp(), rhs.fp());
+ }
+ Pshufb(dst.fp(), Operand(esp, 0));
+ Por(dst.fp(), liftoff::kScratchDoubleReg);
+ mov(esp, tmp.gp());
+}
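The two pshufb passes above select lanes with index < 16 from {lhs} and lanes with index >= 16 from {rhs}; the 0x80 mask entries zero the other operand's contribution so the final por merges the halves. A scalar reference of the lane selection (illustration only):

#include <cstdint>

void S8x16Shuffle(uint8_t dst[16], const uint8_t lhs[16], const uint8_t rhs[16],
                  const uint8_t shuffle[16]) {
  for (int i = 0; i < 16; ++i) {
    uint8_t lane = shuffle[i];
    dst[i] = lane < 16 ? lhs[lane] : rhs[lane & 0x0F];  // kSimd128Size == 16
  }
}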
+
+void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ XMMRegister mask =
+ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp();
+ // Out-of-range indices should return 0, add 112 (0x70) so that any value > 15
+  // saturates to 128 (top bit set), so pshufb will zero that lane.
+ TurboAssembler::Move(mask, uint32_t{0x70707070});
+ Pshufd(mask, mask, uint8_t{0x0});
+ Paddusb(mask, rhs.fp());
+ if (lhs != dst) {
+ Movaps(dst.fp(), lhs.fp());
+ }
+ Pshufb(dst.fp(), mask);
+}
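The swizzle above biases each index by 0x70 with an unsigned saturating add, so any out-of-range index (>= 16) ends up with its top bit set and pshufb writes zero for that lane. A per-lane sketch (the helper name is invented):

#include <algorithm>
#include <cstdint>

uint8_t S8x16SwizzleLane(const uint8_t lhs[16], uint8_t index) {
  // paddusb with 0x70: any index >= 16 moves into the 0x80..0xFF range.
  uint8_t biased = static_cast<uint8_t>(std::min(index + 0x70, 0xFF));
  // pshufb zeroes a lane whose control byte has the top bit set,
  // otherwise it selects lhs[control & 0x0F].
  return (biased & 0x80) ? 0 : lhs[biased & 0x0F];
}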
+
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
@@ -2350,6 +2629,21 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
}
}
+void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src);
+}
+
+void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pmovmskb(dst.gp(), src.fp());
+}
+
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32);
@@ -2381,7 +2675,7 @@ void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32);
- LiftoffRegister tmp = GetUnusedRegister(tmp_rc);
+ LiftoffRegister tmp = GetUnusedRegister(tmp_rc, {});
byte shift = static_cast<byte>(rhs & 0x7);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
@@ -2399,6 +2693,43 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
Pand(dst.fp(), liftoff::kScratchDoubleReg);
}
+void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp());
+ Punpcklbw(dst.fp(), lhs.fp());
+ uint8_t shift = (rhs & 7) + 8;
+ Psraw(liftoff::kScratchDoubleReg, shift);
+ Psraw(dst.fp(), shift);
+ Packsswb(dst.fp(), liftoff::kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ Register tmp = GetUnusedRegister(kGpReg, {}).gp();
+ // Perform 16-bit shift, then mask away high bits.
+ uint8_t shift = rhs & 7;
+ Psrlw(dst.fp(), lhs.fp(), byte{shift});
+
+ uint8_t bmask = 0xff >> shift;
+ uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
+ mov(tmp, mask);
+ Movd(liftoff::kScratchDoubleReg, tmp);
+ Pshufd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 0);
+ Pand(dst.fp(), liftoff::kScratchDoubleReg);
+}
+
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
@@ -2541,6 +2872,24 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
}
}
+void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src);
+}
+
+void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
+ LiftoffRegister src) {
+ XMMRegister tmp = liftoff::kScratchDoubleReg;
+ Packsswb(tmp, src.fp());
+ Pmovmskb(dst.gp(), tmp);
+ shr(dst.gp(), 8);
+}
+
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
@@ -2553,6 +2902,32 @@ void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
+void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
+ lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
+ lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
+ this, dst, lhs, rhs);
+}
+
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
@@ -2639,6 +3014,21 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
}
}
+void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src);
+}
+
+void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Movmskps(dst.gp(), src.fp());
+}
+
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
@@ -2651,6 +3041,32 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
+void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
+ lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
+ lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
+ this, dst, lhs, rhs);
+}
+
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
@@ -2723,6 +3139,56 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
+void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ XMMRegister shift = liftoff::kScratchDoubleReg;
+ XMMRegister tmp =
+ GetUnusedRegister(RegClass::kFpReg, LiftoffRegList::ForRegs(dst, lhs))
+ .fp();
+
+ // Take shift value modulo 64.
+ and_(rhs.gp(), Immediate(63));
+ Movd(shift, rhs.gp());
+
+ // Set up a mask [0x80000000,0,0x80000000,0].
+ Pcmpeqb(tmp, tmp);
+ Psllq(tmp, tmp, 63);
+
+ Psrlq(tmp, tmp, shift);
+ Psrlq(dst.fp(), lhs.fp(), shift);
+ Pxor(dst.fp(), tmp);
+ Psubq(dst.fp(), tmp);
+}
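Since SSE has no 64-bit arithmetic right shift, the sequence above relies on the identity arithmetic_shift(x, s) == ((x >> s) ^ m) - m, where >> is a logical shift and m is the sign bit shifted by the same amount. A scalar check of that identity (name invented):

#include <cstdint>

int64_t I64x2ShrSLane(int64_t lane, int shift) {
  shift &= 63;                                          // shift taken modulo 64
  uint64_t m = uint64_t{0x8000000000000000} >> shift;   // shifted sign-bit mask
  uint64_t x = static_cast<uint64_t>(lane) >> shift;    // psrlq: logical shift
  return static_cast<int64_t>((x ^ m) - m);             // pxor + psubq sign-extend
}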
+
+void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ XMMRegister tmp = liftoff::kScratchDoubleReg;
+ int32_t shift = rhs & 63;
+
+ // Set up a mask [0x80000000,0,0x80000000,0].
+ Pcmpeqb(tmp, tmp);
+ Psllq(tmp, tmp, 63);
+
+ Psrlq(tmp, tmp, shift);
+ Psrlq(dst.fp(), lhs.fp(), shift);
+ Pxor(dst.fp(), tmp);
+ Psubq(dst.fp(), tmp);
+}
+
+void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
+ lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
+ this, dst, lhs, rhs);
+}
+
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
@@ -2990,6 +3456,97 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
}
+void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
+ LiftoffRegister src) {
+ // NAN->0
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vcmpeqps(liftoff::kScratchDoubleReg, src.fp(), src.fp());
+ vpand(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
+ } else {
+ movaps(liftoff::kScratchDoubleReg, src.fp());
+ cmpeqps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
+ if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
+ pand(dst.fp(), liftoff::kScratchDoubleReg);
+ }
+ // Set top bit if >= 0 (but not -0.0!).
+ Pxor(liftoff::kScratchDoubleReg, dst.fp());
+ // Convert to int.
+ Cvttps2dq(dst.fp(), dst.fp());
+ // Set top bit if >=0 is now < 0.
+ Pand(liftoff::kScratchDoubleReg, dst.fp());
+ Psrad(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{31});
+ // Set positive overflow lanes to 0x7FFFFFFF.
+ Pxor(dst.fp(), liftoff::kScratchDoubleReg);
+}
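Per lane, the sequence above yields the saturating semantics of i32x4.trunc_sat_f32x4_s: NaN becomes 0, values at or above 2^31 become INT32_MAX, values below -2^31 become INT32_MIN, and everything else truncates toward zero. A scalar model (name invented):

#include <cmath>
#include <cstdint>
#include <limits>

int32_t I32x4SConvertF32x4Lane(float lane) {
  if (std::isnan(lane)) return 0;
  if (lane >= 2147483648.0f) return std::numeric_limits<int32_t>::max();
  if (lane < -2147483648.0f) return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(lane);  // truncation toward zero
}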
+
+void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
+ LiftoffRegister src) {
+ static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128);
+ DoubleRegister tmp =
+ GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, src)).fp();
+ // NAN->0, negative->0.
+ Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vmaxps(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
+ } else {
+ if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
+ maxps(dst.fp(), liftoff::kScratchDoubleReg);
+ }
+ // scratch: float representation of max_signed.
+ Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
+ Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
+ uint8_t{1}); // 0x7fffffff
+ Cvtdq2ps(liftoff::kScratchDoubleReg,
+ liftoff::kScratchDoubleReg); // 0x4f000000
+ // tmp: convert (src-max_signed).
+ // Set positive overflow lanes to 0x7FFFFFFF.
+ // Set negative lanes to 0.
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vsubps(tmp, dst.fp(), liftoff::kScratchDoubleReg);
+ } else {
+ movaps(tmp, dst.fp());
+ subps(tmp, liftoff::kScratchDoubleReg);
+ }
+ Cmpleps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, tmp);
+ Cvttps2dq(tmp, tmp);
+ Pxor(tmp, liftoff::kScratchDoubleReg);
+ Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
+ Pmaxsd(tmp, liftoff::kScratchDoubleReg);
+ // Convert to int. Overflow lanes above max_signed will be 0x80000000.
+ Cvttps2dq(dst.fp(), dst.fp());
+ // Add (src-max_signed) for overflow lanes.
+ Paddd(dst.fp(), dst.fp(), tmp);
+}
+
+void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Cvtdq2ps(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); // Zeros.
+ Pblendw(liftoff::kScratchDoubleReg, src.fp(),
+ uint8_t{0x55}); // Get lo 16 bits.
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vpsubd(dst.fp(), src.fp(), liftoff::kScratchDoubleReg); // Get hi 16 bits.
+ } else {
+ if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
+ psubd(dst.fp(), liftoff::kScratchDoubleReg);
+ }
+ Cvtdq2ps(liftoff::kScratchDoubleReg,
+ liftoff::kScratchDoubleReg); // Convert lo exactly.
+ Psrld(dst.fp(), dst.fp(), byte{1}); // Divide by 2 to get in unsigned range.
+ Cvtdq2ps(dst.fp(), dst.fp()); // Convert hi, exactly.
+ Addps(dst.fp(), dst.fp(), dst.fp()); // Double hi, exactly.
+ Addps(dst.fp(), dst.fp(),
+ liftoff::kScratchDoubleReg); // Add hi and lo, may round.
+}
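The conversion above splits each unsigned lane into its low 16 bits (converted exactly) and the remaining high part (halved so it stays non-negative for the signed cvtdq2ps, converted, then doubled), and adds the two pieces with a single rounding step. A per-lane sketch (name invented):

#include <cstdint>

float F32x4UConvertI32x4Lane(uint32_t lane) {
  uint32_t lo = lane & 0xFFFFu;   // pblendw 0x55: keep the low word
  uint32_t hi = lane - lo;        // psubd: the high word, low bits zero
  // hi/2 and lo each have at most 16 significant bits, so their conversions
  // are exact; the final addition is the only place rounding can occur.
  return static_cast<float>(hi / 2) * 2.0f + static_cast<float>(lo);
}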
+
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
@@ -3270,7 +3827,7 @@ void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
}
void LiftoffAssembler::CallTrapCallbackForTesting() {
- PrepareCallCFunction(0, GetUnusedRegister(kGpReg).gp());
+ PrepareCallCFunction(0, GetUnusedRegister(kGpReg, {}).gp());
CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}