Diffstat (limited to 'chromium/v8/src/wasm/baseline/arm')
-rw-r--r--  chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h | 512
1 file changed, 496 insertions(+), 16 deletions(-)
diff --git a/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h b/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h
index eb91b79ea55..4a9cffb9728 100644
--- a/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h
+++ b/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h
@@ -332,6 +332,71 @@ inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
}
}
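+// Returns lane_width_in_bits - 1. Wasm shift counts are taken modulo the
+// lane width, so this is the mask to apply to a shift amount.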
+constexpr int MaskFromNeonDataType(NeonDataType dt) {
+ switch (dt) {
+ case NeonS8:
+ case NeonU8:
+ return 7;
+ case NeonS16:
+ case NeonU16:
+ return 15;
+ case NeonS32:
+ case NeonU32:
+ return 31;
+ case NeonS64:
+ case NeonU64:
+ return 63;
+ }
+}
+
+enum ShiftDirection { kLeft, kRight };
+
+template <ShiftDirection dir = kLeft, NeonDataType dt, NeonSize sz>
+inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst,
+ LiftoffRegister lhs, LiftoffRegister rhs) {
+ constexpr int mask = MaskFromNeonDataType(dt);
+ UseScratchRegisterScope temps(assm);
+ QwNeonRegister tmp = temps.AcquireQ();
+ Register shift = temps.Acquire();
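+ // NEON only shifts left by register; a right shift is a left shift by the
+ // negated (masked) shift amount.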
+ assm->and_(shift, rhs.gp(), Operand(mask));
+ assm->vdup(sz, tmp, shift);
+ if (dir == kRight) {
+ assm->vneg(sz, tmp, tmp);
+ }
+ assm->vshl(dt, liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(lhs), tmp);
+}
+
+template <ShiftDirection dir, NeonDataType dt>
+inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ // vshr by 0 is not allowed, so check for it. If the shift is 0, all we
+ // need is a move, and only if dst != lhs.
+ int32_t shift = rhs & MaskFromNeonDataType(dt);
+ if (shift) {
+ if (dir == kLeft) {
+ assm->vshl(dt, liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(lhs), shift);
+ } else {
+ assm->vshr(dt, liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(lhs), shift);
+ }
+ } else if (dst != lhs) {
+ assm->vmov(liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(lhs));
+ }
+}
+
+inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
+ LiftoffRegister src) {
+ UseScratchRegisterScope temps(assm);
+ DwVfpRegister scratch = temps.AcquireD();
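+ // Pairwise-max folds all lanes into lane 0 of {scratch}: it is non-zero
+ // iff any input lane was non-zero. Then normalize the result to 0 or 1.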
+ assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp());
+ assm->vpmax(NeonU32, scratch, scratch, scratch);
+ assm->ExtractLane(dst.gp(), scratch, NeonS32, 0);
+ assm->cmp(dst.gp(), Operand(0));
+ assm->mov(dst.gp(), Operand(1), LeaveCC, ne);
+}
+
} // namespace liftoff
int LiftoffAssembler::PrepareStackFrame() {
@@ -437,7 +502,7 @@ void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed());
break;
case ValueType::kF64: {
- Register extra_scratch = GetUnusedRegister(kGpReg).gp();
+ Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp();
vmov(reg.fp(), Double(value.to_f64_boxed().get_bits()), extra_scratch);
break;
}
@@ -1171,7 +1236,7 @@ void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
ValueType type) {
DCHECK_NE(dst_offset, src_offset);
- LiftoffRegister reg = GetUnusedRegister(reg_class_for(type));
+ LiftoffRegister reg = GetUnusedRegister(reg_class_for(type), {});
Fill(reg, src_offset, type);
Spill(dst_offset, reg, type);
}
@@ -1216,7 +1281,7 @@ void LiftoffAssembler::Spill(int offset, WasmValue value) {
// The scratch register will be required by str if multiple instructions
// are required to encode the offset, and so we cannot use it in that case.
if (!ImmediateFitsAddrMode2Instruction(dst.offset())) {
- src = GetUnusedRegister(kGpReg).gp();
+ src = GetUnusedRegister(kGpReg, {}).gp();
} else {
src = temps.Acquire();
}
@@ -1758,7 +1823,7 @@ void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
DoubleRegister rhs) {
constexpr uint32_t kF32SignBit = uint32_t{1} << 31;
UseScratchRegisterScope temps(this);
- Register scratch = GetUnusedRegister(kGpReg).gp();
+ Register scratch = GetUnusedRegister(kGpReg, {}).gp();
Register scratch2 = temps.Acquire();
VmovLow(scratch, lhs);
// Clear sign bit in {scratch}.
@@ -1777,7 +1842,7 @@ void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
// On arm, we cannot hold the whole f64 value in a gp register, so we just
// operate on the upper half (UH).
UseScratchRegisterScope temps(this);
- Register scratch = GetUnusedRegister(kGpReg).gp();
+ Register scratch = GetUnusedRegister(kGpReg, {}).gp();
Register scratch2 = temps.Acquire();
VmovHigh(scratch, lhs);
// Clear sign bit in {scratch}.
@@ -1862,6 +1927,38 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
b(trap, ge);
return true;
}
+ case kExprI32SConvertSatF32: {
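+ // On Arm, vcvt from float to integer already saturates out-of-range
+ // values and converts NaN to zero, matching the wasm saturating
+ // conversion semantics (same for the three cases below).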
+ UseScratchRegisterScope temps(this);
+ SwVfpRegister scratch_f = temps.AcquireS();
+ vcvt_s32_f32(
+ scratch_f,
+ liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero.
+ vmov(dst.gp(), scratch_f);
+ return true;
+ }
+ case kExprI32UConvertSatF32: {
+ UseScratchRegisterScope temps(this);
+ SwVfpRegister scratch_f = temps.AcquireS();
+ vcvt_u32_f32(
+ scratch_f,
+ liftoff::GetFloatRegister(src.fp())); // f32 -> u32 round to zero.
+ vmov(dst.gp(), scratch_f);
+ return true;
+ }
+ case kExprI32SConvertSatF64: {
+ UseScratchRegisterScope temps(this);
+ SwVfpRegister scratch_f = temps.AcquireS();
+ vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero.
+ vmov(dst.gp(), scratch_f);
+ return true;
+ }
+ case kExprI32UConvertSatF64: {
+ UseScratchRegisterScope temps(this);
+ SwVfpRegister scratch_f = temps.AcquireS();
+ vcvt_u32_f64(scratch_f, src.fp()); // f64 -> u32 round to zero.
+ vmov(dst.gp(), scratch_f);
+ return true;
+ }
case kExprI32ReinterpretF32:
vmov(dst.gp(), liftoff::GetFloatRegister(src.fp()));
return true;
@@ -1914,10 +2011,14 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
case kExprF64UConvertI64:
case kExprI64SConvertF32:
case kExprI64UConvertF32:
+ case kExprI64SConvertSatF32:
+ case kExprI64UConvertSatF32:
case kExprF32SConvertI64:
case kExprF32UConvertI64:
case kExprI64SConvertF64:
case kExprI64UConvertF64:
+ case kExprI64SConvertSatF64:
+ case kExprI64UConvertSatF64:
// These cases can be handled by the C fallback function.
return false;
default:
@@ -2052,6 +2153,79 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
}
}
+void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
+ Register offset_reg, uint32_t offset_imm,
+ LoadType type,
+ LoadTransformationKind transform,
+ uint32_t* protected_load_pc) {
+ UseScratchRegisterScope temps(this);
+ Register actual_src_addr = liftoff::CalculateActualAddress(
+ this, &temps, src_addr, offset_reg, offset_imm);
+ *protected_load_pc = pc_offset();
+ MachineType memtype = type.mem_type();
+
+ if (transform == LoadTransformationKind::kExtend) {
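+ // Load 64 bits into the low half of {dst}, then sign- or zero-extend
+ // each lane to twice its width.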
+ if (memtype == MachineType::Int8()) {
+ vld1(Neon8, NeonListOperand(dst.low_fp()),
+ NeonMemOperand(actual_src_addr));
+ vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp());
+ } else if (memtype == MachineType::Uint8()) {
+ vld1(Neon8, NeonListOperand(dst.low_fp()),
+ NeonMemOperand(actual_src_addr));
+ vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp());
+ } else if (memtype == MachineType::Int16()) {
+ vld1(Neon16, NeonListOperand(dst.low_fp()),
+ NeonMemOperand(actual_src_addr));
+ vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp());
+ } else if (memtype == MachineType::Uint16()) {
+ vld1(Neon16, NeonListOperand(dst.low_fp()),
+ NeonMemOperand(actual_src_addr));
+ vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp());
+ } else if (memtype == MachineType::Int32()) {
+ vld1(Neon32, NeonListOperand(dst.low_fp()),
+ NeonMemOperand(actual_src_addr));
+ vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp());
+ } else if (memtype == MachineType::Uint32()) {
+ vld1(Neon32, NeonListOperand(dst.low_fp()),
+ NeonMemOperand(actual_src_addr));
+ vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp());
+ }
+ } else {
+ DCHECK_EQ(LoadTransformationKind::kSplat, transform);
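+ // Load a single element and replicate it into every lane.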
+ if (memtype == MachineType::Int8()) {
+ vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)),
+ NeonMemOperand(actual_src_addr));
+ } else if (memtype == MachineType::Int16()) {
+ vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)),
+ NeonMemOperand(actual_src_addr));
+ } else if (memtype == MachineType::Int32()) {
+ vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)),
+ NeonMemOperand(actual_src_addr));
+ } else if (memtype == MachineType::Int64()) {
+ vld1(Neon32, NeonListOperand(dst.low_fp()),
+ NeonMemOperand(actual_src_addr));
+ TurboAssembler::Move(dst.high_fp(), dst.low_fp());
+ }
+ }
+}
+
+void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ UseScratchRegisterScope temps(this);
+
+ NeonListOperand table(liftoff::GetSimd128Register(lhs));
+ if (dst == lhs) {
+ // dst will be overwritten, so keep the table somewhere else.
+ QwNeonRegister tbl = temps.AcquireQ();
+ TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs));
+ table = NeonListOperand(tbl);
+ }
+
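+ // vtbl selects bytes from {table} using the indices in rhs; out-of-range
+ // indices produce 0, which matches the wasm swizzle semantics.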
+ vtbl(dst.low_fp(), table, rhs.low_fp());
+ vtbl(dst.high_fp(), table, rhs.high_fp());
+}
+
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
TurboAssembler::Move(dst.low_fp(), src.fp());
@@ -2273,12 +2447,37 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- bailout(kSimd, "i64x2_shl");
+ liftoff::EmitSimdShift<liftoff::kLeft, NeonS64, Neon32>(this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
- bailout(kSimd, "i64x2_shli");
+ vshl(NeonS64, liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(lhs), rhs & 63);
+}
+
+void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShift<liftoff::kRight, NeonS64, Neon32>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS64>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShift<liftoff::kRight, NeonU64, Neon32>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU64>(this, dst, lhs,
+ rhs);
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2306,15 +2505,18 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
QwNeonRegister tmp1 = left;
QwNeonRegister tmp2 = right;
- if (cache_state()->is_used(lhs) && cache_state()->is_used(rhs)) {
+ LiftoffRegList used_plus_dst =
+ cache_state()->used_registers | LiftoffRegList::ForRegs(dst);
+
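+ // Also treat {dst} as used: if it aliases lhs or rhs, the aliased input
+ // must be moved to a temporary, as it is still read after dst is written.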
+ if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) {
tmp1 = temps.AcquireQ();
// We only have 1 scratch Q register, so acquire another ourselves.
LiftoffRegList pinned = LiftoffRegList::ForRegs(dst);
LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
tmp2 = liftoff::GetSimd128Register(unused_pair);
- } else if (cache_state()->is_used(lhs)) {
+ } else if (used_plus_dst.has(lhs)) {
tmp1 = temps.AcquireQ();
- } else if (cache_state()->is_used(rhs)) {
+ } else if (used_plus_dst.has(rhs)) {
tmp2 = temps.AcquireQ();
}
@@ -2363,14 +2565,79 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
liftoff::GetSimd128Register(src));
}
+void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ UseScratchRegisterScope temps(this);
+ DwVfpRegister scratch = temps.AcquireD();
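+ // Pairwise-min folds all lanes into lane 0 of {scratch}: it is non-zero
+ // iff every input lane was non-zero.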
+ vpmin(NeonU32, scratch, src.low_fp(), src.high_fp());
+ vpmin(NeonU32, scratch, scratch, scratch);
+ ExtractLane(dst.gp(), scratch, NeonS32, 0);
+ cmp(dst.gp(), Operand(0));
+ mov(dst.gp(), Operand(1), LeaveCC, ne);
+}
+
+void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
+ LiftoffRegister src) {
+ UseScratchRegisterScope temps(this);
+ Simd128Register tmp = liftoff::GetSimd128Register(src);
+ Simd128Register mask = temps.AcquireQ();
+
+ if (cache_state()->is_used(src)) {
+ // We only have 1 scratch Q register, so try to reuse src.
+ LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+ LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+ mask = liftoff::GetSimd128Register(unused_pair);
+ }
+
+ vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31);
+ // Set the i-th bit of each lane i. When ANDed with tmp, lanes whose sign
+ // bit was set keep their i-th bit; all other lanes become 0.
+ vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001));
+ vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004));
+ vand(tmp, mask, tmp);
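+ // Add the lane bits together pairwise; the 4-bit mask ends up in the low
+ // 32 bits of {tmp}.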
+ vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
+ vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
+ VmovLow(dst.gp(), tmp.low());
+}
+
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- bailout(kSimd, "i32x4_shl");
+ liftoff::EmitSimdShift<liftoff::kLeft, NeonS32, Neon32>(this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
- bailout(kSimd, "i32x4_shli");
+ vshl(NeonS32, liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(lhs), rhs & 31);
+}
+
+void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShift<liftoff::kRight, NeonS32, Neon32>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS32>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShift<liftoff::kRight, NeonU32, Neon32>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU32>(this, dst, lhs,
+ rhs);
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2430,14 +2697,81 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
liftoff::GetSimd128Register(src));
}
+void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ UseScratchRegisterScope temps(this);
+ DwVfpRegister scratch = temps.AcquireD();
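+ // Min-reduce all 8 lanes into lane 0 of {scratch} (cf. v32x4_alltrue).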
+ vpmin(NeonU16, scratch, src.low_fp(), src.high_fp());
+ vpmin(NeonU16, scratch, scratch, scratch);
+ vpmin(NeonU16, scratch, scratch, scratch);
+ ExtractLane(dst.gp(), scratch, NeonS16, 0);
+ cmp(dst.gp(), Operand(0));
+ mov(dst.gp(), Operand(1), LeaveCC, ne);
+}
+
+void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
+ LiftoffRegister src) {
+ UseScratchRegisterScope temps(this);
+ Simd128Register tmp = liftoff::GetSimd128Register(src);
+ Simd128Register mask = temps.AcquireQ();
+
+ if (cache_state()->is_used(src)) {
+ // We only have 1 scratch Q register, so try to reuse src.
+ LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+ LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+ mask = liftoff::GetSimd128Register(unused_pair);
+ }
+
+ vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15);
+ // Set the i-th bit of each lane i. When ANDed with tmp, lanes whose sign
+ // bit was set keep their i-th bit; all other lanes become 0.
+ vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001));
+ vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010));
+ vand(tmp, mask, tmp);
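+ // Three pairwise adds fold the eight lane bits into the low 16 bits of
+ // {tmp}.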
+ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
+ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+ vmov(NeonU16, dst.gp(), tmp.low(), 0);
+}
+
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- bailout(kSimd, "i16x8_shl");
+ liftoff::EmitSimdShift<liftoff::kLeft, NeonS16, Neon16>(this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
- bailout(kSimd, "i16x8_shli");
+ vshl(NeonS16, liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(lhs), rhs & 15);
+}
+
+void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShift<liftoff::kRight, NeonS16, Neon16>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS16>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShift<liftoff::kRight, NeonU16, Neon16>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU16>(this, dst, lhs,
+ rhs);
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2537,6 +2871,60 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
imm_lane_idx);
}
+void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs,
+ const uint8_t shuffle[16]) {
+ Simd128Register dest = liftoff::GetSimd128Register(dst);
+ Simd128Register src1 = liftoff::GetSimd128Register(lhs);
+ Simd128Register src2 = liftoff::GetSimd128Register(rhs);
+ UseScratchRegisterScope temps(this);
+ Simd128Register scratch = temps.AcquireQ();
+ if ((src1 != src2) && src1.code() + 1 != src2.code()) {
+ // vtbl requires the operands to be consecutive or the same. If they are
+ // the same, we build a smaller list operand below (table_size = 2).
+ // Otherwise, if they are not consecutive, move src1 and src2 to q14 and
+ // q15, which are unused since they are not allocatable in Liftoff.
+ static_assert(!(kLiftoffAssemblerFpCacheRegs &
+ (d28.bit() | d29.bit() | d30.bit() | d31.bit())),
+ "This only works if q14-q15 (d28-d31) are not used.");
+ vmov(q14, src1);
+ src1 = q14;
+ vmov(q15, src2);
+ src2 = q15;
+ }
+
+ int table_size = src1 == src2 ? 2 : 4;
+ uint32_t mask = table_size == 2 ? 0x0F0F0F0F : 0x1F1F1F1F;
+
+ int scratch_s_base = scratch.code() * 4;
+ for (int j = 0; j < 4; j++) {
+ uint32_t imm = 0;
+ for (int i = 3; i >= 0; i--) {
+ imm = (imm << 8) | shuffle[j * 4 + i];
+ }
+ uint32_t four_lanes = imm;
+ // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
+ four_lanes &= mask;
+ vmov(SwVfpRegister::from_code(scratch_s_base + j),
+ Float32::FromBits(four_lanes));
+ }
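+ // {scratch} now holds the 16 shuffle indices, masked to the table range.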
+
+ DwVfpRegister table_base = src1.low();
+ NeonListOperand table(table_base, table_size);
+
+ if (dest != src1 && dest != src2) {
+ vtbl(dest.low(), table, scratch.low());
+ vtbl(dest.high(), table, scratch.high());
+ } else {
+ vtbl(scratch.low(), table, scratch.low());
+ vtbl(scratch.high(), table, scratch.high());
+ vmov(dest, scratch);
+ }
+}
+
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp());
@@ -2569,14 +2957,82 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
liftoff::GetSimd128Register(src));
}
+void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst,
+ LiftoffRegister src) {
+ UseScratchRegisterScope temps(this);
+ DwVfpRegister scratch = temps.AcquireD();
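+ // Min-reduce all 16 lanes into lane 0 of {scratch} (cf. v32x4_alltrue).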
+ vpmin(NeonU8, scratch, src.low_fp(), src.high_fp());
+ vpmin(NeonU8, scratch, scratch, scratch);
+ vpmin(NeonU8, scratch, scratch, scratch);
+ vpmin(NeonU8, scratch, scratch, scratch);
+ ExtractLane(dst.gp(), scratch, NeonS8, 0);
+ cmp(dst.gp(), Operand(0));
+ mov(dst.gp(), Operand(1), LeaveCC, ne);
+}
+
+void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
+ LiftoffRegister src) {
+ UseScratchRegisterScope temps(this);
+ Simd128Register tmp = liftoff::GetSimd128Register(src);
+ Simd128Register mask = temps.AcquireQ();
+
+ if (cache_state()->is_used(src)) {
+ // We only have 1 scratch Q register, so try to reuse src.
+ LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+ LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+ mask = liftoff::GetSimd128Register(unused_pair);
+ }
+
+ vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7);
+ // Set the i-th bit of each lane i. When ANDed with tmp, lanes whose sign
+ // bit was set keep their i-th bit; all other lanes become 0.
+ vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201));
+ vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201));
+ vand(tmp, mask, tmp);
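+ // Interleave the two halves so that each 16-bit lane pairs a low-half
+ // byte (bits 0-7 of the mask) with the matching high-half byte (bits
+ // 8-15); the pairwise adds below then sum them into one 16-bit mask.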
+ vext(mask, tmp, tmp, 8);
+ vzip(Neon8, mask, tmp);
+ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
+ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+ vmov(NeonU16, dst.gp(), tmp.low(), 0);
+}
+
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- bailout(kSimd, "i8x16_shl");
+ liftoff::EmitSimdShift<liftoff::kLeft, NeonS8, Neon8>(this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
- bailout(kSimd, "i8x16_shli");
+ vshl(NeonS8, liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(lhs), rhs & 7);
+}
+
+void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShift<liftoff::kRight, NeonS8, Neon8>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS8>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShift<liftoff::kRight, NeonU8, Neon8>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
+ LiftoffRegister lhs, int32_t rhs) {
+ liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU8>(this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2842,6 +3298,30 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
liftoff::GetSimd128Register(src2));
}
+void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
+ LiftoffRegister src) {
+ vcvt_s32_f32(liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(src));
+}
+
+void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
+ LiftoffRegister src) {
+ vcvt_u32_f32(liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(src));
+}
+
+void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
+ LiftoffRegister src) {
+ vcvt_f32_s32(liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(src));
+}
+
+void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
+ LiftoffRegister src) {
+ vcvt_f32_u32(liftoff::GetSimd128Register(dst),
+ liftoff::GetSimd128Register(src));
+}
+
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {