| author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-12 14:27:29 +0200 |
|---|---|---|
| committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-13 09:35:20 +0000 |
| commit | c30a6232df03e1efbd9f3b226777b07e087a1122 (patch) | |
| tree | e992f45784689f373bcc38d1b79a239ebe17ee23 | /chromium/v8/src/wasm/baseline/arm |
| parent | 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff) | |
| download | qtwebengine-chromium-85-based.tar.gz | |
BASELINE: Update Chromium to 85.0.4183.140
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/v8/src/wasm/baseline/arm')
-rw-r--r-- | chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h | 512 |
1 file changed, 496 insertions, 16 deletions
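Most of the patch below replaces Liftoff's SIMD shift bailouts on ARM with real NEON lowerings. As a rough standalone sketch (not V8 code) of the semantics the new `EmitSimdShift`/`EmitSimdShiftImmediate` helpers implement: the shift count is masked to the lane width, and a variable right shift is expressed as `vshl` by a negated count.

```cpp
#include <cstdint>
#include <iostream>

// wasm SIMD shifts take the count modulo the lane width; for 32-bit lanes
// the mask is 31, mirroring MaskFromNeonDataType(NeonS32/NeonU32).
uint32_t shl_lane32(uint32_t lane, int32_t count) {
  return lane << (count & 31);
}

// NEON has no variable right-shift instruction, so the helper negates the
// masked count and uses vshl; per lane this is an ordinary arithmetic shift.
int32_t shr_s_lane32(int32_t lane, int32_t count) {
  return lane >> (count & 31);
}

int main() {
  std::cout << shl_lane32(1, 33) << "\n";    // 33 & 31 == 1 -> prints 2
  std::cout << shr_s_lane32(-8, 2) << "\n";  // arithmetic shift -> prints -2
  return 0;
}
```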
diff --git a/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h b/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h
index eb91b79ea55..4a9cffb9728 100644
--- a/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h
+++ b/chromium/v8/src/wasm/baseline/arm/liftoff-assembler-arm.h
@@ -332,6 +332,71 @@ inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
  }
}

+constexpr int MaskFromNeonDataType(NeonDataType dt) {
+  switch (dt) {
+    case NeonS8:
+    case NeonU8:
+      return 7;
+    case NeonS16:
+    case NeonU16:
+      return 15;
+    case NeonS32:
+    case NeonU32:
+      return 31;
+    case NeonS64:
+    case NeonU64:
+      return 63;
+  }
+}
+
+enum ShiftDirection { kLeft, kRight };
+
+template <ShiftDirection dir = kLeft, NeonDataType dt, NeonSize sz>
+inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst,
+                          LiftoffRegister lhs, LiftoffRegister rhs) {
+  constexpr int mask = MaskFromNeonDataType(dt);
+  UseScratchRegisterScope temps(assm);
+  QwNeonRegister tmp = temps.AcquireQ();
+  Register shift = temps.Acquire();
+  assm->and_(shift, rhs.gp(), Operand(mask));
+  assm->vdup(sz, tmp, shift);
+  if (dir == kRight) {
+    assm->vneg(sz, tmp, tmp);
+  }
+  assm->vshl(dt, liftoff::GetSimd128Register(dst),
+             liftoff::GetSimd128Register(lhs), tmp);
+}
+
+template <ShiftDirection dir, NeonDataType dt>
+inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst,
+                                   LiftoffRegister lhs, int32_t rhs) {
+  // vshr by 0 is not allowed, so check for it, and only move if dst != lhs.
+  int32_t shift = rhs & MaskFromNeonDataType(dt);
+  if (shift) {
+    if (dir == kLeft) {
+      assm->vshl(dt, liftoff::GetSimd128Register(dst),
+                 liftoff::GetSimd128Register(lhs), shift);
+    } else {
+      assm->vshr(dt, liftoff::GetSimd128Register(dst),
+                 liftoff::GetSimd128Register(lhs), shift);
+    }
+  } else if (dst != lhs) {
+    assm->vmov(liftoff::GetSimd128Register(dst),
+               liftoff::GetSimd128Register(lhs));
+  }
+}
+
+inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
+                        LiftoffRegister src) {
+  UseScratchRegisterScope temps(assm);
+  DwVfpRegister scratch = temps.AcquireD();
+  assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp());
+  assm->vpmax(NeonU32, scratch, scratch, scratch);
+  assm->ExtractLane(dst.gp(), scratch, NeonS32, 0);
+  assm->cmp(dst.gp(), Operand(0));
+  assm->mov(dst.gp(), Operand(1), LeaveCC, ne);
+}
+
}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
@@ -437,7 +502,7 @@ void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
      vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed());
      break;
    case ValueType::kF64: {
-      Register extra_scratch = GetUnusedRegister(kGpReg).gp();
+      Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp();
      vmov(reg.fp(), Double(value.to_f64_boxed().get_bits()), extra_scratch);
      break;
    }
@@ -1171,7 +1236,7 @@ void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueType type) {
  DCHECK_NE(dst_offset, src_offset);
-  LiftoffRegister reg = GetUnusedRegister(reg_class_for(type));
+  LiftoffRegister reg = GetUnusedRegister(reg_class_for(type), {});
  Fill(reg, src_offset, type);
  Spill(dst_offset, reg, type);
}
@@ -1216,7 +1281,7 @@ void LiftoffAssembler::Spill(int offset, WasmValue value) {
  // The scratch register will be required by str if multiple instructions
  // are required to encode the offset, and so we cannot use it in that case.
  if (!ImmediateFitsAddrMode2Instruction(dst.offset())) {
-    src = GetUnusedRegister(kGpReg).gp();
+    src = GetUnusedRegister(kGpReg, {}).gp();
  } else {
    src = temps.Acquire();
  }
@@ -1758,7 +1823,7 @@ void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  constexpr uint32_t kF32SignBit = uint32_t{1} << 31;
  UseScratchRegisterScope temps(this);
-  Register scratch = GetUnusedRegister(kGpReg).gp();
+  Register scratch = GetUnusedRegister(kGpReg, {}).gp();
  Register scratch2 = temps.Acquire();
  VmovLow(scratch, lhs);
  // Clear sign bit in {scratch}.
@@ -1777,7 +1842,7 @@ void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
  // On arm, we cannot hold the whole f64 value in a gp register, so we just
  // operate on the upper half (UH).
  UseScratchRegisterScope temps(this);
-  Register scratch = GetUnusedRegister(kGpReg).gp();
+  Register scratch = GetUnusedRegister(kGpReg, {}).gp();
  Register scratch2 = temps.Acquire();
  VmovHigh(scratch, lhs);
  // Clear sign bit in {scratch}.
@@ -1862,6 +1927,38 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
      b(trap, ge);
      return true;
    }
+    case kExprI32SConvertSatF32: {
+      UseScratchRegisterScope temps(this);
+      SwVfpRegister scratch_f = temps.AcquireS();
+      vcvt_s32_f32(
+          scratch_f,
+          liftoff::GetFloatRegister(src.fp()));  // f32 -> i32 round to zero.
+      vmov(dst.gp(), scratch_f);
+      return true;
+    }
+    case kExprI32UConvertSatF32: {
+      UseScratchRegisterScope temps(this);
+      SwVfpRegister scratch_f = temps.AcquireS();
+      vcvt_u32_f32(
+          scratch_f,
+          liftoff::GetFloatRegister(src.fp()));  // f32 -> u32 round to zero.
+      vmov(dst.gp(), scratch_f);
+      return true;
+    }
+    case kExprI32SConvertSatF64: {
+      UseScratchRegisterScope temps(this);
+      SwVfpRegister scratch_f = temps.AcquireS();
+      vcvt_s32_f64(scratch_f, src.fp());  // f64 -> i32 round to zero.
+      vmov(dst.gp(), scratch_f);
+      return true;
+    }
+    case kExprI32UConvertSatF64: {
+      UseScratchRegisterScope temps(this);
+      SwVfpRegister scratch_f = temps.AcquireS();
+      vcvt_u32_f64(scratch_f, src.fp());  // f64 -> u32 round to zero.
+      vmov(dst.gp(), scratch_f);
+      return true;
+    }
    case kExprI32ReinterpretF32:
      vmov(dst.gp(), liftoff::GetFloatRegister(src.fp()));
      return true;
@@ -1914,10 +2011,14 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
    case kExprF64UConvertI64:
    case kExprI64SConvertF32:
    case kExprI64UConvertF32:
+    case kExprI64SConvertSatF32:
+    case kExprI64UConvertSatF32:
    case kExprF32SConvertI64:
    case kExprF32UConvertI64:
    case kExprI64SConvertF64:
    case kExprI64UConvertF64:
+    case kExprI64SConvertSatF64:
+    case kExprI64UConvertSatF64:
      // These cases can be handled by the C fallback function.
      return false;
    default:
@@ -2052,6 +2153,79 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
  }
}

+void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
+                                     Register offset_reg, uint32_t offset_imm,
+                                     LoadType type,
+                                     LoadTransformationKind transform,
+                                     uint32_t* protected_load_pc) {
+  UseScratchRegisterScope temps(this);
+  Register actual_src_addr = liftoff::CalculateActualAddress(
+      this, &temps, src_addr, offset_reg, offset_imm);
+  *protected_load_pc = pc_offset();
+  MachineType memtype = type.mem_type();
+
+  if (transform == LoadTransformationKind::kExtend) {
+    if (memtype == MachineType::Int8()) {
+      vld1(Neon8, NeonListOperand(dst.low_fp()),
+           NeonMemOperand(actual_src_addr));
+      vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp());
+    } else if (memtype == MachineType::Uint8()) {
+      vld1(Neon8, NeonListOperand(dst.low_fp()),
+           NeonMemOperand(actual_src_addr));
+      vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp());
+    } else if (memtype == MachineType::Int16()) {
+      vld1(Neon16, NeonListOperand(dst.low_fp()),
+           NeonMemOperand(actual_src_addr));
+      vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp());
+    } else if (memtype == MachineType::Uint16()) {
+      vld1(Neon16, NeonListOperand(dst.low_fp()),
+           NeonMemOperand(actual_src_addr));
+      vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp());
+    } else if (memtype == MachineType::Int32()) {
+      vld1(Neon32, NeonListOperand(dst.low_fp()),
+           NeonMemOperand(actual_src_addr));
+      vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp());
+    } else if (memtype == MachineType::Uint32()) {
+      vld1(Neon32, NeonListOperand(dst.low_fp()),
+           NeonMemOperand(actual_src_addr));
+      vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp());
+    }
+  } else {
+    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
+    if (memtype == MachineType::Int8()) {
+      vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)),
+            NeonMemOperand(actual_src_addr));
+    } else if (memtype == MachineType::Int16()) {
+      vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)),
+            NeonMemOperand(actual_src_addr));
+    } else if (memtype == MachineType::Int32()) {
+      vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)),
+            NeonMemOperand(actual_src_addr));
+    } else if (memtype == MachineType::Int64()) {
+      vld1(Neon32, NeonListOperand(dst.low_fp()),
+           NeonMemOperand(actual_src_addr));
+      TurboAssembler::Move(dst.high_fp(), dst.low_fp());
+    }
+  }
+}
+
+void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
+                                          LiftoffRegister lhs,
+                                          LiftoffRegister rhs) {
+  UseScratchRegisterScope temps(this);
+
+  NeonListOperand table(liftoff::GetSimd128Register(lhs));
+  if (dst == lhs) {
+    // dst will be overwritten, so keep the table somewhere else.
+    QwNeonRegister tbl = temps.AcquireQ();
+    TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs));
+    table = NeonListOperand(tbl);
+  }
+
+  vtbl(dst.low_fp(), table, rhs.low_fp());
+  vtbl(dst.high_fp(), table, rhs.high_fp());
+}
+
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  TurboAssembler::Move(dst.low_fp(), src.fp());
@@ -2273,12 +2447,37 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,

void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
-  bailout(kSimd, "i64x2_shl");
+  liftoff::EmitSimdShift<liftoff::kLeft, NeonS64, Neon32>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
-  bailout(kSimd, "i64x2_shli");
+  vshl(NeonS64, liftoff::GetSimd128Register(dst),
+       liftoff::GetSimd128Register(lhs), rhs & 63);
+}
+
+void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
+                                        LiftoffRegister lhs,
+                                        LiftoffRegister rhs) {
+  liftoff::EmitSimdShift<liftoff::kRight, NeonS64, Neon32>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
+                                         LiftoffRegister lhs, int32_t rhs) {
+  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS64>(this, dst, lhs,
+                                                            rhs);
+}
+
+void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
+                                        LiftoffRegister lhs,
+                                        LiftoffRegister rhs) {
+  liftoff::EmitSimdShift<liftoff::kRight, NeonU64, Neon32>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
+                                         LiftoffRegister lhs, int32_t rhs) {
+  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU64>(this, dst, lhs,
+                                                            rhs);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2306,15 +2505,18 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
  QwNeonRegister tmp1 = left;
  QwNeonRegister tmp2 = right;

-  if (cache_state()->is_used(lhs) && cache_state()->is_used(rhs)) {
+  LiftoffRegList used_plus_dst =
+      cache_state()->used_registers | LiftoffRegList::ForRegs(dst);
+
+  if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) {
    tmp1 = temps.AcquireQ();
    // We only have 1 scratch Q register, so acquire another ourselves.
    LiftoffRegList pinned = LiftoffRegList::ForRegs(dst);
    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
    tmp2 = liftoff::GetSimd128Register(unused_pair);
-  } else if (cache_state()->is_used(lhs)) {
+  } else if (used_plus_dst.has(lhs)) {
    tmp1 = temps.AcquireQ();
-  } else if (cache_state()->is_used(rhs)) {
+  } else if (used_plus_dst.has(rhs)) {
    tmp2 = temps.AcquireQ();
  }
@@ -2363,14 +2565,79 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
                                      liftoff::GetSimd128Register(src));
}

+void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  UseScratchRegisterScope temps(this);
+  DwVfpRegister scratch = temps.AcquireD();
+  vpmin(NeonU32, scratch, src.low_fp(), src.high_fp());
+  vpmin(NeonU32, scratch, scratch, scratch);
+  ExtractLane(dst.gp(), scratch, NeonS32, 0);
+  cmp(dst.gp(), Operand(0));
+  mov(dst.gp(), Operand(1), LeaveCC, ne);
+}
+
+void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  UseScratchRegisterScope temps(this);
+  Simd128Register tmp = liftoff::GetSimd128Register(src);
+  Simd128Register mask = temps.AcquireQ();
+
+  if (cache_state()->is_used(src)) {
+    // We only have 1 scratch Q register, so try and reuse src.
+    LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+    mask = liftoff::GetSimd128Register(unused_pair);
+  }
+
+  vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31);
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001));
+  vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004));
+  vand(tmp, mask, tmp);
+  vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
+  vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
+  VmovLow(dst.gp(), tmp.low());
+}
+
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
-  bailout(kSimd, "i32x4_shl");
+  liftoff::EmitSimdShift<liftoff::kLeft, NeonS32, Neon32>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
-  bailout(kSimd, "i32x4_shli");
+  vshl(NeonS32, liftoff::GetSimd128Register(dst),
+       liftoff::GetSimd128Register(lhs), rhs & 31);
+}
+
+void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
+                                        LiftoffRegister lhs,
+                                        LiftoffRegister rhs) {
+  liftoff::EmitSimdShift<liftoff::kRight, NeonS32, Neon32>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
+                                         LiftoffRegister lhs, int32_t rhs) {
+  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS32>(this, dst, lhs,
+                                                            rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
+                                        LiftoffRegister lhs,
+                                        LiftoffRegister rhs) {
+  liftoff::EmitSimdShift<liftoff::kRight, NeonU32, Neon32>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
+                                         LiftoffRegister lhs, int32_t rhs) {
+  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU32>(this, dst, lhs,
+                                                            rhs);
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2430,14 +2697,81 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
                                      liftoff::GetSimd128Register(src));
}

+void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  UseScratchRegisterScope temps(this);
+  DwVfpRegister scratch = temps.AcquireD();
+  vpmin(NeonU16, scratch, src.low_fp(), src.high_fp());
+  vpmin(NeonU16, scratch, scratch, scratch);
+  vpmin(NeonU16, scratch, scratch, scratch);
+  ExtractLane(dst.gp(), scratch, NeonS16, 0);
+  cmp(dst.gp(), Operand(0));
+  mov(dst.gp(), Operand(1), LeaveCC, ne);
+}
+
+void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  UseScratchRegisterScope temps(this);
+  Simd128Register tmp = liftoff::GetSimd128Register(src);
+  Simd128Register mask = temps.AcquireQ();
+
+  if (cache_state()->is_used(src)) {
+    // We only have 1 scratch Q register, so try and reuse src.
+    LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+    mask = liftoff::GetSimd128Register(unused_pair);
+  }
+
+  vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15);
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001));
+  vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010));
+  vand(tmp, mask, tmp);
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+  vmov(NeonU16, dst.gp(), tmp.low(), 0);
+}
+
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
-  bailout(kSimd, "i16x8_shl");
+  liftoff::EmitSimdShift<liftoff::kLeft, NeonS16, Neon16>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
-  bailout(kSimd, "i16x8_shli");
+  vshl(NeonS16, liftoff::GetSimd128Register(dst),
+       liftoff::GetSimd128Register(lhs), rhs & 15);
+}
+
+void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
+                                        LiftoffRegister lhs,
+                                        LiftoffRegister rhs) {
+  liftoff::EmitSimdShift<liftoff::kRight, NeonS16, Neon16>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
+                                         LiftoffRegister lhs, int32_t rhs) {
+  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS16>(this, dst, lhs,
+                                                            rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
+                                        LiftoffRegister lhs,
+                                        LiftoffRegister rhs) {
+  liftoff::EmitSimdShift<liftoff::kRight, NeonU16, Neon16>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
+                                         LiftoffRegister lhs, int32_t rhs) {
+  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU16>(this, dst, lhs,
+                                                            rhs);
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2537,6 +2871,60 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               imm_lane_idx);
}

+void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
+                                          LiftoffRegister lhs,
+                                          LiftoffRegister rhs,
+                                          const uint8_t shuffle[16]) {
+  Simd128Register dest = liftoff::GetSimd128Register(dst);
+  Simd128Register src1 = liftoff::GetSimd128Register(lhs);
+  Simd128Register src2 = liftoff::GetSimd128Register(rhs);
+  UseScratchRegisterScope temps(this);
+  Simd128Register scratch = temps.AcquireQ();
+  if ((src1 != src2) && src1.code() + 1 != src2.code()) {
+    // vtbl requires the operands to be consecutive or the same.
+    // If they are the same, we build a smaller list operand (table_size = 2).
+    // If they are not the same, and not consecutive, we move the src1 and src2
+    // to q14 and q15, which will be unused since they are not allocatable in
+    // Liftoff. If the operands are the same, then we build a smaller list
+    // operand below.
+    static_assert(!(kLiftoffAssemblerFpCacheRegs &
+                    (d28.bit() | d29.bit() | d30.bit() | d31.bit())),
+                  "This only works if q14-q15 (d28-d31) are not used.");
+    vmov(q14, src1);
+    src1 = q14;
+    vmov(q15, src2);
+    src2 = q15;
+  }
+
+  int table_size = src1 == src2 ? 2 : 4;
+  uint32_t mask = table_size == 2 ? 0x0F0F0F0F : 0x1F1F1F1F;
+
+  int scratch_s_base = scratch.code() * 4;
+  for (int j = 0; j < 4; j++) {
+    uint32_t imm = 0;
+    for (int i = 3; i >= 0; i--) {
+      imm = (imm << 8) | shuffle[j * 4 + i];
+    }
+    uint32_t four_lanes = imm;
+    // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
+    four_lanes &= mask;
+    vmov(SwVfpRegister::from_code(scratch_s_base + j),
+         Float32::FromBits(four_lanes));
+  }
+
+  DwVfpRegister table_base = src1.low();
+  NeonListOperand table(table_base, table_size);
+
+  if (dest != src1 && dest != src2) {
+    vtbl(dest.low(), table, scratch.low());
+    vtbl(dest.high(), table, scratch.high());
+  } else {
+    vtbl(scratch.low(), table, scratch.low());
+    vtbl(scratch.high(), table, scratch.high());
+    vmov(dest, scratch);
+  }
+}
+
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp());
@@ -2569,14 +2957,82 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
                                      liftoff::GetSimd128Register(src));
}

+void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  liftoff::EmitAnyTrue(this, dst, src);
+}
+
+void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  UseScratchRegisterScope temps(this);
+  DwVfpRegister scratch = temps.AcquireD();
+  vpmin(NeonU8, scratch, src.low_fp(), src.high_fp());
+  vpmin(NeonU8, scratch, scratch, scratch);
+  vpmin(NeonU8, scratch, scratch, scratch);
+  vpmin(NeonU8, scratch, scratch, scratch);
+  ExtractLane(dst.gp(), scratch, NeonS8, 0);
+  cmp(dst.gp(), Operand(0));
+  mov(dst.gp(), Operand(1), LeaveCC, ne);
+}
+
+void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
+                                          LiftoffRegister src) {
+  UseScratchRegisterScope temps(this);
+  Simd128Register tmp = liftoff::GetSimd128Register(src);
+  Simd128Register mask = temps.AcquireQ();
+
+  if (cache_state()->is_used(src)) {
+    // We only have 1 scratch Q register, so try and reuse src.
+    LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+    mask = liftoff::GetSimd128Register(unused_pair);
+  }
+
+  vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7);
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201));
+  vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201));
+  vand(tmp, mask, tmp);
+  vext(mask, tmp, tmp, 8);
+  vzip(Neon8, mask, tmp);
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+  vmov(NeonU16, dst.gp(), tmp.low(), 0);
+}
+
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
-  bailout(kSimd, "i8x16_shl");
+  liftoff::EmitSimdShift<liftoff::kLeft, NeonS8, Neon8>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
-  bailout(kSimd, "i8x16_shli");
+  vshl(NeonS8, liftoff::GetSimd128Register(dst),
+       liftoff::GetSimd128Register(lhs), rhs & 7);
+}
+
+void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
+                                        LiftoffRegister lhs,
+                                        LiftoffRegister rhs) {
+  liftoff::EmitSimdShift<liftoff::kRight, NeonS8, Neon8>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
+                                         LiftoffRegister lhs, int32_t rhs) {
+  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS8>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
+                                        LiftoffRegister lhs,
+                                        LiftoffRegister rhs) {
+  liftoff::EmitSimdShift<liftoff::kRight, NeonU8, Neon8>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
+                                         LiftoffRegister lhs, int32_t rhs) {
+  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU8>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2842,6 +3298,30 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                        liftoff::GetSimd128Register(src2));
}

+void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
+                                                 LiftoffRegister src) {
+  vcvt_s32_f32(liftoff::GetSimd128Register(dst),
+               liftoff::GetSimd128Register(src));
+}
+
+void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
+                                                 LiftoffRegister src) {
+  vcvt_u32_f32(liftoff::GetSimd128Register(dst),
+               liftoff::GetSimd128Register(src));
+}
+
+void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
+                                                 LiftoffRegister src) {
+  vcvt_f32_s32(liftoff::GetSimd128Register(dst),
+               liftoff::GetSimd128Register(src));
+}
+
+void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
+                                                 LiftoffRegister src) {
+  vcvt_f32_u32(liftoff::GetSimd128Register(dst),
+               liftoff::GetSimd128Register(src));
+}
+
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
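The new `emit_i32x4_bitmask` lowering above works by turning each lane's sign bit into a per-lane weight and summing the weights. A minimal scalar model of that idea (illustrative only, not the assembler code in the patch):

```cpp
#include <cstdint>
#include <iostream>

// Scalar model of the i32x4.bitmask lowering: each lane contributes its
// sign bit as a weight 1, 2, 4 or 8 (the constants loaded into the mask
// register), and the weights are summed, which the vpadd steps perform.
uint32_t i32x4_bitmask_model(const int32_t lanes[4]) {
  const uint32_t weights[4] = {1, 2, 4, 8};
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i) {
    // vshr(NeonS32, ..., 31) turns a negative lane into all ones, else zero.
    uint32_t all_or_none = lanes[i] < 0 ? 0xFFFFFFFFu : 0u;
    result += all_or_none & weights[i];  // vand followed by horizontal add
  }
  return result;
}

int main() {
  int32_t v[4] = {-1, 5, -7, 0};
  std::cout << i32x4_bitmask_model(v) << "\n";  // lanes 0 and 2 set -> 5
  return 0;
}
```

The i16x8 and i8x16 bitmask variants follow the same pattern with different lane weights and an extra pairwise-add or zip step to collapse the wider vectors.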