path: root/deps/v8/src/codegen/x64/macro-assembler-x64.cc
Diffstat (limited to 'deps/v8/src/codegen/x64/macro-assembler-x64.cc')
-rw-r--r--  deps/v8/src/codegen/x64/macro-assembler-x64.cc | 518
1 file changed, 474 insertions(+), 44 deletions(-)
diff --git a/deps/v8/src/codegen/x64/macro-assembler-x64.cc b/deps/v8/src/codegen/x64/macro-assembler-x64.cc
index e696e8b66e..b91e8319ac 100644
--- a/deps/v8/src/codegen/x64/macro-assembler-x64.cc
+++ b/deps/v8/src/codegen/x64/macro-assembler-x64.cc
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include <cstdint>
#if V8_TARGET_ARCH_X64
#include "src/base/bits.h"
@@ -203,6 +204,15 @@ void TurboAssembler::LoadTaggedPointerField(Register destination,
}
}
+void TurboAssembler::LoadTaggedSignedField(Register destination,
+ Operand field_operand) {
+ if (COMPRESS_POINTERS_BOOL) {
+ DecompressTaggedSigned(destination, field_operand);
+ } else {
+ mov_tagged(destination, field_operand);
+ }
+}
+
void TurboAssembler::LoadAnyTaggedField(Register destination,
Operand field_operand) {
if (COMPRESS_POINTERS_BOOL) {
@@ -256,6 +266,16 @@ void TurboAssembler::StoreTaggedField(Operand dst_field_operand,
}
}
+void TurboAssembler::StoreTaggedSignedField(Operand dst_field_operand,
+ Smi value) {
+ if (SmiValuesAre32Bits()) {
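+ // With 32-bit Smis the payload lives in the upper half of the tagged
+ // field, so a 32-bit store at offset kSmiShift / kBitsPerByte writes the
+ // value directly.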
+ movl(Operand(dst_field_operand, kSmiShift / kBitsPerByte),
+ Immediate(value.value()));
+ } else {
+ StoreTaggedField(dst_field_operand, Immediate(value));
+ }
+}
+
void TurboAssembler::DecompressTaggedSigned(Register destination,
Operand field_operand) {
RecordComment("[ DecompressTaggedSigned");
@@ -694,6 +714,16 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
return bytes;
}
+void TurboAssembler::Movdqa(XMMRegister dst, Operand src) {
+ // See comments in Movdqa(XMMRegister, XMMRegister).
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vmovdqa(dst, src);
+ } else {
+ movaps(dst, src);
+ }
+}
+
void TurboAssembler::Movdqa(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
@@ -1078,17 +1108,7 @@ void TurboAssembler::Set(Operand dst, intptr_t x) {
// Smi tagging, untagging and tag detection.
Register TurboAssembler::GetSmiConstant(Smi source) {
- STATIC_ASSERT(kSmiTag == 0);
- int value = source.value();
- if (value == 0) {
- xorl(kScratchRegister, kScratchRegister);
- return kScratchRegister;
- }
- if (SmiValuesAre32Bits()) {
- Move(kScratchRegister, source);
- } else {
- movl(kScratchRegister, Immediate(source));
- }
+ Move(kScratchRegister, source);
return kScratchRegister;
}
@@ -1097,8 +1117,17 @@ void TurboAssembler::Move(Register dst, Smi source) {
int value = source.value();
if (value == 0) {
xorl(dst, dst);
- } else {
+ } else if (SmiValuesAre32Bits() || value < 0) {
Move(dst, source.ptr(), RelocInfo::NONE);
+ } else {
+ uint32_t uvalue = static_cast<uint32_t>(source.ptr());
+ if (uvalue <= 0xFF) {
+ // Emit shorter instructions for small Smis
+ xorl(dst, dst);
+ movb(dst, Immediate(uvalue));
+ } else {
+ movl(dst, Immediate(uvalue));
+ }
}
}
@@ -1340,6 +1369,9 @@ void TurboAssembler::Move(Register dst, Register src) {
}
}
+void TurboAssembler::Move(Register dst, Operand src) { movq(dst, src); }
+void TurboAssembler::Move(Register dst, Immediate src) { movl(dst, src); }
+
void TurboAssembler::Move(XMMRegister dst, XMMRegister src) {
if (dst != src) {
Movaps(dst, src);
@@ -1594,6 +1626,7 @@ void TurboAssembler::Jump(Handle<Code> code_object, RelocInfo::Mode rmode,
Address entry = d.InstructionStartOfBuiltin(builtin_index);
Move(kScratchRegister, entry, RelocInfo::OFF_HEAP_TARGET);
jmp(kScratchRegister);
+ if (FLAG_code_comments) RecordComment("]");
bind(&skip);
return;
}
@@ -1676,6 +1709,18 @@ void TurboAssembler::CallBuiltin(int builtin_index) {
Address entry = d.InstructionStartOfBuiltin(builtin_index);
Move(kScratchRegister, entry, RelocInfo::OFF_HEAP_TARGET);
call(kScratchRegister);
+ if (FLAG_code_comments) RecordComment("]");
+}
+
+void TurboAssembler::TailCallBuiltin(int builtin_index) {
+ DCHECK(Builtins::IsBuiltinId(builtin_index));
+ RecordCommentForOffHeapTrampoline(builtin_index);
+ CHECK_NE(builtin_index, Builtins::kNoBuiltinId);
+ EmbeddedData d = EmbeddedData::FromBlob();
+ Address entry = d.InstructionStartOfBuiltin(builtin_index);
+ Move(kScratchRegister, entry, RelocInfo::OFF_HEAP_TARGET);
+ jmp(kScratchRegister);
+ if (FLAG_code_comments) RecordComment("]");
}
void TurboAssembler::LoadCodeObjectEntry(Register destination,
@@ -1726,9 +1771,17 @@ void TurboAssembler::CallCodeObject(Register code_object) {
call(code_object);
}
-void TurboAssembler::JumpCodeObject(Register code_object) {
+void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) {
LoadCodeObjectEntry(code_object, code_object);
- jmp(code_object);
+ switch (jump_mode) {
+ case JumpMode::kJump:
+ jmp(code_object);
+ return;
+ case JumpMode::kPushAndReturn:
+ pushq(code_object);
+ Ret();
+ return;
+ }
}
void TurboAssembler::RetpolineCall(Register reg) {
@@ -1770,29 +1823,69 @@ void TurboAssembler::RetpolineJump(Register reg) {
ret(0);
}
+void TurboAssembler::Pmaddwd(XMMRegister dst, XMMRegister src1, Operand src2) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vpmaddwd(dst, src1, src2);
+ } else {
+ if (dst != src1) {
+ movaps(dst, src1);
+ }
+ pmaddwd(dst, src2);
+ }
+}
+
void TurboAssembler::Pmaddwd(XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpmaddwd(dst, src1, src2);
} else {
- DCHECK_EQ(dst, src1);
+ if (dst != src1) {
+ movaps(dst, src1);
+ }
pmaddwd(dst, src2);
}
}
void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
+ Operand src2) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vpmaddubsw(dst, src1, src2);
+ } else {
+ CpuFeatureScope ssse3_scope(this, SSSE3);
+ if (dst != src1) {
+ movaps(dst, src1);
+ }
+ pmaddubsw(dst, src2);
+ }
+}
+
+void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpmaddubsw(dst, src1, src2);
} else {
CpuFeatureScope ssse3_scope(this, SSSE3);
- DCHECK_EQ(dst, src1);
+ if (dst != src1) {
+ movaps(dst, src1);
+ }
pmaddubsw(dst, src2);
}
}
+void TurboAssembler::Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vunpcklps(dst, src1, src2);
+ } else {
+ DCHECK_EQ(dst, src1);
+ unpcklps(dst, src2);
+ }
+}
+
void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
@@ -2039,10 +2132,12 @@ void TurboAssembler::Pmulhrsw(XMMRegister dst, XMMRegister src1,
void TurboAssembler::I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
- // Copy top half (64-bit) of src into both halves of dst.
- vpunpckhqdq(dst, src, src);
- vpmovsxwd(dst, dst);
+ // src = |a|b|c|d|e|f|g|h| (high)
+ // dst = |e|e|f|f|g|g|h|h|
+ vpunpckhwd(dst, src, src);
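+ // The arithmetic shift sign-extends each duplicated 16-bit element to
+ // 32 bits.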
+ vpsrad(dst, dst, 16);
} else {
+ CpuFeatureScope sse_scope(this, SSE4_1);
if (dst == src) {
// 2 bytes shorter than pshufd, but has a dependency on dst.
movhlps(dst, src);
@@ -2065,6 +2160,7 @@ void TurboAssembler::I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src) {
vpxor(scratch, scratch, scratch);
vpunpckhwd(dst, src, scratch);
} else {
+ CpuFeatureScope sse_scope(this, SSE4_1);
if (dst == src) {
// xorps can be executed on more ports than pshufd.
xorps(kScratchDoubleReg, kScratchDoubleReg);
@@ -2080,10 +2176,12 @@ void TurboAssembler::I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src) {
void TurboAssembler::I16x8SConvertI8x16High(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
- // Copy top half (64-bit) of src into both halves of dst.
- vpunpckhqdq(dst, src, src);
- vpmovsxbw(dst, dst);
+ // src = |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| (high)
+ // dst = |i|i|j|j|k|k|l|l|m|m|n|n|o|o|p|p|
+ vpunpckhbw(dst, src, src);
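+ // The arithmetic shift sign-extends each duplicated byte to 16 bits.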
+ vpsraw(dst, dst, 8);
} else {
+ CpuFeatureScope sse_scope(this, SSE4_1);
if (dst == src) {
// 2 bytes shorter than pshufd, but has a dependency on dst.
movhlps(dst, src);
@@ -2111,6 +2209,7 @@ void TurboAssembler::I16x8UConvertI8x16High(XMMRegister dst, XMMRegister src) {
xorps(kScratchDoubleReg, kScratchDoubleReg);
punpckhbw(dst, kScratchDoubleReg);
} else {
+ CpuFeatureScope sse_scope(this, SSE4_1);
// No dependency on dst.
pshufd(dst, src, 0xEE);
pmovzxbw(dst, dst);
@@ -2118,6 +2217,30 @@ void TurboAssembler::I16x8UConvertI8x16High(XMMRegister dst, XMMRegister src) {
}
}
+void TurboAssembler::I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vpunpckhqdq(dst, src, src);
+ vpmovsxdq(dst, dst);
+ } else {
+ CpuFeatureScope sse_scope(this, SSE4_1);
+ pshufd(dst, src, 0xEE);
+ pmovsxdq(dst, dst);
+ }
+}
+
+void TurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ vpunpckhdq(dst, src, kScratchDoubleReg);
+ } else {
+ CpuFeatureScope sse_scope(this, SSE4_1);
+ pshufd(dst, src, 0xEE);
+ pmovzxdq(dst, dst);
+ }
+}
+
// 1. Unpack src0, src0 into even-number elements of scratch.
// 2. Unpack src1, src1 into even-number elements of dst.
// 3. Multiply 1. with 2.
@@ -2189,6 +2312,313 @@ void TurboAssembler::I16x8ExtMul(XMMRegister dst, XMMRegister src1,
}
}
+void TurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
+ XMMRegister src2) {
+ // k = i16x8.splat(0x8000)
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Psllw(kScratchDoubleReg, byte{15});
+
+ Pmulhrsw(dst, src1, src2);
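+ // pmulhrsw overflows only for 0x8000 * 0x8000, where it produces 0x8000
+ // instead of the saturated 0x7FFF; detect that lane and flip it below.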
+ Pcmpeqw(kScratchDoubleReg, dst);
+ Pxor(dst, kScratchDoubleReg);
+}
+
+void TurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
+ uint8_t laneidx) {
+ if (laneidx == 0) {
+ Movss(dst, src);
+ } else {
+ DCHECK_GE(3, laneidx);
+ Extractps(dst, src, laneidx);
+ }
+}
+
+void TurboAssembler::S128Store64Lane(Operand dst, XMMRegister src,
+ uint8_t laneidx) {
+ if (laneidx == 0) {
+ Movlps(dst, src);
+ } else {
+ DCHECK_EQ(1, laneidx);
+ Movhps(dst, src);
+ }
+}
+
+void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
+ XMMRegister tmp) {
+ DCHECK_NE(dst, tmp);
+ DCHECK_NE(src, tmp);
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
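+ // Split each byte into its low and high nibble, use pshufb as a 16-entry
+ // popcount lookup table on each half, then add the two partial counts.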
+ vmovdqa(tmp, ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_splat_0x0f()));
+ vpandn(kScratchDoubleReg, tmp, src);
+ vpand(dst, tmp, src);
+ vmovdqa(tmp, ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_popcnt_mask()));
+ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 4);
+ vpshufb(dst, tmp, dst);
+ vpshufb(kScratchDoubleReg, tmp, kScratchDoubleReg);
+ vpaddb(dst, dst, kScratchDoubleReg);
+ } else if (CpuFeatures::IsSupported(ATOM)) {
+ // Pre-Goldmont low-power Intel microarchitectures have a very slow
+ // PSHUFB instruction, so use a PSHUFB-free divide-and-conquer
+ // algorithm on these processors. The ATOM CPU feature captures exactly
+ // the right set of processors.
+ xorps(tmp, tmp);
+ pavgb(tmp, src);
+ if (dst != src) {
+ movaps(dst, src);
+ }
+ andps(tmp, ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_splat_0x55()));
+ psubb(dst, tmp);
+ Operand splat_0x33 = ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_splat_0x33());
+ movaps(tmp, dst);
+ andps(dst, splat_0x33);
+ psrlw(tmp, 2);
+ andps(tmp, splat_0x33);
+ paddb(dst, tmp);
+ movaps(tmp, dst);
+ psrlw(dst, 4);
+ paddb(dst, tmp);
+ andps(dst, ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_splat_0x0f()));
+ } else {
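+ // Same nibble-lookup algorithm as the AVX path above, with
+ // kScratchDoubleReg holding the high nibbles.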
+ movaps(tmp, ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_splat_0x0f()));
+ Operand mask = ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_popcnt_mask());
+ Move(kScratchDoubleReg, tmp);
+ andps(tmp, src);
+ andnps(kScratchDoubleReg, src);
+ psrlw(kScratchDoubleReg, 4);
+ movaps(dst, mask);
+ pshufb(dst, tmp);
+ movaps(tmp, mask);
+ pshufb(tmp, kScratchDoubleReg);
+ paddb(dst, tmp);
+ }
+}
+
+void TurboAssembler::F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src) {
+ // dst = [ src_low, 0x43300000, src_high, 0x43300000 ];
+ // 0x43300000'00000000 is a special double whose significand bits
+ // precisely represent all uint32 numbers.
+ Unpcklps(dst, src,
+ ExternalReferenceAsOperand(
+ ExternalReference::
+ address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
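+ // Each lane now holds the bit pattern of 2^52 + src_lane; subtracting
+ // 2^52 leaves the exact uint32 value as a double.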
+ Subpd(dst, ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_double_2_power_52()));
+}
+
+void TurboAssembler::I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ XMMRegister original_dst = dst;
+ // Make sure we don't overwrite src.
+ if (dst == src) {
+ DCHECK_NE(src, kScratchDoubleReg);
+ dst = kScratchDoubleReg;
+ }
+ // dst = 0 if src == NaN, else all ones.
+ vcmpeqpd(dst, src, src);
+ // dst = 0 if src == NaN, else INT32_MAX as double.
+ vandpd(dst, dst,
+ ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_int32_max_as_double()));
+ // dst = 0 if src == NaN, src is saturated to INT32_MAX as double.
+ vminpd(dst, src, dst);
+ // Values > INT32_MAX are already saturated; values < INT32_MIN raise an
+ // exception, which is masked and returns 0x80000000.
+ vcvttpd2dq(dst, dst);
+ if (original_dst != dst) {
+ Move(original_dst, dst);
+ }
+ } else {
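+ // Same algorithm as the AVX path: NaN lanes become zero and the other
+ // lanes are clamped to INT32_MAX (as a double) before the truncating
+ // conversion.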
+ if (dst != src) {
+ Move(dst, src);
+ }
+ Move(kScratchDoubleReg, dst);
+ cmpeqpd(kScratchDoubleReg, dst);
+ andps(kScratchDoubleReg,
+ ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_int32_max_as_double()));
+ minpd(dst, kScratchDoubleReg);
+ cvttpd2dq(dst, dst);
+ }
+}
+
+void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vxorpd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ // Saturate to 0.
+ vmaxpd(dst, src, kScratchDoubleReg);
+ // Saturate to UINT32_MAX.
+ vminpd(dst, dst,
+ ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_uint32_max_as_double()));
+ // Truncate.
+ vroundpd(dst, dst, kRoundToZero);
+ // Add 2^52 (a special double) so that the low 32 bits of each double's
+ // significand hold the uint32 result.
+ vaddpd(dst, dst,
+ ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_double_2_power_52()));
+ // Extract low 32 bits of each double's significand, zero top lanes.
+ // dst = [dst[0], dst[2], 0, 0]
+ vshufps(dst, dst, kScratchDoubleReg, 0x88);
+ } else {
+ CpuFeatureScope scope(this, SSE4_1);
+ if (dst != src) {
+ Move(dst, src);
+ }
+ xorps(kScratchDoubleReg, kScratchDoubleReg);
+ maxpd(dst, kScratchDoubleReg);
+ minpd(dst, ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_uint32_max_as_double()));
+ roundpd(dst, dst, kRoundToZero);
+ addpd(dst, ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_double_2_power_52()));
+ shufps(dst, kScratchDoubleReg, 0x88);
+ }
+}
+
+void TurboAssembler::I64x2Abs(XMMRegister dst, XMMRegister src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ XMMRegister tmp = dst == src ? kScratchDoubleReg : dst;
+ CpuFeatureScope avx_scope(this, AVX);
+ vpxor(tmp, tmp, tmp);
+ vpsubq(tmp, tmp, src);
+ vblendvpd(dst, src, tmp, src);
+ } else {
+ CpuFeatureScope sse_scope(this, SSE3);
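+ // Build a per-lane sign mask: movshdup copies each high dword over the
+ // low one and psrad broadcasts its sign bit; then abs(x) = (x ^ mask) - mask.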
+ movshdup(kScratchDoubleReg, src);
+ if (dst != src) {
+ movaps(dst, src);
+ }
+ psrad(kScratchDoubleReg, 31);
+ xorps(dst, kScratchDoubleReg);
+ psubq(dst, kScratchDoubleReg);
+ }
+}
+
+void TurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
+ XMMRegister src1) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vpcmpgtq(dst, src0, src1);
+ } else if (CpuFeatures::IsSupported(SSE4_2)) {
+ CpuFeatureScope sse_scope(this, SSE4_2);
+ DCHECK_EQ(dst, src0);
+ pcmpgtq(dst, src1);
+ } else {
+ DCHECK_NE(dst, src0);
+ DCHECK_NE(dst, src1);
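+ // No 64-bit signed compare before SSE4.2: where the high dwords are
+ // equal, the high dword of (src1 - src0) is all ones exactly when
+ // src0 > src1. OR that with the signed compare of the high dwords, then
+ // broadcast each lane's high dword with pshufd.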
+ movdqa(dst, src1);
+ movdqa(kScratchDoubleReg, src0);
+ psubq(dst, src0);
+ pcmpeqd(kScratchDoubleReg, src1);
+ pand(dst, kScratchDoubleReg);
+ movdqa(kScratchDoubleReg, src0);
+ pcmpgtd(kScratchDoubleReg, src1);
+ por(dst, kScratchDoubleReg);
+ pshufd(dst, dst, 0xF5);
+ }
+}
+
+void TurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
+ XMMRegister src1) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vpcmpgtq(dst, src1, src0);
+ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ vpxor(dst, dst, kScratchDoubleReg);
+ } else if (CpuFeatures::IsSupported(SSE4_2)) {
+ CpuFeatureScope sse_scope(this, SSE4_2);
+ DCHECK_NE(dst, src0);
+ if (dst != src1) {
+ movdqa(dst, src1);
+ }
+ pcmpgtq(dst, src0);
+ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ pxor(dst, kScratchDoubleReg);
+ } else {
+ DCHECK_NE(dst, src0);
+ DCHECK_NE(dst, src1);
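+ // Compute src1 > src0 with the same SSE2 sequence as in I64x2GtS above,
+ // then invert the result to get src0 >= src1.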
+ movdqa(dst, src0);
+ movdqa(kScratchDoubleReg, src1);
+ psubq(dst, src1);
+ pcmpeqd(kScratchDoubleReg, src0);
+ pand(dst, kScratchDoubleReg);
+ movdqa(kScratchDoubleReg, src1);
+ pcmpgtd(kScratchDoubleReg, src0);
+ por(dst, kScratchDoubleReg);
+ pshufd(dst, dst, 0xF5);
+ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ pxor(dst, kScratchDoubleReg);
+ }
+}
+
+void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst,
+ XMMRegister src) {
+ // pmaddubsw treats the first operand as unsigned, so the external reference
+ // needs to be passed to it as the first operand.
+ Operand op = ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_splat_0x01());
+ if (dst == src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vmovdqa(kScratchDoubleReg, op);
+ vpmaddubsw(dst, kScratchDoubleReg, src);
+ } else {
+ CpuFeatureScope sse_scope(this, SSSE3);
+ movaps(kScratchDoubleReg, op);
+ pmaddubsw(kScratchDoubleReg, src);
+ movaps(dst, kScratchDoubleReg);
+ }
+ } else {
+ Movdqa(dst, op);
+ Pmaddubsw(dst, dst, src);
+ }
+}
+
+void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
+ XMMRegister src) {
+ // src = |a|b|c|d|e|f|g|h|
+ // kScratchDoubleReg = i32x4.splat(0x0000FFFF)
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Psrld(kScratchDoubleReg, byte{16});
+ // kScratchDoubleReg = |0|b|0|d|0|f|0|h|
+ Pand(kScratchDoubleReg, src);
+ // dst = |0|a|0|c|0|e|0|g|
+ Psrld(dst, src, byte{16});
+ // dst = |a+b|c+d|e+f|g+h|
+ Paddd(dst, kScratchDoubleReg);
+}
+
+void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
+ XMMRegister mask) {
+ // Out-of-range indices should return 0: add 112 with unsigned saturation
+ // so that any index > 15 has its top bit set, and pshufb then zeroes that
+ // lane.
+ Operand op = ExternalReferenceAsOperand(
+ ExternalReference::address_of_wasm_i8x16_swizzle_mask());
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope avx_scope(this, AVX);
+ vpaddusb(kScratchDoubleReg, mask, op);
+ vpshufb(dst, src, kScratchDoubleReg);
+ } else {
+ CpuFeatureScope sse_scope(this, SSSE3);
+ movdqa(kScratchDoubleReg, op);
+ if (dst != src) {
+ movaps(dst, src);
+ }
+ paddusb(kScratchDoubleReg, mask);
+ pshufb(dst, kScratchDoubleReg);
+ }
+}
+
void TurboAssembler::Abspd(XMMRegister dst) {
Andps(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_double_abs_constant()));
@@ -2432,6 +2862,15 @@ void MacroAssembler::CmpInstanceType(Register map, InstanceType type) {
cmpw(FieldOperand(map, Map::kInstanceTypeOffset), Immediate(type));
}
+void MacroAssembler::CmpInstanceTypeRange(Register map,
+ InstanceType lower_limit,
+ InstanceType higher_limit) {
+ DCHECK_LT(lower_limit, higher_limit);
+ movzxwl(kScratchRegister, FieldOperand(map, Map::kInstanceTypeOffset));
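+ // Subtract lower_limit so that a single unsigned compare against
+ // (higher_limit - lower_limit) checks the whole range; callers branch on
+ // below_equal.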
+ leal(kScratchRegister, Operand(kScratchRegister, 0u - lower_limit));
+ cmpl(kScratchRegister, Immediate(higher_limit - lower_limit));
+}
+
void MacroAssembler::AssertNotSmi(Register object) {
if (emit_debug_code()) {
Condition is_smi = CheckSmi(object);
@@ -2480,9 +2919,10 @@ void MacroAssembler::AssertFunction(Register object) {
testb(object, Immediate(kSmiTagMask));
Check(not_equal, AbortReason::kOperandIsASmiAndNotAFunction);
Push(object);
- CmpObjectType(object, JS_FUNCTION_TYPE, object);
+ LoadMap(object, object);
+ CmpInstanceTypeRange(object, FIRST_JS_FUNCTION_TYPE, LAST_JS_FUNCTION_TYPE);
Pop(object);
- Check(equal, AbortReason::kOperandIsNotAFunction);
+ Check(below_equal, AbortReason::kOperandIsNotAFunction);
}
}
@@ -2753,7 +3193,6 @@ void MacroAssembler::InvokePrologue(Register expected_parameter_count,
Label* done, InvokeFlag flag) {
if (expected_parameter_count != actual_parameter_count) {
Label regular_invoke;
-#ifdef V8_NO_ARGUMENTS_ADAPTOR
// If the expected parameter count is equal to the adaptor sentinel, no need
// to push undefined value as arguments.
cmpl(expected_parameter_count, Immediate(kDontAdaptArgumentsSentinel));
@@ -2811,22 +3250,6 @@ void MacroAssembler::InvokePrologue(Register expected_parameter_count,
CallRuntime(Runtime::kThrowStackOverflow);
int3(); // This should be unreachable.
}
-#else
- // Both expected and actual are in (different) registers. This
- // is the case when we invoke functions using call and apply.
- cmpq(expected_parameter_count, actual_parameter_count);
- j(equal, &regular_invoke, Label::kNear);
- DCHECK_EQ(actual_parameter_count, rax);
- DCHECK_EQ(expected_parameter_count, rbx);
- Handle<Code> adaptor = BUILTIN_CODE(isolate(), ArgumentsAdaptorTrampoline);
- if (flag == CALL_FUNCTION) {
- Call(adaptor, RelocInfo::CODE_TARGET);
- jmp(done, Label::kNear);
- } else {
- Jump(adaptor, RelocInfo::CODE_TARGET);
- }
-#endif
-
bind(&regular_invoke);
} else {
Move(rax, actual_parameter_count);
@@ -2881,11 +3304,16 @@ void TurboAssembler::Prologue() {
void TurboAssembler::EnterFrame(StackFrame::Type type) {
pushq(rbp);
movq(rbp, rsp);
- Push(Immediate(StackFrame::TypeToMarker(type)));
+ if (!StackFrame::IsJavaScript(type)) {
+ Push(Immediate(StackFrame::TypeToMarker(type)));
+ }
}
void TurboAssembler::LeaveFrame(StackFrame::Type type) {
- if (emit_debug_code()) {
+ // TODO(v8:11429): Consider passing BASELINE instead, and checking for
+ // IsJSFrame or similar. Could then unify with manual frame leaves in the
+ // interpreter too.
+ if (emit_debug_code() && !StackFrame::IsJavaScript(type)) {
cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
Immediate(StackFrame::TypeToMarker(type)));
Check(equal, AbortReason::kStackFrameTypesMustMatch);
@@ -2917,11 +3345,13 @@ void TurboAssembler::AllocateStackSpace(Register bytes_scratch) {
}
void TurboAssembler::AllocateStackSpace(int bytes) {
+ DCHECK_GE(bytes, 0);
while (bytes > kStackPageSize) {
subq(rsp, Immediate(kStackPageSize));
movb(Operand(rsp, 0), Immediate(0));
bytes -= kStackPageSize;
}
+ if (bytes == 0) return;
subq(rsp, Immediate(bytes));
}
#endif
@@ -3223,7 +3653,7 @@ void TurboAssembler::ComputeCodeStartAddress(Register dst) {
}
void TurboAssembler::ResetSpeculationPoisonRegister() {
- // TODO(tebbi): Perhaps, we want to put an lfence here.
+ // TODO(turbofan): Perhaps we want to put an lfence here.
Set(kSpeculationPoisonRegister, -1);
}