diff options
Diffstat (limited to 'chromium/v8/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc')
-rw-r--r-- | chromium/v8/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/chromium/v8/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc b/chromium/v8/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc index 366d1afac9e..3a73ae09f8a 100644 --- a/chromium/v8/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc +++ b/chromium/v8/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc @@ -29,6 +29,174 @@ void SharedTurboAssembler::Movapd(XMMRegister dst, XMMRegister src) { } } +void SharedTurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, + XMMRegister src2, uint8_t imm8) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope avx_scope(this, AVX); + vshufps(dst, src1, src2, imm8); + } else { + if (dst != src1) { + movaps(dst, src1); + } + shufps(dst, src2, imm8); + } +} + +void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src, + uint8_t lane) { + if (lane == 0) { + if (dst != src) { + Movaps(dst, src); + } + } else { + DCHECK_EQ(1, lane); + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope avx_scope(this, AVX); + // Pass src as operand to avoid false-dependency on dst. + vmovhlps(dst, src, src); + } else { + movhlps(dst, src); + } + } +} + +void SharedTurboAssembler::F64x2ReplaceLane(XMMRegister dst, XMMRegister src, + DoubleRegister rep, uint8_t lane) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + if (lane == 0) { + vpblendw(dst, src, rep, 0b00001111); + } else { + vmovlhps(dst, src, rep); + } + } else { + CpuFeatureScope scope(this, SSE4_1); + if (dst != src) { + DCHECK_NE(dst, rep); // Ensure rep is not overwritten. + movaps(dst, src); + } + if (lane == 0) { + pblendw(dst, rep, 0b00001111); + } else { + movlhps(dst, rep); + } + } +} + +void SharedTurboAssembler::F64x2Min(XMMRegister dst, XMMRegister lhs, + XMMRegister rhs, XMMRegister scratch) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + // The minpd instruction doesn't propagate NaNs and +0's in its first + // operand. 
Perform minpd in both orders, merge the results, and adjust. + vminpd(scratch, lhs, rhs); + vminpd(dst, rhs, lhs); + // propagate -0's and NaNs, which may be non-canonical. + vorpd(scratch, scratch, dst); + // Canonicalize NaNs by quieting and clearing the payload. + vcmpunordpd(dst, dst, scratch); + vorpd(scratch, scratch, dst); + vpsrlq(dst, dst, byte{13}); + vandnpd(dst, dst, scratch); + } else { + // Compare lhs with rhs, and rhs with lhs, and have the results in scratch + // and dst. If dst overlaps with lhs or rhs, we can save a move. + if (dst == lhs || dst == rhs) { + XMMRegister src = dst == lhs ? rhs : lhs; + movaps(scratch, src); + minpd(scratch, dst); + minpd(dst, src); + } else { + movaps(scratch, lhs); + movaps(dst, rhs); + minpd(scratch, rhs); + minpd(dst, lhs); + } + orpd(scratch, dst); + cmpunordpd(dst, scratch); + orpd(scratch, dst); + psrlq(dst, byte{13}); + andnpd(dst, scratch); + } +} + +void SharedTurboAssembler::F64x2Max(XMMRegister dst, XMMRegister lhs, + XMMRegister rhs, XMMRegister scratch) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + // The maxpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform maxpd in both orders, merge the results, and adjust. + vmaxpd(scratch, lhs, rhs); + vmaxpd(dst, rhs, lhs); + // Find discrepancies. + vxorpd(dst, dst, scratch); + // Propagate NaNs, which may be non-canonical. + vorpd(scratch, scratch, dst); + // Propagate sign discrepancy and (subtle) quiet NaNs. + vsubpd(scratch, scratch, dst); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + vcmpunordpd(dst, dst, scratch); + vpsrlq(dst, dst, byte{13}); + vandnpd(dst, dst, scratch); + } else { + if (dst == lhs || dst == rhs) { + XMMRegister src = dst == lhs ?
rhs : lhs; + movaps(scratch, src); + maxpd(scratch, dst); + maxpd(dst, src); + } else { + movaps(scratch, lhs); + movaps(dst, rhs); + maxpd(scratch, rhs); + maxpd(dst, lhs); + } + xorpd(dst, scratch); + orpd(scratch, dst); + subpd(scratch, dst); + cmpunordpd(dst, scratch); + psrlq(dst, byte{13}); + andnpd(dst, scratch); + } +} + +void SharedTurboAssembler::F32x4Splat(XMMRegister dst, DoubleRegister src) { + if (CpuFeatures::IsSupported(AVX2)) { + CpuFeatureScope avx2_scope(this, AVX2); + vbroadcastss(dst, src); + } else if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope avx_scope(this, AVX); + vshufps(dst, src, src, 0); + } else { + if (dst == src) { + // 1 byte shorter than pshufd. + shufps(dst, src, 0); + } else { + pshufd(dst, src, 0); + } + } +} + +void SharedTurboAssembler::F32x4ExtractLane(FloatRegister dst, XMMRegister src, + uint8_t lane) { + DCHECK_LT(lane, 4); + // These instructions are shorter than insertps, but will leave junk in + // the top lanes of dst. + if (lane == 0) { + if (dst != src) { + Movaps(dst, src); + } + } else if (lane == 1) { + Movshdup(dst, src); + } else if (lane == 2 && dst == src) { + // Check dst == src to avoid false dependency on dst. 
+ Movhlps(dst, src); + } else if (dst == src) { + Shufps(dst, src, src, lane); + } else { + Pshufd(dst, src, lane); + } +} + void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx) { if (laneidx == 0) { @@ -233,6 +401,22 @@ void SharedTurboAssembler::I32x4UConvertI16x8High(XMMRegister dst, } } +void SharedTurboAssembler::I64x2Neg(XMMRegister dst, XMMRegister src, + XMMRegister scratch) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vpxor(scratch, scratch, scratch); + vpsubq(dst, scratch, src); + } else { + if (dst == src) { + movaps(scratch, src); + std::swap(src, scratch); + } + pxor(dst, dst); + psubq(dst, src); + } +} + void SharedTurboAssembler::I64x2Abs(XMMRegister dst, XMMRegister src, XMMRegister scratch) { if (CpuFeatures::IsSupported(AVX)) { @@ -379,6 +563,17 @@ void SharedTurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, } } +void SharedTurboAssembler::S128Not(XMMRegister dst, XMMRegister src, + XMMRegister scratch) { + if (dst == src) { + Pcmpeqd(scratch, scratch); + Pxor(dst, scratch); + } else { + Pcmpeqd(dst, dst); + Pxor(dst, src); + } +} + void SharedTurboAssembler::S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1, XMMRegister src2, XMMRegister scratch) { |