From c30a6232df03e1efbd9f3b226777b07e087a1122 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Mon, 12 Oct 2020 14:27:29 +0200 Subject: BASELINE: Update Chromium to 85.0.4183.140 Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057 Reviewed-by: Allan Sandfeld Jensen --- chromium/v8/src/compiler/access-builder.cc | 38 ++ chromium/v8/src/compiler/access-builder.h | 12 + chromium/v8/src/compiler/access-info.cc | 8 +- chromium/v8/src/compiler/allocation-builder-inl.h | 13 +- chromium/v8/src/compiler/allocation-builder.h | 5 + .../src/compiler/backend/arm/code-generator-arm.cc | 150 +++-- .../compiler/backend/arm/instruction-codes-arm.h | 16 +- .../backend/arm/instruction-scheduler-arm.cc | 16 +- .../backend/arm/instruction-selector-arm.cc | 53 +- .../compiler/backend/arm64/code-generator-arm64.cc | 123 +++- .../backend/arm64/instruction-codes-arm64.h | 29 +- .../backend/arm64/instruction-scheduler-arm64.cc | 29 +- .../backend/arm64/instruction-selector-arm64.cc | 86 ++- chromium/v8/src/compiler/backend/code-generator.cc | 47 +- .../compiler/backend/ia32/code-generator-ia32.cc | 31 +- .../compiler/backend/ia32/instruction-codes-ia32.h | 15 +- .../backend/ia32/instruction-scheduler-ia32.cc | 15 +- .../backend/ia32/instruction-selector-ia32.cc | 68 +- .../compiler/backend/instruction-selector-impl.h | 2 + .../src/compiler/backend/instruction-selector.cc | 97 ++- chromium/v8/src/compiler/backend/instruction.h | 2 +- .../compiler/backend/mips/code-generator-mips.cc | 102 ++- .../compiler/backend/mips/instruction-codes-mips.h | 19 +- .../backend/mips/instruction-scheduler-mips.cc | 19 +- .../backend/mips/instruction-selector-mips.cc | 39 +- .../backend/mips64/code-generator-mips64.cc | 102 ++- .../backend/mips64/instruction-codes-mips64.h | 19 +- .../backend/mips64/instruction-scheduler-mips64.cc | 19 +- .../backend/mips64/instruction-selector-mips64.cc | 39 +- .../src/compiler/backend/ppc/code-generator-ppc.cc | 683 ++++++++++++++++++- .../compiler/backend/ppc/instruction-codes-ppc.h | 93 +++ .../backend/ppc/instruction-scheduler-ppc.cc | 93 +++ .../backend/ppc/instruction-selector-ppc.cc | 307 ++++----- .../v8/src/compiler/backend/register-allocator.cc | 47 +- .../v8/src/compiler/backend/register-allocator.h | 11 +- .../compiler/backend/s390/code-generator-s390.cc | 133 +++- .../compiler/backend/s390/instruction-codes-s390.h | 31 +- .../backend/s390/instruction-scheduler-s390.cc | 31 +- .../backend/s390/instruction-selector-s390.cc | 47 +- .../src/compiler/backend/x64/code-generator-x64.cc | 194 ++++-- .../compiler/backend/x64/instruction-codes-x64.h | 19 +- .../backend/x64/instruction-scheduler-x64.cc | 19 +- .../backend/x64/instruction-selector-x64.cc | 53 +- .../v8/src/compiler/basic-block-instrumentor.cc | 74 ++- .../v8/src/compiler/basic-block-instrumentor.h | 6 +- chromium/v8/src/compiler/bytecode-graph-builder.cc | 319 ++++++--- chromium/v8/src/compiler/bytecode-graph-builder.h | 1 + chromium/v8/src/compiler/code-assembler.cc | 5 +- chromium/v8/src/compiler/code-assembler.h | 3 +- .../v8/src/compiler/effect-control-linearizer.cc | 16 +- chromium/v8/src/compiler/globals.h | 13 + chromium/v8/src/compiler/graph-assembler.cc | 106 +-- chromium/v8/src/compiler/graph-assembler.h | 12 +- chromium/v8/src/compiler/graph-visualizer.cc | 121 +++- chromium/v8/src/compiler/graph-visualizer.h | 26 + chromium/v8/src/compiler/js-call-reducer.cc | 200 +++--- chromium/v8/src/compiler/js-create-lowering.cc | 31 +- chromium/v8/src/compiler/js-generic-lowering.cc | 739 ++++++++++++++------- 
chromium/v8/src/compiler/js-generic-lowering.h | 18 +- chromium/v8/src/compiler/js-heap-broker.cc | 12 +- chromium/v8/src/compiler/js-heap-broker.h | 29 +- chromium/v8/src/compiler/js-heap-copy-reducer.cc | 44 ++ chromium/v8/src/compiler/js-inlining.cc | 7 +- .../compiler/js-native-context-specialization.cc | 17 +- chromium/v8/src/compiler/js-operator.cc | 206 ++---- chromium/v8/src/compiler/js-operator.h | 169 ++++- chromium/v8/src/compiler/js-type-hint-lowering.cc | 27 +- chromium/v8/src/compiler/js-type-hint-lowering.h | 1 + chromium/v8/src/compiler/js-typed-lowering.cc | 125 +++- chromium/v8/src/compiler/linkage.cc | 17 +- chromium/v8/src/compiler/linkage.h | 21 +- chromium/v8/src/compiler/load-elimination.h | 2 - chromium/v8/src/compiler/machine-graph-verifier.cc | 46 -- chromium/v8/src/compiler/machine-graph.cc | 5 + chromium/v8/src/compiler/machine-graph.h | 1 + .../v8/src/compiler/machine-operator-reducer.cc | 173 ++++- .../v8/src/compiler/machine-operator-reducer.h | 1 + chromium/v8/src/compiler/machine-operator.cc | 25 +- chromium/v8/src/compiler/machine-operator.h | 25 +- chromium/v8/src/compiler/memory-lowering.h | 1 - chromium/v8/src/compiler/node-matchers.h | 2 +- chromium/v8/src/compiler/node.h | 10 + chromium/v8/src/compiler/opcodes.cc | 2 +- chromium/v8/src/compiler/opcodes.h | 119 ++-- chromium/v8/src/compiler/operator-properties.cc | 2 +- chromium/v8/src/compiler/pipeline-statistics.cc | 4 +- chromium/v8/src/compiler/pipeline.cc | 365 +++++----- chromium/v8/src/compiler/representation-change.cc | 68 +- chromium/v8/src/compiler/representation-change.h | 9 + chromium/v8/src/compiler/schedule.cc | 2 +- chromium/v8/src/compiler/scheduler.cc | 4 +- chromium/v8/src/compiler/simd-scalar-lowering.cc | 95 ++- chromium/v8/src/compiler/simd-scalar-lowering.h | 1 + chromium/v8/src/compiler/simplified-lowering.cc | 428 ++++++------ chromium/v8/src/compiler/simplified-operator.cc | 44 +- chromium/v8/src/compiler/simplified-operator.h | 2 + chromium/v8/src/compiler/typed-optimization.cc | 2 +- chromium/v8/src/compiler/typer.cc | 81 +-- chromium/v8/src/compiler/types.cc | 4 +- chromium/v8/src/compiler/wasm-compiler.cc | 456 ++++++++----- chromium/v8/src/compiler/wasm-compiler.h | 24 +- 101 files changed, 5077 insertions(+), 2265 deletions(-) (limited to 'chromium/v8/src/compiler') diff --git a/chromium/v8/src/compiler/access-builder.cc b/chromium/v8/src/compiler/access-builder.cc index e19067f3c1c..656b250a1c2 100644 --- a/chromium/v8/src/compiler/access-builder.cc +++ b/chromium/v8/src/compiler/access-builder.cc @@ -17,6 +17,7 @@ #include "src/objects/objects-inl.h" #include "src/objects/ordered-hash-table.h" #include "src/objects/source-text-module.h" +#include "torque-generated/exported-class-definitions-tq.h" namespace v8 { namespace internal { @@ -529,6 +530,26 @@ FieldAccess AccessBuilder::ForFixedArrayLength() { return access; } +// static +FieldAccess AccessBuilder::ForSloppyArgumentsElementsContext() { + FieldAccess access = { + kTaggedBase, SloppyArgumentsElements::kContextOffset, + MaybeHandle(), MaybeHandle(), + Type::Any(), MachineType::TaggedPointer(), + kPointerWriteBarrier}; + return access; +} + +// static +FieldAccess AccessBuilder::ForSloppyArgumentsElementsArguments() { + FieldAccess access = { + kTaggedBase, SloppyArgumentsElements::kArgumentsOffset, + MaybeHandle(), MaybeHandle(), + Type::Any(), MachineType::TaggedPointer(), + kPointerWriteBarrier}; + return access; +} + // static FieldAccess AccessBuilder::ForPropertyArrayLengthAndHash() { FieldAccess access = { 
@@ -867,6 +888,14 @@ ElementAccess AccessBuilder::ForFixedArrayElement() { } // static +ElementAccess AccessBuilder::ForSloppyArgumentsElementsMappedEntry() { + ElementAccess access = { + kTaggedBase, SloppyArgumentsElements::kMappedEntriesOffset, Type::Any(), + MachineType::AnyTagged(), kFullWriteBarrier}; + return access; +} + +// static ElementAccess AccessBuilder::ForFixedArrayElement( ElementsKind kind, LoadSensitivity load_sensitivity) { ElementAccess access = {kTaggedBase, FixedArray::kHeaderSize, @@ -1139,6 +1168,15 @@ FieldAccess AccessBuilder::ForDictionaryObjectHashIndex() { return access; } +// static +FieldAccess AccessBuilder::ForFeedbackCellValue() { + FieldAccess access = {kTaggedBase, FeedbackCell::kValueOffset, + Handle<Name>(), MaybeHandle<Map>(), + Type::Any(), MachineType::TaggedPointer(), + kFullWriteBarrier}; + return access; +} + } // namespace compiler } // namespace internal } // namespace v8 diff --git a/chromium/v8/src/compiler/access-builder.h b/chromium/v8/src/compiler/access-builder.h index 622dc1d76c2..9edd3272a19 100644 --- a/chromium/v8/src/compiler/access-builder.h +++ b/chromium/v8/src/compiler/access-builder.h @@ -179,6 +179,12 @@ class V8_EXPORT_PRIVATE AccessBuilder final // Provides access to FixedArray::length() field. static FieldAccess ForFixedArrayLength(); + // Provides access to SloppyArgumentsElements::context() field. + static FieldAccess ForSloppyArgumentsElementsContext(); + + // Provides access to SloppyArgumentsElements::arguments() field. + static FieldAccess ForSloppyArgumentsElementsArguments(); + // Provides access to PropertyArray::length() field. static FieldAccess ForPropertyArrayLengthAndHash(); @@ -283,6 +289,9 @@ class V8_EXPORT_PRIVATE AccessBuilder final ElementsKind kind, LoadSensitivity load_sensitivity = LoadSensitivity::kUnsafe); + // Provides access to SloppyArgumentsElements elements. + static ElementAccess ForSloppyArgumentsElementsMappedEntry(); + // Provides access to stack arguments static ElementAccess ForStackArgument(); @@ -318,6 +327,9 @@ class V8_EXPORT_PRIVATE AccessBuilder final static FieldAccess ForDictionaryNextEnumerationIndex(); static FieldAccess ForDictionaryObjectHashIndex(); + // Provides access to a FeedbackCell's value. + static FieldAccess ForFeedbackCellValue(); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(AccessBuilder); }; diff --git a/chromium/v8/src/compiler/access-info.cc b/chromium/v8/src/compiler/access-info.cc index 9a2a56cd8b1..db195c1bf9e 100644 --- a/chromium/v8/src/compiler/access-info.cc +++ b/chromium/v8/src/compiler/access-info.cc @@ -36,7 +36,7 @@ bool CanInlinePropertyAccess(Handle<Map> map) { if (map->instance_type() < LAST_PRIMITIVE_HEAP_OBJECT_TYPE) return true; return map->IsJSObjectMap() && !map->is_dictionary_map() && !map->has_named_interceptor() && - // TODO(verwaest): Whitelist contexts to which we have access. + // TODO(verwaest): Allowlist contexts to which we have access. !map->is_access_check_needed(); } @@ -505,8 +505,10 @@ PropertyAccessInfo AccessInfoFactory::ComputePropertyAccessInfo( MaybeHandle<JSObject> holder; while (true) { // Lookup the named property on the {map}.
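// Illustration (not part of the patch): a FieldAccess descriptor such as the
// new AccessBuilder::ForFeedbackCellValue() above is consumed through the
// standard simplified-operator LoadField idiom. A minimal sketch, with
// hypothetical locals (feedback_cell, effect, control):
Node* feedback_cell_value = graph()->NewNode(
    simplified()->LoadField(AccessBuilder::ForFeedbackCellValue()),
    feedback_cell, effect, control);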
- Handle<DescriptorArray> descriptors(map->instance_descriptors(), isolate()); - InternalIndex const number = descriptors->Search(*name, *map); + Handle<DescriptorArray> descriptors( + map->synchronized_instance_descriptors(), isolate()); + InternalIndex const number = + descriptors->Search(*name, *map, broker()->is_concurrent_inlining()); if (number.is_found()) { PropertyDetails const details = descriptors->GetDetails(number); if (access_mode == AccessMode::kStore || diff --git a/chromium/v8/src/compiler/allocation-builder-inl.h b/chromium/v8/src/compiler/allocation-builder-inl.h index 26fbe503c36..2b6109f49e2 100644 --- a/chromium/v8/src/compiler/allocation-builder-inl.h +++ b/chromium/v8/src/compiler/allocation-builder-inl.h @@ -5,10 +5,11 @@ #ifndef V8_COMPILER_ALLOCATION_BUILDER_INL_H_ #define V8_COMPILER_ALLOCATION_BUILDER_INL_H_ -#include "src/compiler/allocation-builder.h" - #include "src/compiler/access-builder.h" +#include "src/compiler/allocation-builder.h" #include "src/objects/map-inl.h" +#include "torque-generated/exported-class-definitions-tq-inl.h" +#include "torque-generated/exported-class-definitions-tq.h" namespace v8 { namespace internal { @@ -40,6 +41,14 @@ void AllocationBuilder::AllocateArray(int length, MapRef map, Store(AccessBuilder::ForFixedArrayLength(), jsgraph()->Constant(length)); } +void AllocationBuilder::AllocateSloppyArgumentElements( + int length, MapRef map, AllocationType allocation) { + int size = SloppyArgumentsElements::SizeFor(length); + Allocate(size, allocation, Type::OtherInternal()); + Store(AccessBuilder::ForMap(), map); + Store(AccessBuilder::ForFixedArrayLength(), jsgraph()->Constant(length)); +} + } // namespace compiler } // namespace internal } // namespace v8 diff --git a/chromium/v8/src/compiler/allocation-builder.h b/chromium/v8/src/compiler/allocation-builder.h index 040dd014051..709146950c6 100644 --- a/chromium/v8/src/compiler/allocation-builder.h +++ b/chromium/v8/src/compiler/allocation-builder.h @@ -55,6 +55,11 @@ class AllocationBuilder final { inline void AllocateArray(int length, MapRef map, AllocationType allocation = AllocationType::kYoung); + // Compound allocation of a SloppyArgumentsElements + inline void AllocateSloppyArgumentElements( + int length, MapRef map, + AllocationType allocation = AllocationType::kYoung); + // Compound store of a constant into a field.
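// Illustration (not part of the patch): the new AllocateSloppyArgumentElements
// declared above mirrors AllocateArray, but sizes the object with
// SloppyArgumentsElements::SizeFor(length), and is meant to be paired with the
// SloppyArgumentsElements accessors added in access-builder.cc. A minimal
// sketch of a call site, with hypothetical locals (mapped_count, elements_map,
// context, arguments):
AllocationBuilder a(jsgraph(), effect, control);
a.AllocateSloppyArgumentElements(mapped_count, elements_map);
a.Store(AccessBuilder::ForSloppyArgumentsElementsContext(), context);
a.Store(AccessBuilder::ForSloppyArgumentsElementsArguments(), arguments);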
void Store(const FieldAccess& access, const ObjectRef& value) { Store(access, jsgraph()->Constant(value)); diff --git a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc index d453cf0188d..f50c0c858a7 100644 --- a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc +++ b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc @@ -1456,7 +1456,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kArmVrintmF32: { CpuFeatureScope scope(tasm(), ARMv8); - __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0)); + if (instr->InputAt(0)->IsSimd128Register()) { + __ vrintm(NeonS32, i.OutputSimd128Register(), + i.InputSimd128Register(0)); + } else { + __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0)); + } break; } case kArmVrintmF64: { @@ -1466,7 +1471,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVrintpF32: { CpuFeatureScope scope(tasm(), ARMv8); - __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0)); + if (instr->InputAt(0)->IsSimd128Register()) { + __ vrintp(NeonS32, i.OutputSimd128Register(), + i.InputSimd128Register(0)); + } else { + __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0)); + } break; } case kArmVrintpF64: { @@ -1476,7 +1486,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVrintzF32: { CpuFeatureScope scope(tasm(), ARMv8); - __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0)); + if (instr->InputAt(0)->IsSimd128Register()) { + __ vrintz(NeonS32, i.OutputSimd128Register(), + i.InputSimd128Register(0)); + } else { + __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0)); + } break; } case kArmVrintzF64: { @@ -1960,43 +1975,61 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArmF64x2Lt: { UseScratchRegisterScope temps(tasm()); Register scratch = temps.Acquire(); - __ mov(scratch, Operand(0)); __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(), i.InputSimd128Register(1).low()); - __ mov(scratch, Operand(-1), LeaveCC, lt); - // Check for NaN. - __ mov(scratch, Operand(0), LeaveCC, vs); + __ mov(scratch, Operand(0), LeaveCC, cs); + __ mov(scratch, Operand(-1), LeaveCC, mi); __ vmov(i.OutputSimd128Register().low(), scratch, scratch); - __ mov(scratch, Operand(0)); __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(), i.InputSimd128Register(1).high()); - __ mov(scratch, Operand(-1), LeaveCC, lt); - // Check for NaN. - __ mov(scratch, Operand(0), LeaveCC, vs); + __ mov(scratch, Operand(0), LeaveCC, cs); + __ mov(scratch, Operand(-1), LeaveCC, mi); __ vmov(i.OutputSimd128Register().high(), scratch, scratch); break; } case kArmF64x2Le: { UseScratchRegisterScope temps(tasm()); Register scratch = temps.Acquire(); - __ mov(scratch, Operand(0)); __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(), i.InputSimd128Register(1).low()); - __ mov(scratch, Operand(-1), LeaveCC, le); - // Check for NaN. - __ mov(scratch, Operand(0), LeaveCC, vs); + __ mov(scratch, Operand(0), LeaveCC, hi); + __ mov(scratch, Operand(-1), LeaveCC, ls); __ vmov(i.OutputSimd128Register().low(), scratch, scratch); - __ mov(scratch, Operand(0)); __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(), i.InputSimd128Register(1).high()); - __ mov(scratch, Operand(-1), LeaveCC, le); - // Check for NaN. 
- __ mov(scratch, Operand(0), LeaveCC, vs); + __ mov(scratch, Operand(0), LeaveCC, hi); + __ mov(scratch, Operand(-1), LeaveCC, ls); __ vmov(i.OutputSimd128Register().high(), scratch, scratch); break; } + case kArmF64x2Pmin: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + DCHECK_EQ(dst, lhs); + + // Move rhs only when rhs is strictly lesser (mi). + __ VFPCompareAndSetFlags(rhs.low(), lhs.low()); + __ vmov(dst.low(), rhs.low(), mi); + __ VFPCompareAndSetFlags(rhs.high(), lhs.high()); + __ vmov(dst.high(), rhs.high(), mi); + break; + } + case kArmF64x2Pmax: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + DCHECK_EQ(dst, lhs); + + // Move rhs only when rhs is strictly greater (gt). + __ VFPCompareAndSetFlags(rhs.low(), lhs.low()); + __ vmov(dst.low(), rhs.low(), gt); + __ VFPCompareAndSetFlags(rhs.high(), lhs.high()); + __ vmov(dst.high(), rhs.high(), gt); + break; + } case kArmI64x2SplatI32Pair: { Simd128Register dst = i.OutputSimd128Register(); __ vdup(Neon32, dst, i.InputRegister(0)); @@ -2068,7 +2101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI64x2Neg: { Simd128Register dst = i.OutputSimd128Register(); - __ vmov(dst, static_cast<uint64_t>(0)); + __ vmov(dst, uint64_t{0}); __ vqsub(NeonS64, dst, dst, i.InputSimd128Register(0)); break; } @@ -2220,6 +2253,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(0)); break; } + case kArmF32x4Pmin: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + DCHECK_NE(dst, lhs); + DCHECK_NE(dst, rhs); + + // f32x4.pmin(lhs, rhs) + // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs)) + // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs)) + __ vcgt(dst, lhs, rhs); + __ vbsl(dst, rhs, lhs); + break; + } + case kArmF32x4Pmax: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + DCHECK_NE(dst, lhs); + DCHECK_NE(dst, rhs); + + // f32x4.pmax(lhs, rhs) + // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs)) + __ vcgt(dst, rhs, lhs); + __ vbsl(dst, rhs, lhs); + break; + } case kArmI32x4Splat: { __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0)); break; @@ -2361,8 +2421,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vshr(NeonS32, tmp2, src, 31); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0. - __ vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001)); - __ vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004)); + __ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001})); + __ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004})); __ vand(tmp2, mask, tmp2); __ vpadd(Neon32, tmp2.low(), tmp2.low(), tmp2.high()); __ vpadd(Neon32, tmp2.low(), tmp2.low(), kDoubleRegZero); @@ -2538,8 +2598,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vshr(NeonS16, tmp2, src, 15); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0.
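// Illustration (not part of the patch): the Pmin/Pmax cases above implement
// the wasm pseudo-minimum/maximum semantics: rhs replaces a lane only on a
// strict comparison, so equal inputs and NaN comparisons leave the lhs lane
// unchanged. A scalar reference sketch of one lane (hypothetical helpers):
double F64x2PminLane(double lhs, double rhs) { return rhs < lhs ? rhs : lhs; }
double F64x2PmaxLane(double lhs, double rhs) { return lhs < rhs ? rhs : lhs; }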
- __ vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001)); - __ vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010)); + __ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001})); + __ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010})); __ vand(tmp2, mask, tmp2); __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high()); __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low()); @@ -2692,8 +2752,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vshr(NeonS8, tmp2, src, 7); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0. - __ vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201)); - __ vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201)); + __ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201})); + __ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201})); __ vand(tmp2, mask, tmp2); __ vext(mask, tmp2, tmp2, 8); __ vzip(Neon8, mask, tmp2); @@ -3028,7 +3088,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); break; } - case kArmS1x4AnyTrue: { + case kArmV32x4AnyTrue: + case kArmV16x8AnyTrue: + case kArmV8x16AnyTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); UseScratchRegisterScope temps(tasm()); DwVfpRegister scratch = temps.AcquireD(); @@ -3039,7 +3101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); break; } - case kArmS1x4AllTrue: { + case kArmV32x4AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); UseScratchRegisterScope temps(tasm()); DwVfpRegister scratch = temps.AcquireD(); @@ -3050,19 +3112,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); break; } - case kArmS1x8AnyTrue: { - const QwNeonRegister& src = i.InputSimd128Register(0); - UseScratchRegisterScope temps(tasm()); - DwVfpRegister scratch = temps.AcquireD(); - __ vpmax(NeonU16, scratch, src.low(), src.high()); - __ vpmax(NeonU16, scratch, scratch, scratch); - __ vpmax(NeonU16, scratch, scratch, scratch); - __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0); - __ cmp(i.OutputRegister(), Operand(0)); - __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); - break; - } - case kArmS1x8AllTrue: { + case kArmV16x8AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); UseScratchRegisterScope temps(tasm()); DwVfpRegister scratch = temps.AcquireD(); @@ -3074,23 +3124,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); break; } - case kArmS1x16AnyTrue: { - const QwNeonRegister& src = i.InputSimd128Register(0); - UseScratchRegisterScope temps(tasm()); - QwNeonRegister q_scratch = temps.AcquireQ(); - DwVfpRegister d_scratch = q_scratch.low(); - __ vpmax(NeonU8, d_scratch, src.low(), src.high()); - __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch); - // vtst to detect any bits in the bottom 32 bits of d_scratch. - // This saves an instruction vs. the naive sequence of vpmax. - // kDoubleRegZero is not changed, since it is 0. 
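// Illustration (not part of the patch): the BitMask sequences above load mask
// constants that put the value 1 << i into lane i; AND-ing them with the
// per-lane sign mask and pairwise-adding across lanes yields the scalar
// bitmask. A reference sketch of i16x8.bitmask (hypothetical helper,
// <cstdint> assumed):
uint32_t I16x8BitMask(const int16_t lanes[8]) {
  uint32_t mask = 0;
  for (int i = 0; i < 8; ++i) {
    if (lanes[i] < 0) mask |= uint32_t{1} << i;  // sign bit of lane i -> bit i
  }
  return mask;
}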
- __ vtst(Neon32, q_scratch, q_scratch, q_scratch); - __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0); - __ cmp(i.OutputRegister(), Operand(0)); - __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); - break; - } - case kArmS1x16AllTrue: { + case kArmV8x16AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); UseScratchRegisterScope temps(tasm()); DwVfpRegister scratch = temps.AcquireD(); diff --git a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h index c6365bf7a50..39ed658fc4b 100644 --- a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h +++ b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h @@ -144,6 +144,8 @@ namespace compiler { V(ArmF64x2Ne) \ V(ArmF64x2Lt) \ V(ArmF64x2Le) \ + V(ArmF64x2Pmin) \ + V(ArmF64x2Pmax) \ V(ArmF32x4Splat) \ V(ArmF32x4ExtractLane) \ V(ArmF32x4ReplaceLane) \ @@ -165,6 +167,8 @@ namespace compiler { V(ArmF32x4Ne) \ V(ArmF32x4Lt) \ V(ArmF32x4Le) \ + V(ArmF32x4Pmin) \ + V(ArmF32x4Pmax) \ V(ArmI64x2SplatI32Pair) \ V(ArmI64x2ReplaceLaneI32Pair) \ V(ArmI64x2Neg) \ @@ -304,12 +308,12 @@ namespace compiler { V(ArmS8x8Reverse) \ V(ArmS8x4Reverse) \ V(ArmS8x2Reverse) \ - V(ArmS1x4AnyTrue) \ - V(ArmS1x4AllTrue) \ - V(ArmS1x8AnyTrue) \ - V(ArmS1x8AllTrue) \ - V(ArmS1x16AnyTrue) \ - V(ArmS1x16AllTrue) \ + V(ArmV32x4AnyTrue) \ + V(ArmV32x4AllTrue) \ + V(ArmV16x8AnyTrue) \ + V(ArmV16x8AllTrue) \ + V(ArmV8x16AnyTrue) \ + V(ArmV8x16AllTrue) \ V(ArmS8x16LoadSplat) \ V(ArmS16x8LoadSplat) \ V(ArmS32x4LoadSplat) \ diff --git a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc index 8c09acd6df8..196aa1ce6c0 100644 --- a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc +++ b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc @@ -124,6 +124,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmF64x2Ne: case kArmF64x2Lt: case kArmF64x2Le: + case kArmF64x2Pmin: + case kArmF64x2Pmax: case kArmF32x4Splat: case kArmF32x4ExtractLane: case kArmF32x4ReplaceLane: @@ -145,6 +147,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmF32x4Ne: case kArmF32x4Lt: case kArmF32x4Le: + case kArmF32x4Pmin: + case kArmF32x4Pmax: case kArmI64x2SplatI32Pair: case kArmI64x2ReplaceLaneI32Pair: case kArmI64x2Neg: @@ -284,12 +288,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmS8x8Reverse: case kArmS8x4Reverse: case kArmS8x2Reverse: - case kArmS1x4AnyTrue: - case kArmS1x4AllTrue: - case kArmS1x8AnyTrue: - case kArmS1x8AllTrue: - case kArmS1x16AnyTrue: - case kArmS1x16AllTrue: + case kArmV32x4AnyTrue: + case kArmV32x4AllTrue: + case kArmV16x8AnyTrue: + case kArmV16x8AllTrue: + case kArmV8x16AnyTrue: + case kArmV8x16AllTrue: return kNoOpcodeFlags; case kArmVldrF32: diff --git a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc index 74658697b50..de0e7c4162c 100644 --- a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc +++ b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc @@ -1495,7 +1495,10 @@ void InstructionSelector::VisitUint32Mod(Node* node) { V(Float64RoundTruncate, kArmVrintzF64) \ V(Float64RoundTiesAway, kArmVrintaF64) \ V(Float32RoundTiesEven, kArmVrintnF32) \ - V(Float64RoundTiesEven, kArmVrintnF64) + V(Float64RoundTiesEven, kArmVrintnF64) \ + V(F32x4Ceil, kArmVrintpF32) \ + V(F32x4Floor, kArmVrintmF32) \ + 
V(F32x4Trunc, kArmVrintzF32) #define RRR_OP_LIST(V) \ V(Int32MulHigh, kArmSmmul) \ @@ -2525,12 +2528,12 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(I8x16Neg, kArmI8x16Neg) \ V(I8x16Abs, kArmI8x16Abs) \ V(S128Not, kArmS128Not) \ - V(S1x4AnyTrue, kArmS1x4AnyTrue) \ - V(S1x4AllTrue, kArmS1x4AllTrue) \ - V(S1x8AnyTrue, kArmS1x8AnyTrue) \ - V(S1x8AllTrue, kArmS1x8AllTrue) \ - V(S1x16AnyTrue, kArmS1x16AnyTrue) \ - V(S1x16AllTrue, kArmS1x16AllTrue) + V(V32x4AnyTrue, kArmV32x4AnyTrue) \ + V(V32x4AllTrue, kArmV32x4AllTrue) \ + V(V16x8AnyTrue, kArmV16x8AnyTrue) \ + V(V16x8AllTrue, kArmV16x8AllTrue) \ + V(V8x16AnyTrue, kArmV8x16AnyTrue) \ + V(V8x16AllTrue, kArmV8x16AllTrue) #define SIMD_SHIFT_OP_LIST(V) \ V(I64x2Shl, 64) \ @@ -2941,6 +2944,42 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { VisitBitMask(this, node); } +namespace { +void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + ArmOperandGenerator g(selector); + // Need all unique registers because we first compare the two inputs, then we + // need the inputs to remain unchanged for the bitselect later. + selector->Emit(opcode, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseUniqueRegister(node->InputAt(1))); +} + +void VisitF64x2PminOrPMax(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + ArmOperandGenerator g(selector); + selector->Emit(opcode, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1))); +} +} // namespace + +void InstructionSelector::VisitF32x4Pmin(Node* node) { + VisitF32x4PminOrPmax(this, kArmF32x4Pmin, node); +} + +void InstructionSelector::VisitF32x4Pmax(Node* node) { + VisitF32x4PminOrPmax(this, kArmF32x4Pmax, node); +} + +void InstructionSelector::VisitF64x2Pmin(Node* node) { + VisitF64x2PminOrPMax(this, kArmF64x2Pmin, node); +} + +void InstructionSelector::VisitF64x2Pmax(Node* node) { + VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node); +} + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc b/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc index 4cf19a5d802..d21440c35b3 100644 --- a/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc +++ b/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc @@ -502,8 +502,9 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode, __ asm_imm(i.OutputSimd128Register().format(), \ i.InputSimd128Register(0).format(), i.InputInt##width(1)); \ } else { \ - VRegister tmp = i.TempSimd128Register(0); \ - Register shift = i.TempRegister(1).gp(); \ + UseScratchRegisterScope temps(tasm()); \ + VRegister tmp = temps.AcquireQ(); \ + Register shift = temps.Acquire##gp(); \ constexpr int mask = (1 << width) - 1; \ __ And(shift, i.InputRegister32(1), mask); \ __ Dup(tmp.format(), shift); \ @@ -521,8 +522,9 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode, __ asm_imm(i.OutputSimd128Register().format(), \ i.InputSimd128Register(0).format(), i.InputInt##width(1)); \ } else { \ - VRegister tmp = i.TempSimd128Register(0); \ - Register shift = i.TempRegister(1).gp(); \ + UseScratchRegisterScope temps(tasm()); \ + VRegister tmp = temps.AcquireQ(); \ + Register shift = temps.Acquire##gp(); \ constexpr int mask = (1 << width) - 1; \ __ And(shift, i.InputRegister32(1), mask); \ __ Dup(tmp.format(), shift); \ @@ -1901,6 +1903,43 @@ 
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D); SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D); + case kArm64F64x2Pmin: { + VRegister dst = i.OutputSimd128Register().V2D(); + VRegister lhs = i.InputSimd128Register(0).V2D(); + VRegister rhs = i.InputSimd128Register(1).V2D(); + // f64x2.pmin(lhs, rhs) + // = v128.bitselect(rhs, lhs, f64x2.lt(rhs,lhs)) + // = v128.bitselect(rhs, lhs, f64x2.gt(lhs,rhs)) + __ Fcmgt(dst, lhs, rhs); + __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); + break; + } + case kArm64F64x2Pmax: { + VRegister dst = i.OutputSimd128Register().V2D(); + VRegister lhs = i.InputSimd128Register(0).V2D(); + VRegister rhs = i.InputSimd128Register(1).V2D(); + // f64x2.pmax(lhs, rhs) + // = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs)) + __ Fcmgt(dst, rhs, lhs); + __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); + break; + } + case kArm64F64x2RoundUp: + __ Frintp(i.OutputSimd128Register().V2D(), + i.InputSimd128Register(0).V2D()); + break; + case kArm64F64x2RoundDown: + __ Frintm(i.OutputSimd128Register().V2D(), + i.InputSimd128Register(0).V2D()); + break; + case kArm64F64x2RoundTruncate: + __ Frintz(i.OutputSimd128Register().V2D(), + i.InputSimd128Register(0).V2D()); + break; + case kArm64F64x2RoundTiesEven: + __ Frintn(i.OutputSimd128Register().V2D(), + i.InputSimd128Register(0).V2D()); + break; case kArm64F32x4Splat: { __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0); break; @@ -1953,6 +1992,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S); SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S); + case kArm64F32x4Pmin: { + VRegister dst = i.OutputSimd128Register().V4S(); + VRegister lhs = i.InputSimd128Register(0).V4S(); + VRegister rhs = i.InputSimd128Register(1).V4S(); + // f32x4.pmin(lhs, rhs) + // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs)) + // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs)) + __ Fcmgt(dst, lhs, rhs); + __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); + break; + } + case kArm64F32x4Pmax: { + VRegister dst = i.OutputSimd128Register().V4S(); + VRegister lhs = i.InputSimd128Register(0).V4S(); + VRegister rhs = i.InputSimd128Register(1).V4S(); + // f32x4.pmax(lhs, rhs) + // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs)) + __ Fcmgt(dst, rhs, lhs); + __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); + break; + } + case kArm64F32x4RoundUp: + __ Frintp(i.OutputSimd128Register().V4S(), + i.InputSimd128Register(0).V4S()); + break; + case kArm64F32x4RoundDown: + __ Frintm(i.OutputSimd128Register().V4S(), + i.InputSimd128Register(0).V4S()); + break; + case kArm64F32x4RoundTruncate: + __ Frintz(i.OutputSimd128Register().V4S(), + i.InputSimd128Register(0).V4S()); + break; + case kArm64F32x4RoundTiesEven: + __ Frintn(i.OutputSimd128Register().V4S(), + i.InputSimd128Register(0).V4S()); + break; case kArm64I64x2Splat: { __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0)); break; @@ -2132,6 +2208,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Mov(dst.W(), tmp.V4S(), 0); break; } + case kArm64I32x4DotI16x8S: { + UseScratchRegisterScope scope(tasm()); + VRegister lhs = i.InputSimd128Register(0); + VRegister rhs = i.InputSimd128Register(1); + VRegister tmp1 = scope.AcquireV(kFormat4S); + VRegister tmp2 = scope.AcquireV(kFormat4S); + __ Smull(tmp1, lhs.V4H(), rhs.V4H()); + __ Smull2(tmp2, lhs.V8H(), rhs.V8H()); + __ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2); + 
break; + } case kArm64I16x8Splat: { __ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0)); break; @@ -2480,7 +2567,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B); SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B); SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B); - case kArm64S1x2AllTrue: { + case kArm64V64x2AllTrue: { UseScratchRegisterScope scope(tasm()); VRegister temp1 = scope.AcquireV(kFormat2D); VRegister temp2 = scope.AcquireV(kFormatS); @@ -2508,32 +2595,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArm64I16x8Load8x8S: { - __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0)); __ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B()); break; } case kArm64I16x8Load8x8U: { - __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0)); __ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B()); break; } case kArm64I32x4Load16x4S: { - __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0)); __ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H()); break; } case kArm64I32x4Load16x4U: { - __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0)); __ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H()); break; } case kArm64I64x2Load32x2S: { - __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0)); __ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S()); break; } case kArm64I64x2Load32x2U: { - __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0)); __ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S()); break; } @@ -2548,13 +2635,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; \ } // for AnyTrue, the format does not matter, umaxv does not support 2D - SIMD_REDUCE_OP_CASE(kArm64S1x2AnyTrue, Umaxv, kFormatS, 4S); - SIMD_REDUCE_OP_CASE(kArm64S1x4AnyTrue, Umaxv, kFormatS, 4S); - SIMD_REDUCE_OP_CASE(kArm64S1x4AllTrue, Uminv, kFormatS, 4S); - SIMD_REDUCE_OP_CASE(kArm64S1x8AnyTrue, Umaxv, kFormatH, 8H); - SIMD_REDUCE_OP_CASE(kArm64S1x8AllTrue, Uminv, kFormatH, 8H); - SIMD_REDUCE_OP_CASE(kArm64S1x16AnyTrue, Umaxv, kFormatB, 16B); - SIMD_REDUCE_OP_CASE(kArm64S1x16AllTrue, Uminv, kFormatB, 16B); + SIMD_REDUCE_OP_CASE(kArm64V64x2AnyTrue, Umaxv, kFormatS, 4S); + SIMD_REDUCE_OP_CASE(kArm64V32x4AnyTrue, Umaxv, kFormatS, 4S); + SIMD_REDUCE_OP_CASE(kArm64V32x4AllTrue, Uminv, kFormatS, 4S); + SIMD_REDUCE_OP_CASE(kArm64V16x8AnyTrue, Umaxv, kFormatH, 8H); + SIMD_REDUCE_OP_CASE(kArm64V16x8AllTrue, Uminv, kFormatH, 8H); + SIMD_REDUCE_OP_CASE(kArm64V8x16AnyTrue, Umaxv, kFormatB, 16B); + SIMD_REDUCE_OP_CASE(kArm64V8x16AllTrue, Uminv, kFormatB, 16B); } return kSuccess; } // NOLINT(readability/fn_size) diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h b/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h index a8e2b52c028..41f9d78550e 100644 --- a/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h +++ b/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h @@ -186,6 +186,12 @@ namespace compiler { 
V(Arm64F64x2Le) \ V(Arm64F64x2Qfma) \ V(Arm64F64x2Qfms) \ + V(Arm64F64x2Pmin) \ + V(Arm64F64x2Pmax) \ + V(Arm64F64x2RoundUp) \ + V(Arm64F64x2RoundDown) \ + V(Arm64F64x2RoundTruncate) \ + V(Arm64F64x2RoundTiesEven) \ V(Arm64F32x4Splat) \ V(Arm64F32x4ExtractLane) \ V(Arm64F32x4ReplaceLane) \ @@ -209,6 +215,12 @@ namespace compiler { V(Arm64F32x4Le) \ V(Arm64F32x4Qfma) \ V(Arm64F32x4Qfms) \ + V(Arm64F32x4Pmin) \ + V(Arm64F32x4Pmax) \ + V(Arm64F32x4RoundUp) \ + V(Arm64F32x4RoundDown) \ + V(Arm64F32x4RoundTruncate) \ + V(Arm64F32x4RoundTiesEven) \ V(Arm64I64x2Splat) \ V(Arm64I64x2ExtractLane) \ V(Arm64I64x2ReplaceLane) \ @@ -256,6 +268,7 @@ namespace compiler { V(Arm64I32x4GeU) \ V(Arm64I32x4Abs) \ V(Arm64I32x4BitMask) \ + V(Arm64I32x4DotI16x8S) \ V(Arm64I16x8Splat) \ V(Arm64I16x8ExtractLaneU) \ V(Arm64I16x8ExtractLaneS) \ @@ -361,14 +374,14 @@ namespace compiler { V(Arm64S8x8Reverse) \ V(Arm64S8x4Reverse) \ V(Arm64S8x2Reverse) \ - V(Arm64S1x2AnyTrue) \ - V(Arm64S1x2AllTrue) \ - V(Arm64S1x4AnyTrue) \ - V(Arm64S1x4AllTrue) \ - V(Arm64S1x8AnyTrue) \ - V(Arm64S1x8AllTrue) \ - V(Arm64S1x16AnyTrue) \ - V(Arm64S1x16AllTrue) \ + V(Arm64V64x2AnyTrue) \ + V(Arm64V64x2AllTrue) \ + V(Arm64V32x4AnyTrue) \ + V(Arm64V32x4AllTrue) \ + V(Arm64V16x8AnyTrue) \ + V(Arm64V16x8AllTrue) \ + V(Arm64V8x16AnyTrue) \ + V(Arm64V8x16AllTrue) \ V(Arm64S8x16LoadSplat) \ V(Arm64S16x8LoadSplat) \ V(Arm64S32x4LoadSplat) \ diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc index 128ebdac957..3ea84730801 100644 --- a/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc +++ b/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc @@ -156,6 +156,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F64x2Le: case kArm64F64x2Qfma: case kArm64F64x2Qfms: + case kArm64F64x2Pmin: + case kArm64F64x2Pmax: + case kArm64F64x2RoundUp: + case kArm64F64x2RoundDown: + case kArm64F64x2RoundTruncate: + case kArm64F64x2RoundTiesEven: case kArm64F32x4Splat: case kArm64F32x4ExtractLane: case kArm64F32x4ReplaceLane: @@ -179,6 +185,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F32x4Le: case kArm64F32x4Qfma: case kArm64F32x4Qfms: + case kArm64F32x4Pmin: + case kArm64F32x4Pmax: + case kArm64F32x4RoundUp: + case kArm64F32x4RoundDown: + case kArm64F32x4RoundTruncate: + case kArm64F32x4RoundTiesEven: case kArm64I64x2Splat: case kArm64I64x2ExtractLane: case kArm64I64x2ReplaceLane: @@ -226,6 +238,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64I32x4GeU: case kArm64I32x4Abs: case kArm64I32x4BitMask: + case kArm64I32x4DotI16x8S: case kArm64I16x8Splat: case kArm64I16x8ExtractLaneU: case kArm64I16x8ExtractLaneS: @@ -331,14 +344,14 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64S8x8Reverse: case kArm64S8x4Reverse: case kArm64S8x2Reverse: - case kArm64S1x2AnyTrue: - case kArm64S1x2AllTrue: - case kArm64S1x4AnyTrue: - case kArm64S1x4AllTrue: - case kArm64S1x8AnyTrue: - case kArm64S1x8AllTrue: - case kArm64S1x16AnyTrue: - case kArm64S1x16AllTrue: + case kArm64V64x2AnyTrue: + case kArm64V64x2AllTrue: + case kArm64V32x4AnyTrue: + case kArm64V32x4AllTrue: + case kArm64V16x8AnyTrue: + case kArm64V16x8AllTrue: + case kArm64V8x16AnyTrue: + case kArm64V8x16AllTrue: case kArm64TestAndBranch32: case kArm64TestAndBranch: case kArm64CompareAndBranch32: diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc 
b/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc index 06a87a8aab7..2e0d977c3c7 100644 --- a/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc +++ b/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc @@ -163,13 +163,9 @@ void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode, g.UseImmediate(node->InputAt(1))); } } else { - InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()}; - // We only need a unique register for the first input (src), since in - // the codegen we use tmp to store the shifts, and then later use it with - // src. The second input can be the same as the second temp (shift). selector->Emit(opcode, g.DefineAsRegister(node), - g.UseUniqueRegister(node->InputAt(0)), - g.UseRegister(node->InputAt(1)), arraysize(temps), temps); + g.UseRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1))); } } @@ -608,18 +604,23 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode, void InstructionSelector::VisitLoadTransform(Node* node) { LoadTransformParameters params = LoadTransformParametersOf(node->op()); InstructionCode opcode = kArchNop; + bool require_add = false; switch (params.transformation) { case LoadTransformation::kS8x16LoadSplat: opcode = kArm64S8x16LoadSplat; + require_add = true; break; case LoadTransformation::kS16x8LoadSplat: opcode = kArm64S16x8LoadSplat; + require_add = true; break; case LoadTransformation::kS32x4LoadSplat: opcode = kArm64S32x4LoadSplat; + require_add = true; break; case LoadTransformation::kS64x2LoadSplat: opcode = kArm64S64x2LoadSplat; + require_add = true; break; case LoadTransformation::kI16x8Load8x8S: opcode = kArm64I16x8Load8x8S; @@ -655,13 +656,17 @@ void InstructionSelector::VisitLoadTransform(Node* node) { inputs[1] = g.UseRegister(index); outputs[0] = g.DefineAsRegister(node); - // ld1r uses post-index, so construct address first. - // TODO(v8:9886) If index can be immediate, use vldr without this add. - InstructionOperand addr = g.TempRegister(); - Emit(kArm64Add, 1, &addr, 2, inputs); - inputs[0] = addr; - inputs[1] = g.TempImmediate(0); - opcode |= AddressingModeField::encode(kMode_MRI); + if (require_add) { + // ld1r uses post-index, so construct address first. + // TODO(v8:9886) If index can be immediate, use vldr without this add. 
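// Illustration (not part of the patch): only the ld1r-based splat transforms
// still need the explicit base + index add (ld1r accepts just a base
// register, hence the TODO above); the widening loads now go through Ldr,
// which supports the reg+reg (kMode_MRR) addressing mode directly. The
// widening transform itself is a 64-bit load plus a lane-wise extension; a
// scalar reference sketch of i64x2.load32x2_s (hypothetical helper,
// <cstdint> assumed):
void I64x2Load32x2S(const int32_t* src, int64_t out[2]) {
  out[0] = static_cast<int64_t>(src[0]);  // Sxtl sign-extends each lane
  out[1] = static_cast<int64_t>(src[1]);
}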
+ InstructionOperand addr = g.TempRegister(); + Emit(kArm64Add, 1, &addr, 2, inputs); + inputs[0] = addr; + inputs[1] = g.TempImmediate(0); + opcode |= AddressingModeField::encode(kMode_MRI); + } else { + opcode |= AddressingModeField::encode(kMode_MRR); + } Emit(opcode, 1, outputs, 2, inputs); } @@ -1360,7 +1365,15 @@ void InstructionSelector::VisitWord64Ror(Node* node) { V(Float64RoundTiesEven, kArm64Float64RoundTiesEven) \ V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \ V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \ - V(Float64SilenceNaN, kArm64Float64SilenceNaN) + V(Float64SilenceNaN, kArm64Float64SilenceNaN) \ + V(F32x4Ceil, kArm64F32x4RoundUp) \ + V(F32x4Floor, kArm64F32x4RoundDown) \ + V(F32x4Trunc, kArm64F32x4RoundTruncate) \ + V(F32x4NearestInt, kArm64F32x4RoundTiesEven) \ + V(F64x2Ceil, kArm64F64x2RoundUp) \ + V(F64x2Floor, kArm64F64x2RoundDown) \ + V(F64x2Trunc, kArm64F64x2RoundTruncate) \ + V(F64x2NearestInt, kArm64F64x2RoundTiesEven) #define RRR_OP_LIST(V) \ V(Int32Div, kArm64Idiv32) \ @@ -3184,14 +3197,14 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I8x16Neg, kArm64I8x16Neg) \ V(I8x16Abs, kArm64I8x16Abs) \ V(S128Not, kArm64S128Not) \ - V(S1x2AnyTrue, kArm64S1x2AnyTrue) \ - V(S1x2AllTrue, kArm64S1x2AllTrue) \ - V(S1x4AnyTrue, kArm64S1x4AnyTrue) \ - V(S1x4AllTrue, kArm64S1x4AllTrue) \ - V(S1x8AnyTrue, kArm64S1x8AnyTrue) \ - V(S1x8AllTrue, kArm64S1x8AllTrue) \ - V(S1x16AnyTrue, kArm64S1x16AnyTrue) \ - V(S1x16AllTrue, kArm64S1x16AllTrue) + V(V64x2AnyTrue, kArm64V64x2AnyTrue) \ + V(V64x2AllTrue, kArm64V64x2AllTrue) \ + V(V32x4AnyTrue, kArm64V32x4AnyTrue) \ + V(V32x4AllTrue, kArm64V32x4AllTrue) \ + V(V16x8AnyTrue, kArm64V16x8AnyTrue) \ + V(V16x8AllTrue, kArm64V16x8AllTrue) \ + V(V8x16AnyTrue, kArm64V8x16AnyTrue) \ + V(V8x16AllTrue, kArm64V8x16AllTrue) #define SIMD_SHIFT_OP_LIST(V) \ V(I64x2Shl, 64) \ @@ -3249,6 +3262,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I32x4MaxU, kArm64I32x4MaxU) \ V(I32x4GtU, kArm64I32x4GtU) \ V(I32x4GeU, kArm64I32x4GeU) \ + V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \ V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \ V(I16x8AddSaturateS, kArm64I16x8AddSaturateS) \ V(I16x8AddHoriz, kArm64I16x8AddHoriz) \ @@ -3613,6 +3627,34 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) { VisitRR(this, kArm64Sxtw, node); } +namespace { +void VisitPminOrPmax(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + Arm64OperandGenerator g(selector); + // Need all unique registers because we first compare the two inputs, then we + // need the inputs to remain unchanged for the bitselect later. 
+ selector->Emit(opcode, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseUniqueRegister(node->InputAt(1))); +} +} // namespace + +void InstructionSelector::VisitF32x4Pmin(Node* node) { + VisitPminOrPmax(this, kArm64F32x4Pmin, node); +} + +void InstructionSelector::VisitF32x4Pmax(Node* node) { + VisitPminOrPmax(this, kArm64F32x4Pmax, node); +} + +void InstructionSelector::VisitF64x2Pmin(Node* node) { + VisitPminOrPmax(this, kArm64F64x2Pmin, node); +} + +void InstructionSelector::VisitF64x2Pmax(Node* node) { + VisitPminOrPmax(this, kArm64F64x2Pmax, node); +} + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/chromium/v8/src/compiler/backend/code-generator.cc b/chromium/v8/src/compiler/backend/code-generator.cc index 72c5750035a..83dccf69e82 100644 --- a/chromium/v8/src/compiler/backend/code-generator.cc +++ b/chromium/v8/src/compiler/backend/code-generator.cc @@ -55,19 +55,20 @@ CodeGenerator::CodeGenerator( frame_access_state_(nullptr), linkage_(linkage), instructions_(instructions), - unwinding_info_writer_(zone()), + unwinding_info_writer_(codegen_zone), info_(info), - labels_(zone()->NewArray