diff options
author | David L. Jones <dlj@google.com> | 2017-11-15 01:40:05 +0000 |
---|---|---|
committer | David L. Jones <dlj@google.com> | 2017-11-15 01:40:05 +0000 |
commit | d5c2cca72463233df77a065f201db31b140eb44d (patch) | |
tree | 3f9a978131033302a58b7db7db1ecf2a4622bad2 /lib/Target/PowerPC | |
parent | ce7676b8db6bac096dad4c4ad62e9e6bb8aa1064 (diff) | |
parent | dcf64df89bc6d775e266ebd6b0134d135f47a35b (diff) | |
download | llvm-testing.tar.gz |
Creating branches/google/testing and tags/google/testing/2017-11-14 from r317716testing
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@318248 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r-- | lib/Target/PowerPC/PPCBranchCoalescing.cpp | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCFrameLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 164 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 6 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrAltivec.td | 7 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.h | 2 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrVSX.td | 7 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.cpp | 4 |
9 files changed, 182 insertions, 16 deletions
diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp index 33085a423619..ac28d7ff4978 100644 --- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp +++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -21,9 +21,9 @@ #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index fa813db5fef3..f845d5a9ac64 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -15,7 +15,7 @@ #include "PPC.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 8ea3689b08e6..2092748ca1aa 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" @@ -53,7 +54,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <cassert> diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f3e7b4af45d9..62ade9661455 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -51,6 +51,7 @@ #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" @@ -82,7 +83,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -291,14 +291,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FROUND, MVT::f32, Legal); } - // PowerPC does not have BSWAP + // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd + // to speed up scalar BSWAP64. // CTPOP or CTTZ were introduced in P8/P9 respectivelly setOperationAction(ISD::BSWAP, MVT::i32 , Expand); - setOperationAction(ISD::BSWAP, MVT::i64 , Expand); if (Subtarget.isISA3_0()) { + setOperationAction(ISD::BSWAP, MVT::i64 , Custom); setOperationAction(ISD::CTTZ , MVT::i32 , Legal); setOperationAction(ISD::CTTZ , MVT::i64 , Legal); } else { + setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); } @@ -781,6 +783,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRL, MVT::v1i128, Legal); setOperationAction(ISD::SRA, MVT::v1i128, Expand); } + + if (Subtarget.hasP9Altivec()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); + } } if (Subtarget.hasQPX()) { @@ -7888,6 +7895,107 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, return DAG.getNode(ISD::BITCAST, dl, VT, T); } +/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled +/// by the VINSERTB instruction introduced in ISA 3.0, else just return default +/// SDValue. +SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N, + SelectionDAG &DAG) const { + const unsigned BytesInVector = 16; + bool IsLE = Subtarget.isLittleEndian(); + SDLoc dl(N); + SDValue V1 = N->getOperand(0); + SDValue V2 = N->getOperand(1); + unsigned ShiftElts = 0, InsertAtByte = 0; + bool Swap = false; + + // Shifts required to get the byte we want at element 7. + unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1, + 0, 15, 14, 13, 12, 11, 10, 9}; + unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0, + 1, 2, 3, 4, 5, 6, 7, 8}; + + ArrayRef<int> Mask = N->getMask(); + int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + + // For each mask element, find out if we're just inserting something + // from V2 into V1 or vice versa. + // Possible permutations inserting an element from V2 into V1: + // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + // ... + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X + // Inserting from V1 into V2 will be similar, except mask range will be + // [16,31]. + + bool FoundCandidate = false; + // If both vector operands for the shuffle are the same vector, the mask + // will contain only elements from the first one and the second one will be + // undef. + unsigned VINSERTBSrcElem = IsLE ? 8 : 7; + // Go through the mask of half-words to find an element that's being moved + // from one vector to the other. + for (unsigned i = 0; i < BytesInVector; ++i) { + unsigned CurrentElement = Mask[i]; + // If 2nd operand is undefined, we should only look for element 7 in the + // Mask. + if (V2.isUndef() && CurrentElement != VINSERTBSrcElem) + continue; + + bool OtherElementsInOrder = true; + // Examine the other elements in the Mask to see if they're in original + // order. + for (unsigned j = 0; j < BytesInVector; ++j) { + if (j == i) + continue; + // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be + // from V2 [16,31] and vice versa. Unless the 2nd operand is undefined, + // in which we always assume we're always picking from the 1st operand. + int MaskOffset = + (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0; + if (Mask[j] != OriginalOrder[j] + MaskOffset) { + OtherElementsInOrder = false; + break; + } + } + // If other elements are in original order, we record the number of shifts + // we need to get the element we want into element 7. Also record which byte + // in the vector we should insert into. + if (OtherElementsInOrder) { + // If 2nd operand is undefined, we assume no shifts and no swapping. + if (V2.isUndef()) { + ShiftElts = 0; + Swap = false; + } else { + // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4. + ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF] + : BigEndianShifts[CurrentElement & 0xF]; + Swap = CurrentElement < BytesInVector; + } + InsertAtByte = IsLE ? BytesInVector - (i + 1) : i; + FoundCandidate = true; + break; + } + } + + if (!FoundCandidate) + return SDValue(); + + // Candidate found, construct the proper SDAG sequence with VINSERTB, + // optionally with VECSHL if shift is required. + if (Swap) + std::swap(V1, V2); + if (V2.isUndef()) + V2 = V1; + if (ShiftElts) { + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + } + return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); +} + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled /// by the VINSERTH instruction introduced in ISA 3.0, else just return default /// SDValue. @@ -8035,8 +8143,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, } if (Subtarget.hasP9Altivec()) { - SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG); - if (NewISDNode) + SDValue NewISDNode; + if ((NewISDNode = lowerToVINSERTH(SVOp, DAG))) + return NewISDNode; + + if ((NewISDNode = lowerToVINSERTB(SVOp, DAG))) return NewISDNode; } @@ -8675,6 +8786,23 @@ SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { return Op; } +// Lower scalar BSWAP64 to xxbrd. +SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + // MTVSRDD + Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0), + Op.getOperand(0)); + // XXBRD + Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op); + // MFVSRD + int VectorIndex = 0; + if (Subtarget.isLittleEndian()) + VectorIndex = 1; + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op, + DAG.getTargetConstant(VectorIndex, dl, MVT::i32)); + return Op; +} + SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -8719,11 +8847,29 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Should only be called for ISD::INSERT_VECTOR_ELT"); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2)); // We have legal lowering for constant indices but not for variable ones. - if (C) - return Op; - return SDValue(); + if (!C) + return SDValue(); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types. + if (VT == MVT::v8i16 || VT == MVT::v16i8) { + SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2); + unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8; + unsigned InsertAtElement = C->getZExtValue(); + unsigned InsertAtByte = InsertAtElement * BytesInEachElement; + if (Subtarget.isLittleEndian()) { + InsertAtByte = (16 - BytesInEachElement) - InsertAtByte; + } + return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + } + return Op; } SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, @@ -9146,6 +9292,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SREM: case ISD::UREM: return LowerREM(Op, DAG); + case ISD::BSWAP: + return LowerBSWAP(Op, DAG); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 1a5efeba4cf6..bf9c4b8e63b1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -953,6 +953,7 @@ namespace llvm { SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; @@ -1079,6 +1080,11 @@ namespace llvm { /// from one vector into the other. SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; + /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be + /// handled by the VINSERTB instruction introduced in ISA 3.0. This is + /// essentially v16i8 vector version of VINSERTH. + SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; + }; // end class PPCTargetLowering namespace PPC { diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 506fac7dfc13..e751c149b0b3 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1312,7 +1312,12 @@ def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>; def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>; // Vector Insert Element Instructions -def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>; +def VINSERTB : VXForm_1<781, (outs vrrc:$vD), + (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB), + "vinsertb $vD, $vB, $UIM", IIC_VecGeneral, + [(set v16i8:$vD, (PPCvecinsert v16i8:$vDi, v16i8:$vB, + imm32SExt16:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VINSERTH : VXForm_1<845, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB), "vinserth $vD, $vB, $UIM", IIC_VecGeneral, diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index ab86a54f6fea..565392f76e44 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -16,7 +16,7 @@ #include "PPC.h" #include "PPCRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "PPCGenInstrInfo.inc" diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 1fc50d2c860a..3261bc9bc53e 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -2595,6 +2595,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; } + // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead + // of f64 + def : Pat<(v8i16 (PPCmtvsrz i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + def : Pat<(v16i8 (PPCmtvsrz i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + // Patterns for which instructions from ISA 3.0 are a better match let Predicates = [IsLittleEndian, HasP9Vector] in { def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))), diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index d46c1383297f..78467e81795c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -28,6 +28,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -37,8 +39,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <cstdlib> |