summaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
author: David L. Jones <dlj@google.com> 2017-11-15 01:40:05 +0000
committer: David L. Jones <dlj@google.com> 2017-11-15 01:40:05 +0000
commit: d5c2cca72463233df77a065f201db31b140eb44d (patch)
tree: 3f9a978131033302a58b7db7db1ecf2a4622bad2 /lib/Target/PowerPC
parent: ce7676b8db6bac096dad4c4ad62e9e6bb8aa1064 (diff)
parent: dcf64df89bc6d775e266ebd6b0134d135f47a35b (diff)
download: llvm-testing.tar.gz
Creating branches/google/testing and tags/google/testing/2017-11-14 from r317716 [testing]
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@318248 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/PPCBranchCoalescing.cpp4
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h2
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp2
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp164
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td7
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td7
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp4
9 files changed, 182 insertions, 16 deletions
diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 33085a423619..ac28d7ff4978 100644
--- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -21,9 +21,9 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index fa813db5fef3..f845d5a9ac64 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -15,7 +15,7 @@
#include "PPC.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 8ea3689b08e6..2092748ca1aa 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
@@ -53,7 +54,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cassert>
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f3e7b4af45d9..62ade9661455 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -51,6 +51,7 @@
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
@@ -82,7 +83,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -291,14 +291,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
- // PowerPC does not have BSWAP
+ // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
+ // to speed up scalar BSWAP64.
// CTPOP or CTTZ were introduced in P8/P9 respectivelly
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
if (Subtarget.isISA3_0()) {
+ setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
@@ -781,6 +783,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
}
+
+ if (Subtarget.hasP9Altivec()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ }
}
if (Subtarget.hasQPX()) {
@@ -7888,6 +7895,107 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
+/// lowerToVINSERTB - If this VECTOR_SHUFFLE only inserts a single byte of one
+/// operand into the other, lower it to VECINSERT (the ISA 3.0 VINSERTB), with a
+/// preceding VECSHL when a shift is needed; else return a default SDValue.
+SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
+ SelectionDAG &DAG) const {
+ const unsigned BytesInVector = 16;
+ bool IsLE = Subtarget.isLittleEndian();
+ SDLoc dl(N);
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned ShiftElts = 0, InsertAtByte = 0;
+ bool Swap = false;
+
+ // Shifts required to get the byte we want at element 7 (BE) / 8 (LE), indexed by its source position.
+ unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
+ 0, 15, 14, 13, 12, 11, 10, 9};
+ unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
+ 1, 2, 3, 4, 5, 6, 7, 8};
+
+ ArrayRef<int> Mask = N->getMask();
+ int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+ // For each mask element, find out if we're just inserting something
+ // from V2 into V1 or vice versa.
+ // Possible permutations inserting an element from V2 into V1:
+ // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ // ...
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
+ // Inserting from V1 into V2 will be similar, except mask range will be
+ // [16,31].
+
+ bool FoundCandidate = false;
+ // If both vector operands for the shuffle are the same vector, the mask
+ // will contain only elements from the first one and the second one will be
+ // undef.
+ unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
+ // Go through the mask of bytes to find an element that's being moved
+ // from one vector to the other.
+ for (unsigned i = 0; i < BytesInVector; ++i) {
+ unsigned CurrentElement = Mask[i];
+ // If 2nd operand is undefined, we should only look for VINSERTBSrcElem
+ // (element 7 on BE, element 8 on LE) in the Mask.
+ if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
+ continue;
+
+ bool OtherElementsInOrder = true;
+ // Examine the other elements in the Mask to see if they're in original
+ // order.
+ for (unsigned j = 0; j < BytesInVector; ++j) {
+ if (j == i)
+ continue;
+ // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
+ // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
+ // in which case we assume we're always picking from the 1st operand.
+ int MaskOffset =
+ (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
+ if (Mask[j] != OriginalOrder[j] + MaskOffset) {
+ OtherElementsInOrder = false;
+ break;
+ }
+ }
+ // If other elements are in original order, we record the number of shifts
+ // we need to get the element we want into element 7 (8 on LE). Also record
+ // which byte in the vector we should insert into.
+ if (OtherElementsInOrder) {
+ // If 2nd operand is undefined, we assume no shifts and no swapping.
+ if (V2.isUndef()) {
+ ShiftElts = 0;
+ Swap = false;
+ } else {
+ // Only the low 4 bits index the shift table (byte position within its source vector); Swap records which operand is the source.
+ ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
+ : BigEndianShifts[CurrentElement & 0xF];
+ Swap = CurrentElement < BytesInVector;
+ }
+ InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
+ FoundCandidate = true;
+ break;
+ }
+ }
+
+ if (!FoundCandidate)
+ return SDValue();
+
+ // Candidate found, construct the proper SDAG sequence with VINSERTB,
+ // optionally with VECSHL if shift is required.
+ if (Swap)
+ std::swap(V1, V2);
+ if (V2.isUndef())
+ V2 = V1;
+ if (ShiftElts) {
+ SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+ }
+ return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+}
+
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
@@ -8035,8 +8143,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
}
if (Subtarget.hasP9Altivec()) {
- SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG);
- if (NewISDNode)
+ SDValue NewISDNode;
+ if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
+ return NewISDNode;
+
+ if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
return NewISDNode;
}
@@ -8675,6 +8786,23 @@ SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
+// Lower scalar BSWAP64 to xxbrd: splat into a v2i64, apply XXREVERSE, then extract one i64 element.
+SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // MTVSRDD: build a v2i64 holding the scalar operand in both elements.
+ Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
+ Op.getOperand(0));
+ // XXBRD: the vector BSWAP, expressed as PPCISD::XXREVERSE on the v2i64 value.
+ Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
+ // MFVSRD: extract element 1 on little-endian subtargets, element 0 otherwise.
+ int VectorIndex = 0;
+ if (Subtarget.isLittleEndian())
+ VectorIndex = 1;
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
+ DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
+ return Op;
+}
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -8719,11 +8847,29 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
"Should only be called for ISD::INSERT_VECTOR_ELT");
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
// We have legal lowering for constant indices but not for variable ones.
- if (C)
- return Op;
- return SDValue();
+ if (!C)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
+ unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
+ unsigned InsertAtElement = C->getZExtValue();
+ unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
+ if (Subtarget.isLittleEndian()) {
+ InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
+ }
+ return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+ }
+ return Op;
}
SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
@@ -9146,6 +9292,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SREM:
case ISD::UREM:
return LowerREM(Op, DAG);
+ case ISD::BSWAP:
+ return LowerBSWAP(Op, DAG);
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 1a5efeba4cf6..bf9c4b8e63b1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -953,6 +953,7 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
@@ -1079,6 +1080,11 @@ namespace llvm {
/// from one vector into the other.
SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+ /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
+ /// handled by the VINSERTB instruction introduced in ISA 3.0. This is
+ /// essentially v16i8 vector version of VINSERTH.
+ SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
}; // end class PPCTargetLowering
namespace PPC {
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 506fac7dfc13..e751c149b0b3 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1312,7 +1312,12 @@ def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>;
def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>;
// Vector Insert Element Instructions
-def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>;
+def VINSERTB : VXForm_1<781, (outs vrrc:$vD),
+ (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
+ "vinsertb $vD, $vB, $UIM", IIC_VecGeneral,
+ [(set v16i8:$vD, (PPCvecinsert v16i8:$vDi, v16i8:$vB,
+ imm32SExt16:$UIM))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSERTH : VXForm_1<845, (outs vrrc:$vD),
(ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
"vinserth $vD, $vB, $UIM", IIC_VecGeneral,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index ab86a54f6fea..565392f76e44 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -16,7 +16,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "PPCGenInstrInfo.inc"
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 1fc50d2c860a..3261bc9bc53e 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2595,6 +2595,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
}
+ // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
+ // of f64
+ def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+ def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+
// Patterns for which instructions from ISA 3.0 are a better match
let Predicates = [IsLittleEndian, HasP9Vector] in {
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index d46c1383297f..78467e81795c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -28,6 +28,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -37,8 +39,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>