authorJiangning Liu <jiangning.liu@arm.com>2014-01-15 05:08:01 +0000
committerJiangning Liu <jiangning.liu@arm.com>2014-01-15 05:08:01 +0000
commitb6db372c96906eac67b26f50202553dab1653dbd (patch)
tree11d1610b96efacfebc3056f45f4879f34fc3dc05 /lib
parent38e6f7301d2306230d5eb200ecdab2b365079096 (diff)
For AArch64, lower sext_inreg and generate optimized code using SXTL.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199296 91177308-0d34-0410-b5e6-96231b3b80d8
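Background (not part of the commit): SXTL is the assembler alias of SSHLL with a zero shift amount, i.e. a lane-wise widening sign extension. A minimal C sketch of the kind of source whose vectorized form this lowering targets; the function name and element widths are illustrative assumptions:

#include <stdint.h>

// Illustrative only: a widening signed copy. When vectorized, each chunk
// becomes a lane-wise sext of v8i8 to v8i16, i.e. "sshll v0.8h, v0.8b, #0"
// (alias: "sxtl v0.8h, v0.8b") on AArch64.
void widen_s8_to_s16(int16_t *dst, const int8_t *src, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = src[i];
}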
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp  74
-rw-r--r--  lib/Target/AArch64/AArch64InstrNEON.td        4
2 files changed, 77 insertions, 1 deletion
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3037845762..581c8935c1 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -286,6 +286,15 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setExceptionSelectorRegister(AArch64::X1);
if (Subtarget->hasNEON()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand);
+
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
@@ -3574,7 +3583,25 @@ static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
return (Cnt >= 1 && Cnt <= ElementBits);
}
-/// Checks for immediate versions of vector shifts and lowers them.
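+/// Rewrite a matched sext_inreg pattern: bitcast Src to SrcVT, sign-extend
+/// to DestVT, shuffle the interesting lanes to the front with Mask, and
+/// return the low 64 bits (sub_64) of the result as SubRegVT.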
+static SDValue GenForSextInreg(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ EVT SrcVT, EVT DestVT, EVT SubRegVT,
+ const int *Mask, SDValue Src) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Bitcast
+ = DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src);
+ SDValue Sext
+ = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast);
+ SDValue ShuffleVec
+ = DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask);
+ SDValue ExtractSubreg
+ = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N),
+ SubRegVT, ShuffleVec,
+ DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0);
+ return ExtractSubreg;
+}
+
+/// Checks for vector shifts and lowers them.
static SDValue PerformShiftCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *ST) {
@@ -3583,6 +3610,51 @@ static SDValue PerformShiftCombine(SDNode *N,
if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
return PerformSRACombine(N, DCI);
+ // We're looking for an SRA/SHL pair so that we can generate the instruction
+ //   sshll v0.8h, v0.8b, #0
+ // SXTL is an alias of this instruction.
+ //
+ // For example, for a DAG like the one below,
+ //   v2i32 = sra (v2i32 (shl v2i32, 16)), 16
+ // we can transform it into
+ //   v2i32 = EXTRACT_SUBREG
+ //     (v4i32 (shuffle_vector
+ //       (v4i32 (sext (v4i16 (bitcast v2i32)))),
+ //       undef, (0, 2, u, u))),
+ //     sub_64
+ //
+ // With this transformation we expect to generate "SSHLL + UZIP1".
+ // Sometimes the UZIP1 can be optimized away by combining with other context.
+ int64_t ShrCnt, ShlCnt;
+ if (N->getOpcode() == ISD::SRA
+ && (VT == MVT::v2i32 || VT == MVT::v4i16)
+ && isVShiftRImm(N->getOperand(1), VT, ShrCnt)
+ && N->getOperand(0).getOpcode() == ISD::SHL
+ && isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) {
+ SDValue Src = N->getOperand(0).getOperand(0);
+ if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) {
+ // sext_inreg(v2i32, v2i16)
+ // We essentially only care about the mask {0, 2, u, u}
+ int Mask[4] = {0, 2, 4, 6};
+ return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32,
+ Mask, Src);
+ }
+ else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) {
+ // sext_inreg(v2i32, v2i8)
+ // We essentially only care about the mask {0, u, 4, u, u, u, u, u}
+ int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
+ return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32,
+ Mask, Src);
+ }
+ else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) {
+ // sext_inreg(v4i16, v4i8)
+ // We essentially only care about the mask {0, 2, 4, 6, u, u, u, u}
+ int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
+ return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16,
+ Mask, Src);
+ }
+ }
+
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
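Aside (illustrative, not from the patch): the lane-wise identity that the combine above matches can be modeled in scalar C. A left shift followed by an arithmetic right shift with the same count is exactly sign_extend_inreg; the helper name is an assumption:

#include <stdint.h>

// Scalar model of sext_inreg(i32, i16): (x << 16) >> 16 sign-extends the low
// 16 bits into the full 32-bit lane. The uint32_t cast avoids signed-overflow
// UB in the left shift; the right shift on int32_t is arithmetic on the
// compilers and targets this patch cares about.
static inline int32_t sext_inreg_i16(int32_t x) {
  return (int32_t)((uint32_t)x << 16) >> 16;
}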
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index f1cb122eef..81371be066 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -1877,6 +1877,10 @@ def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b", USHLLvvi_16B, VPR
def UXTL2vv_8H : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h", USHLLvvi_8H, VPR128, VPR128>;
def UXTL2vv_4S : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s", USHLLvvi_4S, VPR128, VPR128>;
+def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>;
+def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>;
+def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>;
+
// Rounding/Saturating shift
class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
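Context for the anyext patterns above (an illustrative assumption, not part of the patch): a plain widening copy is a typical source of such anyext/zext nodes, and these patterns let it select a single USHLL:

#include <stdint.h>

// After vectorization, this widening copy contains zext/anyext of v8i8 to
// v8i16, which the new patterns select as "ushll v0.8h, v0.8b, #0"
// (assembler alias: "uxtl v0.8h, v0.8b").
void widen_u8_to_u16(uint16_t *dst, const uint8_t *src, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = src[i];
}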