summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2014-04-18 09:31:15 +0000
committerTim Northover <tnorthover@apple.com>2014-04-18 09:31:15 +0000
commite7ec66e56be056243e87c2920e0866d7aa84f6ff (patch)
tree0b43cc0277f9fa533d65e001de1bc935e99aec2c
parent8405c940d3afa5958185f80d8d919f2d008da576 (diff)
downloadllvm-e7ec66e56be056243e87c2920e0866d7aa84f6ff.tar.gz
llvm-e7ec66e56be056243e87c2920e0866d7aa84f6ff.tar.bz2
llvm-e7ec66e56be056243e87c2920e0866d7aa84f6ff.tar.xz
ARM64: spot a vector_shuffle that maps to INS and expand.
Tests will be coming very shortly when all the optimisations needed to support AArch64's neon-copy.ll file are committed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206572 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM64/ARM64ISelLowering.cpp64
1 files changed, 64 insertions, 0 deletions
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp
index d4898ae523..71b4fcb960 100644
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -4093,6 +4093,45 @@ static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return true;
}
+/// isINSMask - Check whether the shuffle mask \p M reproduces one of its two
+/// input vectors in every lane except exactly one.
+///
+/// Lanes equal to -1 (undef) are compatible with either input. Mask values in
+/// [0, NumInputElements) select from the left input and values in
+/// [NumInputElements, 2 * NumInputElements) select from the right input.
+///
+/// \param M                 the shuffle mask to inspect.
+/// \param NumInputElements  number of lanes in each input vector; the mask
+///                          must have exactly this many entries.
+/// \param BulkIsLeft        [out] true if the left input supplies all lanes
+///                          but one, false if the right input does. Only
+///                          written when the function returns true.
+/// \param Anomaly           [out] index of the single lane that does not come
+///                          from the bulk input. Only written when the
+///                          function returns true.
+/// \returns true if the mask has the shape described above. A mask that
+///          matches an input in every lane (no odd lane out) is rejected.
+static bool isINSMask(ArrayRef<int> M, int NumInputElements,
+                      bool &BulkIsLeft, int &Anomaly) {
+  if (static_cast<size_t>(NumInputElements) != M.size())
+    return false;
+
+  // Track, per candidate bulk input, how many lanes disagree with the
+  // identity mapping for that input and which lane disagreed most recently.
+  int LeftMisses = 0, RightMisses = 0;
+  int LeftOddLane = -1, RightOddLane = -1;
+
+  for (int Lane = 0; Lane < NumInputElements; ++Lane) {
+    const int Selected = M[Lane];
+
+    // An undef lane can be satisfied by either input, so it never counts
+    // as a disagreement.
+    if (Selected == -1)
+      continue;
+
+    if (Selected != Lane) {
+      ++LeftMisses;
+      LeftOddLane = Lane;
+    }
+
+    if (Selected != Lane + NumInputElements) {
+      ++RightMisses;
+      RightOddLane = Lane;
+    }
+  }
+
+  // The left input is preferred when both inputs would qualify.
+  if (LeftMisses == 1) {
+    BulkIsLeft = true;
+    Anomaly = LeftOddLane;
+    return true;
+  }
+
+  if (RightMisses == 1) {
+    BulkIsLeft = false;
+    Anomaly = RightOddLane;
+    return true;
+  }
+
+  return false;
+}
+
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
@@ -4362,6 +4401,31 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
+ bool DstIsLeft;
+ int Anomaly;
+ int NumInputElements = V1.getValueType().getVectorNumElements();
+ if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
+ SDValue DstVec = DstIsLeft ? V1 : V2;
+ SDValue DstLaneV = DAG.getConstant(Anomaly, MVT::i64);
+
+ SDValue SrcVec = V1;
+ int SrcLane = ShuffleMask[Anomaly];
+ if (SrcLane >= NumInputElements) {
+ SrcVec = V2;
+ SrcLane -= VT.getVectorNumElements();
+ }
+ SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64);
+
+ EVT ScalarVT = VT.getVectorElementType();
+ if (ScalarVT.getSizeInBits() < 32)
+ ScalarVT = MVT::i32;
+
+ return DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
+ DstLaneV);
+ }
+
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();