summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBob Wilson <bob.wilson@apple.com>2010-08-17 05:54:34 +0000
committerBob Wilson <bob.wilson@apple.com>2010-08-17 05:54:34 +0000
commit7aaf5bf3db44c94bd630e07d63c3a4a1d92e44f4 (patch)
treed47e06c81bb4c764050efcf537e1225383c95bc4
parente1bcb440dc0ca3c41fda1c0c581abfc4f38ca170 (diff)
downloadllvm-7aaf5bf3db44c94bd630e07d63c3a4a1d92e44f4.tar.gz
llvm-7aaf5bf3db44c94bd630e07d63c3a4a1d92e44f4.tar.bz2
llvm-7aaf5bf3db44c94bd630e07d63c3a4a1d92e44f4.tar.xz
Allow more cases of undef shuffle indices and add tests for them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@111226 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp34
-rw-r--r--test/CodeGen/ARM/vext.ll20
-rw-r--r--test/CodeGen/ARM/vrev.ll18
-rw-r--r--test/CodeGen/ARM/vtrn.ll27
-rw-r--r--test/CodeGen/ARM/vuzp.ll27
-rw-r--r--test/CodeGen/ARM/vzip.ll27
6 files changed, 141 insertions, 12 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b05c5dd259..c2bd471faa 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3148,6 +3148,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3163,6 +3168,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
ReverseVEXT = true;
}
+ if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
@@ -3188,13 +3194,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
- if ((unsigned) M[i] !=
- (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
@@ -3210,8 +3219,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + NumElts + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
return false;
}
return true;
@@ -3229,9 +3238,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if (M[i] < 0) continue;
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
return false;
}
return true;
@@ -3246,6 +3254,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != 2 * i + WhichResult)
return false;
}
@@ -3271,7 +3280,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
- if ((unsigned) M[i + j * Half] != Idx)
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
return false;
Idx += 2;
}
@@ -3294,8 +3304,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx + NumElts)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
return false;
Idx += 1;
}
@@ -3320,8 +3330,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
return false;
Idx += 1;
}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index c11a67c6c4..e460a84f62 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
ret <4 x i32> %tmp3
}
+; Undef shuffle indices should not prevent matching to VEXT:
+
+define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd_undef:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq_undef:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
+ ret <16 x i8> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index deed554d84..e1fe64b02d 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
ret <16 x i8> %tmp2
}
+
+; Undef shuffle indices should not prevent matching to VREV:
+
+define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8_undef:
+;CHECK: vrev64.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16_undef:
+;CHECK: vrev32.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
+ ret <8 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index 10bb10ac24..b1c2f93b47 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
+
+; Undef shuffle indices should not prevent matching to VTRN:
+
+define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8_undef:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16_undef:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 6cef188d76..9130f62891 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
+
+; Undef shuffle indices should not prevent matching to VUZP:
+
+define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8_undef:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16_undef:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index a9ecdcab42..926970aeb2 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
+
+; Undef shuffle indices should not prevent matching to VZIP:
+
+define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+