diff options
author | Kevin Qin <Kevin.Qin@arm.com> | 2014-01-13 01:56:29 +0000 |
---|---|---|
committer | Kevin Qin <Kevin.Qin@arm.com> | 2014-01-13 01:56:29 +0000 |
commit | 5c8b8e61df9744ec1efbf6760abbcb62058541a8 (patch) | |
tree | ce610618c86a3e3654873031262ca20fb3bc2db6 /lib/Target | |
parent | b6e0946d407acf62a3f55ae22ce578d5af3bc5cc (diff) | |
download | llvm-5c8b8e61df9744ec1efbf6760abbcb62058541a8.tar.gz llvm-5c8b8e61df9744ec1efbf6760abbcb62058541a8.tar.bz2 llvm-5c8b8e61df9744ec1efbf6760abbcb62058541a8.tar.xz |
[AArch64 NEON] Add more scenarios to use perm instructions when lowering shuffle_vector
This patch covered 2 more scenarios:
1. Two operands of shuffle_vector are the same, like
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
2. One of operands is undef, like
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
After this patch, perm instructions will have chance to be emitted instead of lots of INS.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199069 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 47 |
1 files changed, 37 insertions, 10 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 0d9c516660..ce9ce3aeb5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4242,7 +4242,7 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { // isPermuteMask - Check whether the vector shuffle matches to UZP, ZIP and // TRN instruction. -static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) { +static unsigned isPermuteMask(ArrayRef<int> M, EVT VT, bool isV2undef) { unsigned NumElts = VT.getVectorNumElements(); if (NumElts < 4) return 0; @@ -4251,7 +4251,10 @@ static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) { // Check UZP1 for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned)M[i] != i * 2) { + unsigned answer = i * 2; + if (isV2undef && answer >= NumElts) + answer -= NumElts; + if (M[i] != -1 && (unsigned)M[i] != answer) { ismatch = false; break; } @@ -4262,7 +4265,10 @@ static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) { // Check UZP2 ismatch = true; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned)M[i] != i * 2 + 1) { + unsigned answer = i * 2 + 1; + if (isV2undef && answer >= NumElts) + answer -= NumElts; + if (M[i] != -1 && (unsigned)M[i] != answer) { ismatch = false; break; } @@ -4273,7 +4279,10 @@ static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) { // Check ZIP1 ismatch = true; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned)M[i] != i / 2 + NumElts * (i % 2)) { + unsigned answer = i / 2 + NumElts * (i % 2); + if (isV2undef && answer >= NumElts) + answer -= NumElts; + if (M[i] != -1 && (unsigned)M[i] != answer) { ismatch = false; break; } @@ -4284,7 +4293,10 @@ static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) { // Check ZIP2 ismatch = true; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned)M[i] != (NumElts + i) / 2 + NumElts * (i % 2)) { + unsigned answer = (NumElts + i) / 2 + NumElts * (i % 2); + if (isV2undef && answer >= NumElts) + answer -= NumElts; + if (M[i] != -1 && (unsigned)M[i] != answer) { ismatch = false; break; } @@ -4295,7 +4307,10 @@ static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) { // Check TRN1 ismatch = true; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned)M[i] != i + (NumElts - 1) * (i % 2)) { + unsigned answer = i + (NumElts - 1) * (i % 2); + if (isV2undef && answer >= NumElts) + answer -= NumElts; + if (M[i] != -1 && (unsigned)M[i] != answer) { ismatch = false; break; } @@ -4306,7 +4321,10 @@ static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) { // Check TRN2 ismatch = true; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned)M[i] != 1 + i + (NumElts - 1) * (i % 2)) { + unsigned answer = 1 + i + (NumElts - 1) * (i % 2); + if (isV2undef && answer >= NumElts) + answer -= NumElts; + if (M[i] != -1 && (unsigned)M[i] != answer) { ismatch = false; break; } @@ -4343,9 +4361,18 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (isREVMask(ShuffleMask, VT, 16)) return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1); - unsigned ISDNo = isPermuteMask(ShuffleMask, VT); - if (ISDNo) - return DAG.getNode(ISDNo, dl, VT, V1, V2); + unsigned ISDNo; + if (V2.getOpcode() == ISD::UNDEF) + ISDNo = isPermuteMask(ShuffleMask, VT, true); + else + ISDNo = isPermuteMask(ShuffleMask, VT, false); + + if (ISDNo) { + if (V2.getOpcode() == ISD::UNDEF) + return DAG.getNode(ISDNo, dl, VT, V1, V1); + else + return DAG.getNode(ISDNo, dl, VT, V1, V2); + } // If the element of shuffle mask are all the same constant, we can // transform it into either NEON_VDUP or NEON_VDUPLANE |