summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorElena Demikhovsky <elena.demikhovsky@intel.com>2012-09-06 12:42:01 +0000
committerElena Demikhovsky <elena.demikhovsky@intel.com>2012-09-06 12:42:01 +0000
commit4178946afba97a9a432a33ab1596f49a1ac090e7 (patch)
treeaab1b714f0321df354847bb7780230d4cddd5fd8
parente757640df0615510dbc42921cf6271aa76c405ee (diff)
downloadllvm-4178946afba97a9a432a33ab1596f49a1ac090e7.tar.gz
llvm-4178946afba97a9a432a33ab1596f49a1ac090e7.tar.bz2
llvm-4178946afba97a9a432a33ab1596f49a1ac090e7.tar.xz
AVX2 optimization.
Added generation of VPSHUB instruction for <32 x i8> vector shuffle when possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163312 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp40
-rw-r--r--test/CodeGen/X86/avx2-shuffle.ll11
2 files changed, 51 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 78bc7fa75a..831386f408 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6011,6 +6011,40 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
}
+// v32i8 shuffles - Translate to VPSHUFB if possible.
+static
+SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG,
+ const X86TargetLowering &TLI) {
+ EVT VT = SVOp->getValueType(0);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ ArrayRef<int> MaskVals = SVOp->getMask();
+
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+
+ if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() || !V2IsUndef)
+ return SDValue();
+
+ SmallVector<SDValue,32> pshufbMask;
+ for (unsigned i = 0; i != 32; i++) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || EltIdx >= 32)
+ EltIdx = 0x80;
+ else {
+ if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16))
+ // Cross lane is not allowed.
+ return SDValue();
+ EltIdx &= 0xf;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v32i8, &pshufbMask[0], 32));
+}
+
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
@@ -6837,6 +6871,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return NewOp;
}
+ if (VT == MVT::v32i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
// Handle all 128-bit wide vectors with 4 elements, and match them with
// several different shuffle types.
if (NumElems == 4 && VT.is128BitVector())
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
index c5899fa274..8af9373e50 100644
--- a/test/CodeGen/X86/avx2-shuffle.ll
+++ b/test/CodeGen/X86/avx2-shuffle.ll
@@ -26,3 +26,14 @@ entry:
%shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %shuffle.i
}
+
+; CHECK: vpshufb_test
+; CHECK; vpshufb {{.*\(%r.*}}, %ymm
+; CHECK: ret
+define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind {
+ %S = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
+ i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
+ i32 18, i32 19, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25,
+ i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
+ ret <32 x i8>%S
+} \ No newline at end of file