From d16c8d0d336638225378466bc17c9db156401817 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 7 Apr 2012 21:19:08 +0000 Subject: 1. Remove the part of r153848 which optimizes shuffle-of-shuffle into a new shuffle node because it could introduce new shuffle nodes that were not supported efficiently by the target. 2. Add a more restrictive shuffle-of-shuffle optimization for cases where the second shuffle reverses the transformation of the first shuffle. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154266 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 ++++++++++++++------------ test/CodeGen/CellSPU/rotate_ops.ll | 2 +- test/CodeGen/X86/2011-10-27-tstore.ll | 10 +++++----- test/CodeGen/X86/SwizzleShuff.ll | 25 +++++++++++++++++++++++++ test/CodeGen/X86/vec_compare-2.ll | 6 ++++-- test/CodeGen/X86/vec_shuffle-37.ll | 10 +++++----- test/CodeGen/X86/widen_shuffle-1.ll | 2 +- 7 files changed, 55 insertions(+), 26 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e0fd3abfb4..ebffb8562e 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7795,19 +7795,20 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If this shuffle node is simply a swizzle of another shuffle node, - // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y). + // and it reverses the swizzle of the previous shuffle then we can + // optimize shuffle(shuffle(x, undef), undef) -> x. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && N1.getOpcode() == ISD::UNDEF) { - SmallVector NewMask; ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - // If the source shuffle has more than one user then do not try to optimize - // it because it may generate a more complex shuffle node. 
However, if the - // source shuffle is also a swizzle (a single source shuffle), our - // transformation is still likely to reduce the number of shuffles and only - // generate a simple shuffle node. - if (N0.getOperand(1).getOpcode() != ISD::UNDEF && !N0.hasOneUse()) + // Shuffle nodes can only reverse shuffles with a single non-undef value. + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the current + // shuffle. + if (OtherSV->getOperand(0).getValueType() != VT) return SDValue(); EVT InVT = N0.getValueType(); @@ -7824,11 +7825,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (Idx >= 0) Idx = OtherSV->getMaskElt(Idx); - NewMask.push_back(Idx); + // The combined shuffle must map each index to itself. + if (Idx != i && Idx != -1) + return SDValue(); } - assert(NewMask.size() == VT.getVectorNumElements() && "Invalid mask size"); - return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0), - OtherSV->getOperand(1), &NewMask[0]); + + return OtherSV->getOperand(0); } return SDValue(); diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll index 8b7af20b4a..9770935276 100644 --- a/test/CodeGen/CellSPU/rotate_ops.ll +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep rot %t1.s | count 85 +; RUN: grep rot %t1.s | count 86 ; RUN: grep roth %t1.s | count 8 ; RUN: grep roti.*5 %t1.s | count 1 ; RUN: grep roti.*27 %t1.s | count 1 diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll index 1712f34565..6e83f6713a 100644 --- a/test/CodeGen/X86/2011-10-27-tstore.ll +++ b/test/CodeGen/X86/2011-10-27-tstore.ll @@ -4,13 +4,13 @@ target triple = "x86_64-unknown-linux-gnu" ;CHECK: ltstore ;CHECK: movq -;CHECK-NEXT: movq -;CHECK-NEXT: ret -define void @ltstore(<4 x i32>* %pIn, <2 x i32>* %pOut) { +;CHECK: movq +;CHECK: ret +define void 
@ltstore(<4 x i32>* %pA, <2 x i32>* %pB) { entry: - %in = load <4 x i32>* %pIn + %in = load <4 x i32>* %pA %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> - store <2 x i32> %j, <2 x i32>* %pOut + store <2 x i32> %j, <2 x i32>* %pB ret void } diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll index c60d9b899c..100817a676 100644 --- a/test/CodeGen/X86/SwizzleShuff.ll +++ b/test/CodeGen/X86/SwizzleShuff.ll @@ -41,3 +41,28 @@ define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) { store <4 x i8> %C, <4 x i8>* %pA ret <4 x i8> %C } + + + +; CHECK: reverse_1 +; CHECK-NOT: shuf +; CHECK: ret +define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) { + %A = load <4 x i32>* %pA + %B = load <4 x i32>* %pB + %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> + ret <4 x i32> %S1 +} + + +; CHECK: no_reverse_shuff +; CHECK: shuf +; CHECK: ret +define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) { + %A = load <4 x i32>* %pA + %B = load <4 x i32>* %pB + %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> + ret <4 x i32> %S1 +} diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll index 946b126fd0..91777f7aa6 100644 --- a/test/CodeGen/X86/vec_compare-2.ll +++ b/test/CodeGen/X86/vec_compare-2.ll @@ -10,8 +10,10 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) { entry: ; CHECK: cfi_def_cfa_offset ; CHECK-NOT: set -; CHECK: pcmpgt -; CHECK: blendvps +; CHECK: movzwl +; CHECK: movzwl +; CHECK: pshufd +; CHECK: pshufb %shr.i = ashr <4 x i32> zeroinitializer, ; <<4 x i32>> [#uses=1] %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1] %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/X86/vec_shuffle-37.ll 
b/test/CodeGen/X86/vec_shuffle-37.ll index 619652aff1..430aa046af 100644 --- a/test/CodeGen/X86/vec_shuffle-37.ll +++ b/test/CodeGen/X86/vec_shuffle-37.ll @@ -27,11 +27,11 @@ entry: define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline { entry: ; CHECK: t02 -; CHECK: mov -; CHECK-NEXT: mov -; CHECK-NEXT: mov -; CHECK-NEXT: mov -; CHECK-NEXT: ret +; CHECK: movaps +; CHECK: shufps +; CHECK: pshufd +; CHECK: movq +; CHECK: ret %0 = bitcast <8 x i32>* %source to <4 x i32>* %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3 %tmp2 = load <4 x i32>* %arrayidx, align 16 diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 9420053716..7bebb274f6 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -33,7 +33,7 @@ entry: define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK: shuf3: -; CHECK: shufd +; CHECK: shufps %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> -- cgit v1.2.3