summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNadav Rotem <nadav.rotem@intel.com>2011-10-29 21:23:04 +0000
committerNadav Rotem <nadav.rotem@intel.com>2011-10-29 21:23:04 +0000
commitb00418af67b36dcd7d70a268ebba3480c1011348 (patch)
treebf0c4b39d2d831774d5ec7b7a182a2c29fb3c3f9
parentf86545ecfdb48a43e62ce7dfd312913b0a24240b (diff)
downloadllvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.gz
llvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.bz2
llvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.xz
Add a new DAGCombine optimization for BUILD_VECTOR.
If all of the inputs are zero/any_extended, create a new simple BV which can be further optimized by other BV optimizations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143297 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp83
-rw-r--r--test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll2
-rw-r--r--test/CodeGen/X86/2011-10-27-tstore.ll16
-rw-r--r--test/CodeGen/X86/vec_shuffle-37.ll10
4 files changed, 106 insertions, 5 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b1afbf20c1..d96ce75a83 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6936,7 +6936,90 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations.
+ EVT SourceType = MVT::Other;
+ bool allExtend = true;
+ bool allAnyExt = true;
+ for (unsigned i = 0; i < NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.getOpcode() == ISD::UNDEF) continue;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort non-extend incoming values.
+ if (!ZeroExt && !AnyExt) {
+ allExtend = false;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+ // Multiple income types. Abort.
+ allExtend = false;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ allAnyExt &= AnyExt;
+ }
+
+ // And we are post type-legalization,
+ // If all of the values are Ext or undef,
+ // We have a non undef entry.
+ if (LegalTypes && allExtend && SourceType != MVT::Other) {
+ bool isLE = TLI.isLittleEndian();
+ EVT InScalarTy = SourceType.getScalarType();
+ EVT OutScalarTy = N->getValueType(0).getScalarType();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/InScalarTy.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = allAnyExt ? DAG.getUNDEF(InScalarTy):
+ DAG.getConstant(0, InScalarTy);
+
+ unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
+ SmallVector<SDValue,8> Ops(NewBVElems , Filler);
+
+ // Populate the new build_vector
+ for (unsigned i=0; i < N->getNumOperands(); ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert(Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(InScalarTy);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), InScalarTy, NewBVElems);
+ assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
+ "Invalid vector size");
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VecVT, &Ops[0], Ops.size());
+
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+ }
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
index 025ab2e7c1..63a7da8755 100644
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 3
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 2
define i64 @a(i32 %a, i32 %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll
new file mode 100644
index 0000000000..016e02c3d5
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: ltstore
+;CHECK: pshufd
+;CHECK: pshufd
+;CHECK: ret
+define void @ltstore() {
+entry:
+ %in = load <4 x i32>* undef
+ %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ store <2 x i32> %j, <2 x i32>* undef
+ ret void
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index e91a7347cc..0608398938 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -26,10 +26,12 @@ entry:
define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
entry:
-; CHECK: movl 36({{%rdi|%rcx}})
-; CHECK-NEXT: movl 48({{%rdi|%rcx}})
-; CHECK: punpcklqdq
-; CHECK: movq %xmm0, ({{%rsi|%rdx}})
+; CHECK: t02
+; CHECK: movaps
+; CHECK: shufps
+; CHECK: pshufd
+; CHECK: movq
+; CHECK: ret
%0 = bitcast <8 x i32>* %source to <4 x i32>*
%arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
%tmp2 = load <4 x i32>* %arrayidx, align 16