summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-09-30 07:17:56 +0000
committerNadav Rotem <nrotem@apple.com>2012-09-30 07:17:56 +0000
commit73fab91f2cd5fa3dbe4593506ac4a31aa3faf897 (patch)
tree91e463fd0509b2545b7bde7aac893736e625a5e6
parente5f163a3b98a531c3ff35da0e97e7364c0e04b95 (diff)
downloadllvm-73fab91f2cd5fa3dbe4593506ac4a31aa3faf897.tar.gz
llvm-73fab91f2cd5fa3dbe4593506ac4a31aa3faf897.tar.bz2
llvm-73fab91f2cd5fa3dbe4593506ac4a31aa3faf897.tar.xz
Revert r164910 because it causes failures to several phase2 builds.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164911 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp254
-rw-r--r--test/CodeGen/X86/MergeConsecutiveStores.ll150
-rw-r--r--test/CodeGen/X86/loop-strength-reduce-2.ll10
-rw-r--r--test/CodeGen/X86/loop-strength-reduce-3.ll5
-rw-r--r--test/CodeGen/X86/loop-strength-reduce.ll5
5 files changed, 12 insertions, 412 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index db7140d114..da3293bc8c 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -301,10 +301,6 @@ namespace {
/// looking for a better chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
- /// Merge consecutive store operations into a wide store.
- /// \return True if some memory operations were changed.
- bool MergeConsecutiveStores(StoreSDNode *N);
-
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -7427,250 +7423,6 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
-/// Returns the base pointer and an integer offset from that object.
-static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
- if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- SDValue Base = Ptr->getOperand(0);
- return std::make_pair(Base, Offset);
- }
-
- return std::make_pair(Ptr, 0);
-}
-
-struct ConsecutiveMemoryChainSorter {
- typedef std::pair<LSBaseSDNode*, int64_t> MemLink;
- bool operator()(MemLink LHS, MemLink RHS) {
- return LHS.second < RHS.second;
- }
-};
-
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
- EVT MemVT = St->getMemoryVT();
- int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
-
- // Don't handle vectors.
- if (MemVT.isVector() || !MemVT.isSimple())
- return false;
-
- // Perform an early exit check. Do not bother looking at stored values that
- // are not constants or loads.
- SDValue StoredVal = St->getValue();
- if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
- !isa<LoadSDNode>(StoredVal))
- return false;
-
- // Is this a load-to-store or a const-store.
- bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
-
- // Only look at ends of store chains.
- SDValue Chain = SDValue(St, 1);
- if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
- return false;
-
- // This holds the base pointer and the offset in bytes from the base pointer.
- std::pair<SDValue, int64_t> BasePtr =
- GetPointerBaseAndOffset(St->getBasePtr());
-
- // We must have a base and an offset.
- if (!BasePtr.first.getNode())
- return false;
-
- // Do not handle stores to undef base pointers.
- if (BasePtr.first.getOpcode() == ISD::UNDEF)
- return false;
-
- SmallVector<std::pair<StoreSDNode*, int64_t>, 8> StoreNodes;
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- StoreSDNode *Index = St;
- while (Index) {
- // If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 1)->hasOneUse())
- break;
-
- // Find the base pointer and offset for this memory node.
- std::pair<SDValue, int64_t> Ptr =
- GetPointerBaseAndOffset(Index->getBasePtr());
-
- // Check that the base pointer is the same as the original one.
- if (Ptr.first.getNode() != BasePtr.first.getNode())
- break;
-
- // Check that the alignment is the same.
- if (Index->getAlignment() != St->getAlignment())
- break;
-
- // The memory operands must not be volatile.
- if (Index->isVolatile() || Index->isIndexed())
- break;
-
- // No truncation.
- if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
- if (St->isTruncatingStore())
- break;
-
- // The stored memory type must be the same.
- if (Index->getMemoryVT() != MemVT)
- break;
-
- // We found a potential memory operand to merge.
- StoreNodes.push_back(std::make_pair(Index,Ptr.second));
-
- // Move up the chain to the next memory operation.
- Index = dyn_cast<StoreSDNode>(Index->getChain().getNode());
- }
-
- // Check if there is anything to merge.
- if (StoreNodes.size() < 2)
- return false;
-
- // Remember which node is the earliest node in the chain.
- LSBaseSDNode *EarliestOp = StoreNodes.back().first;
-
- // Sort the memory operands according to their distance from the base pointer.
- std::sort(StoreNodes.begin(), StoreNodes.end(),
- ConsecutiveMemoryChainSorter());
-
- LSBaseSDNode *FirstInChain = StoreNodes[0].first;
-
- // Scan the memory operations on the chain and find the first non-consecutive
- // store memory address.
- unsigned LastConsecutiveStore = 0;
- int64_t StartAddress = StoreNodes[0].second;
- for (unsigned i=1; i<StoreNodes.size(); ++i) {
- int64_t CurrAddress = StoreNodes[i].second;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- LastConsecutiveStore = i;
- }
-
- // Store the constants into memory as one consecutive store.
- if (!IsLoadSrc) {
- unsigned LastConst = 0;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
- SDValue StoredVal = StoreNodes[i].first->getValue();
- bool IsConst = (isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal));
- if (!IsConst)
- break;
- LastConst = i;
- }
- unsigned NumElem = std::min(LastConsecutiveStore + 1, LastConst + 1);
- if (NumElem < 2)
- return false;
-
- EVT JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
- DebugLoc DL = StoreNodes[0].first->getDebugLoc();
- SmallVector<SDValue, 8> Ops;
-
- for (unsigned i = 0; i < NumElem ; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first);
- Ops.push_back(St->getValue());
- }
-
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL,
- JointMemOpVT, &Ops[0], Ops.size());
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, BV,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), false, false,
- FirstInChain->getAlignment());
-
- for (unsigned i = 0; i < NumElem ; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first);
- CombineTo(St, NewStore);
- }
- return true;
- }
-
- // Look for load nodes wich are used by the stored values.
- SmallVector<std::pair<LoadSDNode*, int64_t>, 8> LoadNodes;
-
- // Find acceptible loads. Loads need to have the same chain (token factor),
- // must not be zext, volatile, indexed, and they must be consecutive.
- SDValue LdBasePtr;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(StoreNodes[i].first->getValue());
- if (!Ld) break;
-
- // Loads must only have one use.
- if (!Ld->hasNUsesOfValue(1, 0))
- break;
-
- // Check that the alignment is the same as the stores.
- if (Ld->getAlignment() != St->getAlignment())
- break;
-
- // The memory operands must not be volatile.
- if (Ld->isVolatile() || Ld->isIndexed())
- break;
-
- if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
- break;
-
- // The stored memory type must be the same.
- if (Ld->getMemoryVT() != MemVT)
- break;
-
- std::pair<SDValue, int64_t> LdPtr =
- GetPointerBaseAndOffset(Ld->getBasePtr());
-
- // If this is not the first ptr that we check.
- if (LdBasePtr.getNode()) {
- // The base ptr must be the same,
- if (LdPtr.first != LdBasePtr)
- break;
- } else {
- LdBasePtr = LdPtr.first;
- }
-
- // We found a potential memory operand to merge.
- LoadNodes.push_back(std::make_pair(Ld, LdPtr.second));
- }
-
- if (LoadNodes.size() < 2)
- return false;
-
- // Scan the memory operations on the chain and find the first non-consecutive
- // load memory address.
- unsigned LastConsecutiveLoad = 0;
- StartAddress = LoadNodes[0].second;
- for (unsigned i=1; i<LoadNodes.size(); ++i) {
- int64_t CurrAddress = LoadNodes[i].second;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- LastConsecutiveLoad = i;
- }
-
- unsigned NumElem =
- std::min(LastConsecutiveStore + 1, LastConsecutiveLoad + 1);
-
- EVT JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
- DebugLoc LoadDL = LoadNodes[0].first->getDebugLoc();
- DebugLoc StoreDL = StoreNodes[0].first->getDebugLoc();
-
- LoadSDNode *FirstLoad = LoadNodes[0].first;
- SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
- FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(),
- false, false, false,
- FirstLoad->getAlignment());
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), false, false,
- FirstInChain->getAlignment());
-
- for (unsigned i = 0; i < NumElem ; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first);
- CombineTo(St, NewStore);
- }
-
- return true;
-}
-
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -7872,12 +7624,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->getAlignment());
}
-
- // Only perform this optimization before the types are legal, because we
- // don't want to generate illegal types in this optimization.
- if (!LegalTypes && MergeConsecutiveStores(ST))
- return SDValue(N, 0);
-
return ReduceLoadOpStoreWidth(N);
}
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
deleted file mode 100644
index 435f38c8ad..0000000000
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ /dev/null
@@ -1,150 +0,0 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
-
-%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
-
-@a = common global [10000 x %struct.A] zeroinitializer, align 8
-
-; Move all of the constants using a single vector store.
-; CHECK: merge_const_store
-; CHECK: movq %xmm0
-; CHECK: ret
-define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
- %1 = icmp sgt i32 %count, 0
- br i1 %1, label %.lr.ph, label %._crit_edge
-.lr.ph:
- %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
- %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
- %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
- store i8 1, i8* %2, align 1
- %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
- store i8 2, i8* %3, align 1
- %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
- store i8 3, i8* %4, align 1
- %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
- store i8 4, i8* %5, align 1
- %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
- store i8 5, i8* %6, align 1
- %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
- store i8 6, i8* %7, align 1
- %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
- store i8 7, i8* %8, align 1
- %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
- store i8 8, i8* %9, align 1
- %10 = add nsw i32 %i.02, 1
- %11 = getelementptr inbounds %struct.A* %.01, i64 1
- %exitcond = icmp eq i32 %10, %count
- br i1 %exitcond, label %._crit_edge, label %.lr.ph
-._crit_edge:
- ret void
-}
-
-; Move the first 4 constants as a single vector. Move the rest as scalars.
-; CHECK: merge_nonconst_store
-; CHECK: movd %xmm0
-; CHECK: movb
-; CHECK: movb
-; CHECK: movb
-; CHECK: movb
-; CHECK: ret
-define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
- %1 = icmp sgt i32 %count, 0
- br i1 %1, label %.lr.ph, label %._crit_edge
-.lr.ph:
- %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
- %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
- %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
- store i8 1, i8* %2, align 1
- %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
- store i8 2, i8* %3, align 1
- %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
- store i8 3, i8* %4, align 1
- %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
- store i8 4, i8* %5, align 1
- %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
- store i8 %zz, i8* %6, align 1 ; <----------- Not a const;
- %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
- store i8 6, i8* %7, align 1
- %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
- store i8 7, i8* %8, align 1
- %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
- store i8 8, i8* %9, align 1
- %10 = add nsw i32 %i.02, 1
- %11 = getelementptr inbounds %struct.A* %.01, i64 1
- %exitcond = icmp eq i32 %10, %count
- br i1 %exitcond, label %._crit_edge, label %.lr.ph
-._crit_edge:
- ret void
-}
-
-
-;CHECK: merge_loads
-; load:
-;CHECK: movw
-; store:
-;CHECK: movw
-;CHECK: ret
-define void @merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
- %1 = icmp sgt i32 %count, 0
- br i1 %1, label %.lr.ph, label %._crit_edge
-
-.lr.ph: ; preds = %0
- %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
- %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
- br label %4
-
-; <label>:4 ; preds = %4, %.lr.ph
- %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
- %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
- %5 = load i8* %2, align 1
- %6 = load i8* %3, align 1
- %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
- store i8 %5, i8* %7, align 1
- %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
- store i8 %6, i8* %8, align 1
- %9 = add nsw i32 %i.02, 1
- %10 = getelementptr inbounds %struct.A* %.01, i64 1
- %exitcond = icmp eq i32 %9, %count
- br i1 %exitcond, label %._crit_edge, label %4
-
-._crit_edge: ; preds = %4, %0
- ret void
-}
-
-; The loads and the stores are interleved. Can't merge them.
-;CHECK: no_merge_loads
-;CHECK: movb
-;CHECK: movb
-;CHECK: movb
-;CHECK: movb
-;CHECK: ret
-define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
- %1 = icmp sgt i32 %count, 0
- br i1 %1, label %.lr.ph, label %._crit_edge
-
-.lr.ph: ; preds = %0
- %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
- %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
- br label %a4
-
-a4: ; preds = %4, %.lr.ph
- %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
- %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
- %a5 = load i8* %2, align 1
- %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
- store i8 %a5, i8* %a7, align 1
- %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
- %a6 = load i8* %3, align 1
- store i8 %a6, i8* %a8, align 1
- %a9 = add nsw i32 %i.02, 1
- %a10 = getelementptr inbounds %struct.A* %.01, i64 1
- %exitcond = icmp eq i32 %a9, %count
- br i1 %exitcond, label %._crit_edge, label %a4
-
-._crit_edge: ; preds = %4, %0
- ret void
-}
-
-
diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll
index b094fed2f6..b546462b68 100644
--- a/test/CodeGen/X86/loop-strength-reduce-2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -1,18 +1,20 @@
-; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=pic | FileCheck %s -check-prefix=PIC
-; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC
;
; Make sure the common loop invariant A is hoisted up to preheader,
; since too many registers are needed to subsume it into the addressing modes.
; It's safe to sink A in when it's not pic.
; PIC: align
-; PIC: movlpd %xmm0, -4([[REG:%e[a-z]+]])
+; PIC: movl $4, -4([[REG:%e[a-z]+]])
+; PIC: movl $5, ([[REG]])
; PIC: addl $4, [[REG]]
; PIC: decl {{%e[[a-z]+}}
; PIC: jne
; STATIC: align
-; STATIC: movlpd %xmm0, -4(%ecx)
+; STATIC: movl $4, -4(%ecx)
+; STATIC: movl $5, (%ecx)
; STATIC: addl $4, %ecx
; STATIC: decl %eax
; STATIC: jne
diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll
index e2d77f2a6f..b1c9fb9c07 100644
--- a/test/CodeGen/X86/loop-strength-reduce-3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s
; CHECK: align
-; CHECK: movlpd %xmm0, -4(%ecx)
+; CHECK: movl $4, -4(%ecx)
+; CHECK: movl $5, (%ecx)
; CHECK: addl $4, %ecx
; CHECK: decl %eax
; CHECK: jne
diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll
index d197451eee..42c6ac4983 100644
--- a/test/CodeGen/X86/loop-strength-reduce.ll
+++ b/test/CodeGen/X86/loop-strength-reduce.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
; CHECK: align
-; CHECK: movlpd %xmm0, -4(%ecx)
+; CHECK: movl $4, -4(%ecx)
+; CHECK: movl $5, (%ecx)
; CHECK: addl $4, %ecx
; CHECK: decl %eax
; CHECK: jne