author     Daniel Dunbar <daniel@zuster.org>   2011-11-04 00:48:26 +0000
committer  Daniel Dunbar <daniel@zuster.org>   2011-11-04 00:48:26 +0000
commit     28eb1c5217416aa60b06b8b569a5de8047f75514 (patch)
tree       5e5886018631e575fa6d6b51eea0c4070e460d61
parent     d5574f62d3932ba4adbdfb7e292c3e1e8116a06f (diff)
Speculatively revert "DeadStoreElimination can now trim the size of a store if
the end of it is dead.", which appears to break bootstrapping LLVM.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143668 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp             | 142
-rw-r--r--  test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll  |  78
2 files changed, 33 insertions(+), 187 deletions(-)
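
For context: the patch being reverted taught DSE to shorten a memset/memcpy whose
tail is completely overwritten by a later store, instead of only deleting stores
that are overwritten in full. A minimal C-level sketch of that behavior, adapted
from the @write28to32 test removed below (the function names here are illustrative,
not part of the patch):

#include <cstring>

void beforeDSE(int *p) {
  std::memset(p, 0, 32);   // bytes 28..31 are rewritten by the store below
  p[7] = 1;                // overwrites bytes 28..31
}

void afterTrimming(int *p) {
  std::memset(p, 0, 28);   // the dead tail of the earlier memset is trimmed off
  p[7] = 1;
}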
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 921bec8f4d..c0738a951c 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -239,24 +239,6 @@ static bool isRemovable(Instruction *I) {
}
}
-
-/// isShortenable - Returns true if this instruction can be safely shortened in
-/// length.
-static bool isShortenable(Instruction *I) {
- // Don't shorten stores for now
- if (isa<StoreInst>(I))
- return false;
-
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::memset:
- case Intrinsic::memcpy:
- // Do shorten memory intrinsics.
- return true;
- }
-}
-
/// getStoredPointerOperand - Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
@@ -311,24 +293,11 @@ static bool isObjectPointerWithTrustworthySize(const Value *V) {
return false;
}
-namespace {
- enum OverwriteResult
- {
- OverwriteComplete,
- OverwriteEnd,
- OverwriteUnknown
- };
-}
-
-/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
+/// isCompleteOverwrite - Return true if a store to the 'Later' location
/// completely overwrites a store to the 'Earlier' location.
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
-/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
-static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
- const AliasAnalysis::Location &Earlier,
- AliasAnalysis &AA,
- int64_t& EarlierOff,
- int64_t& LaterOff) {
+static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -342,24 +311,23 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If we have no TargetData information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (AA.getTargetData() == 0 &&
- Later.Ptr->getType() == Earlier.Ptr->getType())
- return OverwriteComplete;
-
- return OverwriteUnknown;
+ if (AA.getTargetData() == 0)
+ return Later.Ptr->getType() == Earlier.Ptr->getType();
+ return false;
}
// Make sure that the Later size is >= the Earlier size.
- if (Later.Size >= Earlier.Size)
- return OverwriteComplete;
+ if (Later.Size < Earlier.Size)
+ return false;
+ return true;
}
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
Earlier.Size == AliasAnalysis::UnknownSize ||
- AA.getTargetData() == 0)
- return OverwriteUnknown;
+ Later.Size <= Earlier.Size || AA.getTargetData() == 0)
+ return false;
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
@@ -372,27 +340,26 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
- return OverwriteUnknown;
+ return false;
// If the "Later" store is to a recognizable object, get its size.
if (isObjectPointerWithTrustworthySize(UO2)) {
uint64_t ObjectSize =
TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
if (ObjectSize == Later.Size)
- return OverwriteComplete;
+ return true;
}
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- EarlierOff = 0;
- LaterOff = 0;
+ int64_t EarlierOff = 0, LaterOff = 0;
const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
- return OverwriteUnknown;
+ return false;
// The later store completely overlaps the earlier store if:
//
@@ -410,24 +377,11 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
- Later.Size > Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
- return OverwriteComplete;
-
- // The other interesting case is if the later store overwrites the end of
- // the earlier store
- //
- // |--earlier--|
- // |-- later --|
- //
- // In this case we may want to trim the size of earlier to avoid generating
- // writes to addresses which will definitely be overwritten later
- if (LaterOff > EarlierOff &&
- LaterOff + Later.Size >= EarlierOff + Earlier.Size)
- return OverwriteEnd;
+ return true;
// Otherwise, they don't completely overlap.
- return OverwriteUnknown;
+ return false;
}
/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
@@ -551,52 +505,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// If we find a write that is a) removable (i.e., non-volatile), b) is
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
- if (isRemovable(DepWrite) &&
+ if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
- DepWriteOffset, InstWriteOffset);
- if (OR == OverwriteComplete) {
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
- << *DepWrite << "\n KILLER: " << *Inst << '\n');
-
- // Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD);
- ++NumFastStores;
- MadeChange = true;
-
- // DeleteDeadInstruction can delete the current instruction in loop
- // cases, reset BBI.
- BBI = Inst;
- if (BBI != BB.begin())
- --BBI;
- break;
- } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
- // TODO: base this on the target vector size so that if the earlier
- // store was too small to get vector writes anyway then its likely
- // a good idea to shorten it
- // Power of 2 vector writes are probably always a bad idea to optimize
- // as any store/memset/memcpy is likely using vector instructions so
- // shortening it to not vector size is likely to be slower
- MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
- unsigned DepWriteAlign = DepIntrinsic->getAlignment();
- if (llvm::isPowerOf2_64(InstWriteOffset) ||
- ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
- << *DepWrite << "\n KILLER (offset "
- << InstWriteOffset << ", "
- << DepLoc.Size << ")"
- << *Inst << '\n');
-
- Value* DepWriteLength = DepIntrinsic->getLength();
- Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
- InstWriteOffset -
- DepWriteOffset);
- DepIntrinsic->setLength(TrimmedLength);
- MadeChange = true;
- }
- }
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
+
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
}
// If this is a may-aliased store that is clobbering the store value, we
diff --git a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
deleted file mode 100644
index 828ccc57a4..0000000000
--- a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: opt < %s -basicaa -dse -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-%struct.vec2 = type { <4 x i32>, <4 x i32> }
-%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
-
-@glob1 = global %struct.vec2 zeroinitializer, align 16
-@glob2 = global %struct.vec2plusi zeroinitializer, align 16
-
-define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
-; CHECK: @write24to28
-entry:
- %arrayidx0 = getelementptr inbounds i32* %p, i64 1
- %p3 = bitcast i32* %arrayidx0 to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
- call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
- %arrayidx1 = getelementptr inbounds i32* %p, i64 7
- store i32 1, i32* %arrayidx1, align 4
- ret void
-}
-
-define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
-; CHECK: @write28to32
-entry:
- %p3 = bitcast i32* %p to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
- call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
- %arrayidx1 = getelementptr inbounds i32* %p, i64 7
- store i32 1, i32* %arrayidx1, align 4
- ret void
-}
-
-define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
-; CHECK: @dontwrite28to32memset
-entry:
- %p3 = bitcast i32* %p to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
- call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
- %arrayidx1 = getelementptr inbounds i32* %p, i64 7
- store i32 1, i32* %arrayidx1, align 4
- ret void
-}
-
-define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
-; CHECK: @write32to36
-entry:
- %0 = bitcast %struct.vec2plusi* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
- %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2
- store i32 1, i32* %c, align 4
- ret void
-}
-
-define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
-; CHECK: @write16to32
-entry:
- %0 = bitcast %struct.vec2* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
- %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1
- store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
- ret void
-}
-
-define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
-; CHECK: @dontwrite28to32memcpy
-entry:
- %0 = bitcast %struct.vec2* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
- %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7
- store i32 1, i32* %arrayidx1, align 4
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
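
As an aside on the containment test this revert keeps in place (the case where the
later store completely covers the earlier one), the comparison has to be done with
care because the offsets are signed while the sizes are unsigned. A self-contained
sketch of just that check, with an illustrative helper name and made-up sizes:

#include <cassert>
#include <cstdint>

// The earlier store is removable when it lies entirely inside the later store.
// Only form EarlierOff - LaterOff after checking it is non-negative, then do the
// remaining arithmetic in uint64_t, mirroring the signed/unsigned caveat above.
static bool completelyOverwrites(int64_t EarlierOff, uint64_t EarlierSize,
                                 int64_t LaterOff, uint64_t LaterSize) {
  return EarlierOff >= LaterOff &&
         uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
}

int main() {
  // |------- later (offset 0, 32 bytes) -------|
  //     |--- earlier (offset 4, 28 bytes) -----|   fully covered -> removable
  assert(completelyOverwrites(4, 28, 0, 32));
  // Earlier store extends past the end of the later one -> not removable.
  assert(!completelyOverwrites(4, 32, 0, 32));
  return 0;
}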