-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 244
-rw-r--r--  test/Transforms/Mem2Reg/ignore-lifetime.ll       |  26
-rw-r--r--  test/Transforms/Mem2Reg/use-analysis.ll          |  70
3 files changed, 227 insertions(+), 113 deletions(-)
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 1b51255251..7afca82d61 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -45,6 +46,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InstVisitor.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -56,56 +58,13 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
-bool llvm::isAllocaPromotable(const AllocaInst *AI) {
- // FIXME: If the memory unit is of pointer or integer type, we can permit
- // assignments to subsections of the memory unit.
-
- // Only allow direct and non-volatile loads and stores...
- for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) { // Loop over all of the uses of the alloca
- const User *U = *UI;
- if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // Note that atomic loads can be transformed; atomic semantics do
- // not have any meaning for a local alloca.
- if (LI->isVolatile())
- return false;
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI)
- return false; // Don't allow a store OF the AI, only INTO the AI.
- // Note that atomic stores can be transformed; atomic semantics do
- // not have any meaning for a local alloca.
- if (SI->isVolatile())
- return false;
- } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
- return false;
- } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
- return false;
- if (!onlyUsedByLifetimeMarkers(BCI))
- return false;
- } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
- return false;
- if (!GEPI->hasAllZeroIndices())
- return false;
- if (!onlyUsedByLifetimeMarkers(GEPI))
- return false;
- } else {
- return false;
- }
- }
-
- return true;
-}
-
namespace {
-struct AllocaInfo {
+struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
SmallVector<BasicBlock *, 32> DefiningBlocks;
SmallVector<BasicBlock *, 32> UsingBlocks;
+ Type *AllocaTy;
StoreInst *OnlyStore;
BasicBlock *OnlyBlock;
bool OnlyUsedInOneBlock;
@@ -116,6 +75,7 @@ struct AllocaInfo {
void clear() {
DefiningBlocks.clear();
UsingBlocks.clear();
+ AllocaTy = 0;
OnlyStore = 0;
OnlyBlock = 0;
OnlyUsedInOneBlock = true;
@@ -125,39 +85,107 @@ struct AllocaInfo {
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
/// by the rest of the pass to reason about the uses of this alloca.
- void AnalyzeAlloca(AllocaInst *AI) {
+ bool analyzeAlloca(AllocaInst &AI) {
clear();
- // As we scan the uses of the alloca instruction, keep track of stores,
- // and decide whether all of the loads and stores to the alloca are within
- // the same basic block.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E;) {
- Instruction *User = cast<Instruction>(*UI++);
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- // Remember the basic blocks which define new values for the alloca
- DefiningBlocks.push_back(SI->getParent());
- AllocaPointerVal = SI->getOperand(0);
- OnlyStore = SI;
- } else {
- LoadInst *LI = cast<LoadInst>(User);
- // Otherwise it must be a load instruction, keep track of variable
- // reads.
- UsingBlocks.push_back(LI->getParent());
- AllocaPointerVal = LI;
- }
+ AllocaTy = AI.getAllocatedType();
+ enqueueUsers(AI);
+
+ // Walk queued up uses in the worklist to handle nested uses.
+ while (!UseWorklist.empty()) {
+ U = UseWorklist.pop_back_val();
+ Instruction &I = *cast<Instruction>(U->getUser());
+ if (!visit(I))
+ return false; // Propagate failure to promote up.
if (OnlyUsedInOneBlock) {
if (OnlyBlock == 0)
- OnlyBlock = User->getParent();
- else if (OnlyBlock != User->getParent())
+ OnlyBlock = I.getParent();
+ else if (OnlyBlock != I.getParent())
OnlyUsedInOneBlock = false;
}
}
- DbgDeclare = FindAllocaDbgDeclare(AI);
+ DbgDeclare = FindAllocaDbgDeclare(&AI);
+ return true;
+ }
+
+private:
+ // Befriend the base class so it can call through private visitor methods.
+ friend class InstVisitor<AllocaInfo, bool>;
+
+ /// \brief A use pointer that is non-null when visiting uses.
+ Use *U;
+
+ /// \brief A worklist for recursively visiting all uses of an alloca.
+ SmallVector<Use *, 8> UseWorklist;
+
+ /// \brief A set for preventing cyclic visitation.
+ SmallPtrSet<Use *, 8> VisitedUses;
+
+ void enqueueUsers(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
+ ++UI)
+ if (VisitedUses.insert(&UI.getUse()))
+ UseWorklist.push_back(&UI.getUse());
+ }
+
+ bool visitLoadInst(LoadInst &LI) {
+ if (LI.isVolatile() || LI.getType() != AllocaTy)
+ return false;
+
+ // Keep track of variable reads.
+ UsingBlocks.push_back(LI.getParent());
+ AllocaPointerVal = &LI;
+ return true;
+ }
+
+ bool visitStoreInst(StoreInst &SI) {
+ if (SI.isVolatile() || SI.getValueOperand() == U->get() ||
+ SI.getValueOperand()->getType() != AllocaTy)
+ return false;
+
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI.getParent());
+ AllocaPointerVal = SI.getOperand(0);
+ OnlyStore = &SI;
+ return true;
+ }
+
+ bool visitBitCastInst(BitCastInst &BC) {
+ enqueueUsers(BC);
+ return true;
}
+
+ bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ if (GEPI.use_empty())
+ return true;
+
+ enqueueUsers(GEPI);
+
+ return GEPI.hasAllZeroIndices();
+ }
+
+ // We can promote through debug info intrinsics as they don't alter the
+ // value stored in memory.
+ bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { return true; }
+
+ bool visitIntrinsicInst(IntrinsicInst &II) {
+ switch (II.getIntrinsicID()) {
+ default:
+ return false;
+
+ // Lifetime intrinsics don't preclude promoting the memory to a register.
+ // FIXME: We should use these to promote to undef when outside of a valid
+ // lifetime.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+
+ // The fallback is that the alloca cannot be promoted.
+ bool visitInstruction(Instruction &I) { return false; }
};
// Data package used by RenamePass()
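
The analysis above is a worklist-driven InstVisitor: every use of the alloca is queued, each user is dispatched to the most specific visit method, and a false return anywhere vetoes promotion. Below is a minimal standalone sketch of that pattern, written against the same era of the LLVM C++ API as this patch and not part of the change itself; the class name SimpleUseChecker and its check() entry point are invented for illustration, and the sketch deliberately omits details the real AllocaInfo handles (recording defining/using blocks, rejecting stores of the pointer itself, type checks, and whitelisting lifetime and debug intrinsics).

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InstVisitor.h"

using namespace llvm;

namespace {
// Illustrative only: answers "is every transitive user of this alloca,
// looked through bitcasts and all-zero GEPs, a simple non-volatile load
// or store?"
struct SimpleUseChecker : InstVisitor<SimpleUseChecker, bool> {
  SmallVector<Use *, 8> Worklist;
  SmallPtrSet<Use *, 8> Visited;

  void enqueueUsers(Instruction &I) {
    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
         ++UI)
      if (Visited.insert(&UI.getUse()))
        Worklist.push_back(&UI.getUse());
  }

  bool check(AllocaInst &AI) {
    enqueueUsers(AI);
    while (!Worklist.empty()) {
      Use *U = Worklist.pop_back_val();
      // visit() dispatches on the user's opcode to the most specific
      // visitFoo overload below; false anywhere vetoes promotion.
      if (!visit(*cast<Instruction>(U->getUser())))
        return false;
    }
    return true;
  }

  bool visitLoadInst(LoadInst &LI) { return !LI.isVolatile(); }
  bool visitStoreInst(StoreInst &SI) { return !SI.isVolatile(); }
  bool visitBitCastInst(BitCastInst &BC) {
    enqueueUsers(BC); // Look through the cast at the cast's users.
    return true;
  }
  bool visitGetElementPtrInst(GetElementPtrInst &GEP) {
    enqueueUsers(GEP);
    return GEP.hasAllZeroIndices();
  }
  // Any other user (a call, a return of the pointer, ...) blocks promotion.
  bool visitInstruction(Instruction &I) { return false; }
};
} // end anonymous namespace

Returning bool from the visitor, rather than accumulating state and checking it afterwards, is what lets analyzeAlloca above bail out on the first disqualifying use.
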
@@ -316,24 +344,56 @@ private:
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
+ // FIXME: This function isn't actually specific to lifetime intrinsics. It
+ // is also redundant with the actual analysis of the loads and stores and
+ // should be folded into that.
+
+ SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
+ SmallVector<Instruction *, 8> Dead;
+ Worklist.push_back(AI);
+
+ do {
+ Instruction *I = Worklist.pop_back_val();
- for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE;) {
- Instruction *I = cast<Instruction>(*UI);
- ++UI;
- if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ if (I->use_empty()) {
+ Dead.push_back(I);
continue;
+ }
- if (!I->getType()->isVoidTy()) {
- // The only users of this bitcast/GEP instruction are lifetime intrinsics.
- // Follow the use/def chain to erase them now instead of leaving it for
- // dead code elimination later.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE;) {
- Instruction *Inst = cast<Instruction>(*UI);
- ++UI;
- Inst->eraseFromParent();
- }
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ if (!isa<LoadInst>(*UI) && !isa<StoreInst>(*UI) &&
+ Visited.insert(cast<Instruction>(*UI)))
+ Worklist.push_back(cast<Instruction>(*UI));
+ } while (!Worklist.empty());
+
+ while (!Dead.empty()) {
+ Instruction *I = Dead.pop_back_val();
+
+ // Don't delete the alloca itself.
+ if (I == AI)
+ continue;
+
+ // Note that we open-code the deletion algorithm here because we know a
+ // priori that all of the instructions using an alloca that reaches here
+ // are trivially dead when their use list becomes empty (the only risk is
+ // lifetime markers, which we specifically want to nuke). By coding it here
+ // we can skip the triviality test and be more efficient.
+ //
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
+ ++OI) {
+ Instruction *Op = dyn_cast<Instruction>(*OI);
+ if (!Op)
+ continue;
+
+ OI->set(0);
+ if (!Op->use_empty())
+ continue;
+
+ Dead.push_back(Op);
}
I->eraseFromParent();
}
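
The deletion loop above is an open-coded cascade: erase an instruction already known to be dead, drop its operand references, and erase any operand instruction whose last remaining use just disappeared. Here is a hedged standalone sketch of the same idea, using only API calls already visible in this hunk; the helper name deleteCascade is invented, and it is not part of the patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Illustrative only: Root must already have an empty use list and be known
// to have no side effects, which is why no triviality test appears here --
// the same assumption the loop above relies on.
static void deleteCascade(Instruction *Root) {
  SmallVector<Instruction *, 8> Dead;
  Dead.push_back(Root);
  while (!Dead.empty()) {
    Instruction *I = Dead.pop_back_val();
    // Drop references to instruction operands first so that their use
    // counts can reach zero before we decide whether to enqueue them.
    for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
         ++OI) {
      Instruction *Op = dyn_cast<Instruction>(*OI);
      if (!Op)
        continue;
      OI->set(0);
      if (Op->use_empty())
        Dead.push_back(Op);
    }
    I->eraseFromParent();
  }
}
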
@@ -540,7 +600,7 @@ void PromoteMem2Reg::run() {
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
- assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
+ //assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
@@ -560,7 +620,9 @@ void PromoteMem2Reg::run() {
// Calculate the set of read and write-locations for each alloca. This is
// analogous to finding the 'uses' and 'definitions' of each variable.
- Info.AnalyzeAlloca(AI);
+ bool Good = Info.analyzeAlloca(*AI);
+ (void)Good;
+ assert(Good && "Cannot promote non-promotable alloca!");
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
@@ -1087,8 +1149,16 @@ NextIteration:
goto NextIteration;
}
-void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST) {
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // We cast away constness because we re-use the non-const analysis that the
+ // actual promotion routine uses. While it is non-const, it doesn't actually
+ // mutate anything at this phase, and we discard the non-const results that
+ // promotion uses to mutate the alloca.
+ return AllocaInfo().analyzeAlloca(*const_cast<AllocaInst *>(AI));
+}
+
+void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas,
+ DominatorTree &DT, AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty())
return;
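
As a usage sketch, a client of this API typically filters entry-block allocas through isAllocaPromotable() and hands the survivors to PromoteMemToReg(), much as the in-tree mem2reg pass does. The helper below is illustrative only: its name is invented, the AliasSetTracker argument is passed as null, and the Dominators.h location is assumed to match the tree as of this patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"

using namespace llvm;

// Illustrative helper: promote every promotable alloca found in F's entry
// block.  DT must be up to date for F; no alias set tracker is supplied.
static bool promoteEntryAllocas(Function &F, DominatorTree &DT) {
  SmallVector<AllocaInst *, 16> Allocas;
  BasicBlock &Entry = F.getEntryBlock();
  for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(&*I))
      if (isAllocaPromotable(AI))
        Allocas.push_back(AI);
  if (Allocas.empty())
    return false;
  PromoteMemToReg(Allocas, DT, 0);
  return true;
}
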
diff --git a/test/Transforms/Mem2Reg/ignore-lifetime.ll b/test/Transforms/Mem2Reg/ignore-lifetime.ll
deleted file mode 100644
index 5e4f9bfd8c..0000000000
--- a/test/Transforms/Mem2Reg/ignore-lifetime.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
-
-declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
-declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
-
-define void @test1() {
-; CHECK: test1
-; CHECK-NOT: alloca
- %A = alloca i32
- %B = bitcast i32* %A to i8*
- call void @llvm.lifetime.start(i64 2, i8* %B)
- store i32 1, i32* %A
- call void @llvm.lifetime.end(i64 2, i8* %B)
- ret void
-}
-
-define void @test2() {
-; CHECK: test2
-; CHECK-NOT: alloca
- %A = alloca {i8, i16}
- %B = getelementptr {i8, i16}* %A, i32 0, i32 0
- call void @llvm.lifetime.start(i64 2, i8* %B)
- store {i8, i16} zeroinitializer, {i8, i16}* %A
- call void @llvm.lifetime.end(i64 2, i8* %B)
- ret void
-}
diff --git a/test/Transforms/Mem2Reg/use-analysis.ll b/test/Transforms/Mem2Reg/use-analysis.ll
new file mode 100644
index 0000000000..b08b1f191b
--- /dev/null
+++ b/test/Transforms/Mem2Reg/use-analysis.ll
@@ -0,0 +1,70 @@
+; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
+
+define void @test1() {
+; Ensure we can look through a bitcast to i8* and the addition of lifetime
+; markers.
+;
+; CHECK-LABEL: @test1(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+ %A = alloca i32
+ %B = bitcast i32* %A to i8*
+ call void @llvm.lifetime.start(i64 2, i8* %B)
+ store i32 1, i32* %A
+ call void @llvm.lifetime.end(i64 2, i8* %B)
+ ret void
+}
+
+define void @test2() {
+; Ensure we can look through a GEP to i8* and the addition of lifetime
+; markers.
+;
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+ %A = alloca {i8, i16}
+ %B = getelementptr {i8, i16}* %A, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 2, i8* %B)
+ store {i8, i16} zeroinitializer, {i8, i16}* %A
+ call void @llvm.lifetime.end(i64 2, i8* %B)
+ ret void
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: @test3(
+;
+; Check that we recursively walk the uses of the alloca and thus can see
+; through round trip bitcasts, dead bitcasts, GEPs, multiple GEPs, and lifetime
+; markers.
+entry:
+ %a = alloca i32
+; CHECK-NOT: alloca
+
+ %b = bitcast i32* %a to i8*
+ %b2 = getelementptr inbounds i8* %b, i32 0
+ %b3 = getelementptr inbounds i8* %b2, i32 0
+ call void @llvm.lifetime.start(i64 -1, i8* %b3)
+; CHECK-NOT: call void @llvm.lifetime.start
+
+ store i32 %x, i32* %a
+; CHECK-NOT: store
+
+ %dead = bitcast i32* %a to i4096*
+ %dead1 = bitcast i4096* %dead to i42*
+ %dead2 = getelementptr inbounds i32* %a, i32 %x
+; CHECK-NOT: bitcast
+; CHECK-NOT: getelementptr
+
+ %ret = load i32* %a
+; CHECK-NOT: load
+
+ ret i32 %ret
+; CHECK: ret i32 %x
+}