summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2008-04-22 07:05:46 +0000
committerChris Lattner <sabre@nondot.org>2008-04-22 07:05:46 +0000
commit6bf77500c655520a04ea8640af6dccbcf995a754 (patch)
tree2e1b8b1f71fcc8832c10d50570e5242b2eedbcb0
parentc290a5b9ffa6ade5a02e9d8c46fc521a5b71529e (diff)
downloadllvm-6bf77500c655520a04ea8640af6dccbcf995a754.tar.gz
llvm-6bf77500c655520a04ea8640af6dccbcf995a754.tar.bz2
llvm-6bf77500c655520a04ea8640af6dccbcf995a754.tar.xz
Teach jump threading to thread through blocks like:
br (and X, phi(Y, Z, false)), label L1, label L2 This triggers once on 252.eon and 6 times on 176.gcc. Blocks in question often look like this: bb262: ; preds = %bb261, %bb248 %iftmp.251.0 = phi i1 [ true, %bb261 ], [ false, %bb248 ] ; <i1> [#uses=4] %tmp270 = icmp eq %struct.rtx_def* %tmp.0.i, null ; <i1> [#uses=1] %bothcond = or i1 %iftmp.251.0, %tmp270 ; <i1> [#uses=1] br i1 %bothcond, label %bb288, label %bb273 In this case, it is clear that it doesn't matter if tmp.0.i is null when coming from bb261. When coming from bb248, it is all that matters. Another random example: check_asm_operands.exit: ; preds = %check_asm_operands.exit.thr_comm, %bb30.i, %bb12.i, %bb6.i413 %tmp.0.i420 = phi i1 [ true, %bb6.i413 ], [ true, %bb12.i ], [ true, %bb30.i ], [ false, %check_asm_operands.exit.thr_comm ; <i1> [#uses=1] call void @llvm.stackrestore( i8* %savedstack ) nounwind %tmp4389 = icmp eq i32 %added_sets_1.0, 0 ; <i1> [#uses=1] %tmp4394 = icmp eq i32 %added_sets_2.0, 0 ; <i1> [#uses=1] %bothcond80 = and i1 %tmp4389, %tmp4394 ; <i1> [#uses=1] %bothcond81 = and i1 %bothcond80, %tmp.0.i420 ; <i1> [#uses=1] br i1 %bothcond81, label %bb4398, label %bb4397 Here is the case from 252.eon: bb290.i.i: ; preds = %bb23.i57.i.i, %bb8.i39.i.i, %bb100.i.i, %bb100.i.i, %bb85.i.i110 %myEOF.1.i.i = phi i1 [ true, %bb100.i.i ], [ true, %bb100.i.i ], [ true, %bb85.i.i110 ], [ true, %bb8.i39.i.i ], [ false, %bb23.i57.i.i ] ; <i1> [#uses=2] %i.4.i.i = phi i32 [ %i.1.i.i, %bb85.i.i110 ], [ %i.0.i.i, %bb100.i.i ], [ %i.0.i.i, %bb100.i.i ], [ %i.3.i.i, %bb8.i39.i.i ], [ %i.3.i.i, %bb23.i57.i.i ] ; <i32> [#uses=3] %tmp292.i.i = load i8* %tmp16.i.i100, align 1 ; <i8> [#uses=1] %tmp293.not.i.i = icmp ne i8 %tmp292.i.i, 0 ; <i1> [#uses=1] %bothcond.i.i = and i1 %tmp293.not.i.i, %myEOF.1.i.i ; <i1> [#uses=1] br i1 %bothcond.i.i, label %bb202.i.i, label %bb301.i.i Factoring out 3 common predecessors. On the path from any blocks other than bb23.i57.i.i, the load and compare are dead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50096 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp126
-rw-r--r--test/Transforms/JumpThreading/and-cond.ll32
2 files changed, 138 insertions, 20 deletions
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index fd75c90d9a..653a83563d 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -57,8 +57,10 @@ namespace {
bool runOnFunction(Function &F);
bool ThreadBlock(BasicBlock *BB);
void ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
-
+ BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+
bool ProcessJumpOnPHI(PHINode *PN);
+ bool ProcessJumpOnLogicalPHI(PHINode *PN, bool isAnd);
};
char JumpThreading::ID = 0;
RegisterPass<JumpThreading> X("jump-threading", "Jump Threading");
@@ -85,6 +87,29 @@ bool JumpThreading::runOnFunction(Function &F) {
return EverChanged;
}
+/// FactorCommonPHIPreds - If there are multiple preds with the same incoming
+/// value for the PHI, factor them together so we get one block to thread for
+/// the whole group.
+/// This is important for things like "phi i1 [true, true, false, true, x]"
+/// where we only need to clone the block for the true blocks once.
+///
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+ SmallVector<BasicBlock*, 16> CommonPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == CstVal)
+ CommonPreds.push_back(PN->getIncomingBlock(i));
+
+ if (CommonPreds.size() == 1)
+ return CommonPreds[0];
+
+ DOUT << " Factoring out " << CommonPreds.size()
+ << " common predecessors.\n";
+ return SplitBlockPredecessors(PN->getParent(),
+ &CommonPreds[0], CommonPreds.size(),
+ ".thr_comm", this);
+}
+
+
/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
/// thread across it.
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
@@ -163,6 +188,23 @@ bool JumpThreading::ThreadBlock(BasicBlock *BB) {
if (PN && PN->getParent() == BB)
return ProcessJumpOnPHI(PN);
+ // If this is a conditional branch whose condition is and/or of a phi, try to
+ // simplify it.
+ if (BinaryOperator *CondI = dyn_cast<BinaryOperator>(Condition)) {
+ if ((CondI->getOpcode() == Instruction::And ||
+ CondI->getOpcode() == Instruction::Or) &&
+ isa<BranchInst>(BB->getTerminator())) {
+ if (PHINode *PN = dyn_cast<PHINode>(CondI->getOperand(0)))
+ if (PN->getParent() == BB &&
+ ProcessJumpOnLogicalPHI(PN, CondI->getOpcode() == Instruction::And))
+ return true;
+ if (PHINode *PN = dyn_cast<PHINode>(CondI->getOperand(1)))
+ if (PN->getParent() == BB &&
+ ProcessJumpOnLogicalPHI(PN, CondI->getOpcode() == Instruction::And))
+ return true;
+ }
+ }
+
return false;
}
@@ -196,9 +238,11 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
return false;
}
- // If so, we can actually do this threading. Figure out which predecessor and
- // which successor we are threading for.
- BasicBlock *PredBB = PN->getIncomingBlock(PredNo);
+ // If so, we can actually do this threading. Merge any common predecessors
+ // that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+
+ // Next, figure out which successor we are threading to.
BasicBlock *SuccBB;
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
SuccBB = BI->getSuccessor(PredCst == ConstantInt::getFalse());
@@ -207,31 +251,73 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst));
}
- // If there are multiple preds with the same incoming value for the PHI,
- // factor them together so we get one block to thread for the whole group.
- // This is important for things like "phi i1 [true, true, false, true, x]"
- // where we only need to clone the block for the true blocks once.
- SmallVector<BasicBlock*, 16> CommonPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == PredCst)
- CommonPreds.push_back(PN->getIncomingBlock(i));
- if (CommonPreds.size() != 1) {
- DOUT << " Factoring out " << CommonPreds.size()
- << " common predecessors.\n";
- PredBB = SplitBlockPredecessors(BB, &CommonPreds[0], CommonPreds.size(),
- ".thr_comm", this);
- }
-
+ // And finally, do it!
DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '"
<< SuccBB->getNameStart() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
- << *BB;
+ << *BB << "\n";
ThreadEdge(BB, PredBB, SuccBB);
++NumThreads;
return true;
}
+/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch
+/// whose condition is an AND/OR where one side is PN. If PN has constant
+/// operands that permit us to evaluate the condition for some operand, thread
+/// through the block. For example with:
+/// br (and X, phi(Y, Z, false))
+/// the predecessor corresponding to the 'false' will always jump to the false
+/// destination of the branch.
+///
+bool JumpThreading::ProcessJumpOnLogicalPHI(PHINode *PN, bool isAnd) {
+
+ // We can only do the simplification for phi nodes of 'false' with AND or
+ // 'true' with OR. See if we have any entries in the phi for this.
+ unsigned PredNo = ~0U;
+ ConstantInt *PredCst = ConstantInt::get(Type::Int1Ty, !isAnd);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingValue(i) == PredCst) {
+ PredNo = i;
+ break;
+ }
+ }
+
+ // If no match, bail out.
+ if (PredNo == ~0U)
+ return false;
+
+ // See if the cost of duplicating this block is low enough.
+ BasicBlock *BB = PN->getParent();
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - Cost is too high: " << JumpThreadCost << "\n";
+ return false;
+ }
+
+ // If so, we can actually do this threading. Merge any common predecessors
+ // that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+
+ // Next, figure out which successor we are threading to. If this was an AND,
+ // the constant must be FALSE, and we must be targeting the 'false' block.
+ // If this is an OR, the constant must be TRUE, and we must be targeting the
+ // 'true' block.
+ BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
+
+ // And finally, do it!
+ DOUT << " Threading edge through bool from '" << PredBB->getNameStart()
+ << "' to '" << SuccBB->getNameStart() << "' with cost: "
+ << JumpThreadCost << ", across block:\n "
+ << *BB << "\n";
+
+ ThreadEdge(BB, PredBB, SuccBB);
+ ++NumThreads;
+ return true;
+}
+
+
/// ThreadEdge - We have decided that it is safe and profitable to thread an
/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this
/// change.
diff --git a/test/Transforms/JumpThreading/and-cond.ll b/test/Transforms/JumpThreading/and-cond.ll
new file mode 100644
index 0000000000..b01c4baffc
--- /dev/null
+++ b/test/Transforms/JumpThreading/and-cond.ll
@@ -0,0 +1,32 @@
+; RUN: llvm-as < %s | opt -jump-threading -mem2reg -instcombine -simplifycfg | llvm-dis | grep {ret i32 %v1}
+; There should be no uncond branches left.
+; RUN: llvm-as < %s | opt -jump-threading -mem2reg -instcombine -simplifycfg | llvm-dis | not grep {br label}
+
+declare i32 @f1()
+declare i32 @f2()
+declare void @f3()
+
+define i32 @test(i1 %cond, i1 %cond2) {
+ br i1 %cond, label %T1, label %F1
+
+T1:
+ %v1 = call i32 @f1()
+ br label %Merge
+
+F1:
+ %v2 = call i32 @f2()
+ br label %Merge
+
+Merge:
+ %A = phi i1 [true, %T1], [false, %F1]
+ %B = phi i32 [%v1, %T1], [%v2, %F1]
+ %C = and i1 %A, %cond2
+ br i1 %C, label %T2, label %F2
+
+T2:
+ call void @f3()
+ ret i32 %B
+
+F2:
+ ret i32 %B
+}