diff options
-rw-r--r-- | lib/Target/README.txt | 48 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 32 | ||||
-rw-r--r-- | test/Transforms/SimplifyCFG/switch_create.ll | 17 | ||||
-rw-r--r-- | test/Transforms/SimplifyCFG/switch_formation.dbg.ll | 12 |
4 files changed, 39 insertions, 70 deletions
diff --git a/lib/Target/README.txt b/lib/Target/README.txt index b3bc749856..c0a2b760de 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2274,51 +2274,3 @@ llc time when it gets inlined, because we can use smaller transfers. This also avoids partial register stalls in some important cases. //===---------------------------------------------------------------------===// - -With PR8575 we're now generating better code for: - -static _Bool foo(int x) { return x == 1; } -static _Bool bar(int x) { return x == 2; } -static _Bool baz(int x) { return x == 3; } - -_Bool quux(int x) { - return foo(x) || bar(x) || baz(x); -} - -$ clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer -_quux: ## @quux -## BB#0: ## %entry - decl %edi - cmpl $3, %edi - movb $1, %al - jb LBB0_2 -## BB#1: ## %lor.rhs - xorb %al, %al -LBB0_2: ## %lor.end - movzbl %al, %eax - andl $1, %eax - ret - -But this should use a "setcc" instead of materializing a 0/1 value -the hard way. This looks like #1: simplifycfg should transform the -switch into a sub+icmp+branch, and an instcombine hack to replace -the PHI with a zext of the branch condition. Here's the IR today: - -define zeroext i1 @quux(i32 %x) nounwind readnone ssp noredzone { -entry: - switch i32 %x, label %lor.rhs [ - i32 1, label %lor.end - i32 2, label %lor.end - i32 3, label %lor.end - ] - -lor.rhs: ; preds = %entry - br label %lor.end - -lor.end: ; preds = %lor.rhs, %entry, %entry, %entry - %0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ] - ret i1 %0 -} - -//===---------------------------------------------------------------------===// - diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index b9432c2e6e..bf753dc05d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2237,6 +2237,34 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { return Changed; } +/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a +/// integer range comparison into a sub, an icmp and a branch. +static bool TurnSwitchRangeIntoICmp(SwitchInst *SI) { + assert(SI->getNumCases() > 2 && "Degenerate switch?"); + // We can do this transform if the switch consists of an ascending series + // and all cases point to the same destination. + for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) + if (SI->getSuccessor(I-1) != SI->getSuccessor(I) || + SI->getCaseValue(I-1)->getValue()+1 != SI->getCaseValue(I)->getValue()) + return false; + + Constant *Offset = ConstantExpr::getNeg(SI->getCaseValue(1)); + Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1); + + Value *Sub = BinaryOperator::CreateAdd(SI->getCondition(), Offset, "off", SI); + Value *Cmp = new ICmpInst(SI, ICmpInst::ICMP_ULT, Sub, NumCases, "switch"); + BranchInst::Create(SI->getSuccessor(1), SI->getDefaultDest(), Cmp, SI); + + // Prune obsolete incoming values off the successor's PHI nodes. + for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin(); + isa<PHINode>(BBI); ++BBI) { + for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + SI->eraseFromParent(); + + return true; +} bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) { // If this switch is too complex to want to look at, ignore it. @@ -2260,6 +2288,10 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) { if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI)) return SimplifyCFG(BB) | true; + + // Try to transform the switch into an icmp and a branch. + if (TurnSwitchRangeIntoICmp(SI)) + return SimplifyCFG(BB) | true; return false; } diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll index 4e199bc859..7c153e8682 100644 --- a/test/Transforms/SimplifyCFG/switch_create.ll +++ b/test/Transforms/SimplifyCFG/switch_create.ll @@ -141,14 +141,8 @@ UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 ret i1 %UnifiedRetVal ; CHECK: @test6 -; CHECK: switch i32 %tmp.2.i, label %shortcirc_next.4 [ -; CHECK: i32 14, label %UnifiedReturnBlock -; CHECK: i32 15, label %UnifiedReturnBlock -; CHECK: i32 16, label %UnifiedReturnBlock -; CHECK: i32 17, label %UnifiedReturnBlock -; CHECK: i32 18, label %UnifiedReturnBlock -; CHECK: i32 19, label %UnifiedReturnBlock -; CHECK: ] +; CHECK: %off = add i32 %tmp.2.i, -14 +; CHECK: %switch = icmp ult i32 %off, 6 } define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone { @@ -447,11 +441,8 @@ if.end: define zeroext i1 @test16(i32 %x) nounwind { entry: ; CHECK: @test16 -; CHECK: switch i32 %x, label %lor.rhs [ -; CHECK: i32 1, label %lor.end -; CHECK: i32 2, label %lor.end -; CHECK: i32 3, label %lor.end -; CHECK: ] +; CHECK: %off = add i32 %x, -1 +; CHECK: %switch = icmp ult i32 %off, 3 %cmp.i = icmp eq i32 %x, 1 br i1 %cmp.i, label %lor.end, label %lor.lhs.false diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll index 357ffb60e1..09bef648ab 100644 --- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll +++ b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll @@ -13,15 +13,9 @@ declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind define i1 @t({ i32, i32 }* %I) { -; CHECK: t -; CHECK: switch i32 %tmp.2.i, label %shortcirc_next.4 [ -; CHECK: i32 14, label %UnifiedReturnBlock -; CHECK: i32 15, label %UnifiedReturnBlock -; CHECK: i32 16, label %UnifiedReturnBlock -; CHECK: i32 17, label %UnifiedReturnBlock -; CHECK: i32 18, label %UnifiedReturnBlock -; CHECK: i32 19, label %UnifiedReturnBlock -; CHECK: ] +; CHECK: @t +; CHECK: %off = add i32 %tmp.2.i, -14 +; CHECK: %switch = icmp ult i32 %off, 6 entry: %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; <i32*> [#uses=1] %tmp.2.i = load i32* %tmp.1.i ; <i32> [#uses=6] |