author     Andrew Trick <atrick@apple.com>    2013-12-23 23:31:49 +0000
committer  Andrew Trick <atrick@apple.com>    2013-12-23 23:31:49 +0000
commit     c7b0b7dc8f2424057b3a84f7b3eb2f893c24ed57 (patch)
tree       c7e6c806e485ddc70e74c4de32272fd52f35b90f
parent     7ed2b702a209ba9780529adcf87ab8b7047483d4 (diff)
download   llvm-c7b0b7dc8f2424057b3a84f7b3eb2f893c24ed57.tar.gz
           llvm-c7b0b7dc8f2424057b3a84f7b3eb2f893c24ed57.tar.bz2
           llvm-c7b0b7dc8f2424057b3a84f7b3eb2f893c24ed57.tar.xz
Add support to indvars for optimizing sadd.with.overflow.
Split sadd.with.overflow into add + sadd.with.overflow to allow analysis and
optimization. This should ideally be done after InstCombine, which can perform
code motion (eventually indvars should run after all canonical instcombines).
We want ISel to recombine the add and the check, at least on x86.

This is currently under an option for reducing live induction variables:
-liv-reduce. The next step is reducing the liveness of IVs that are live out
of the overflow check paths. Once the related optimizations are fully
developed, reviewed, and tested, I do expect this to become the default.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197926 91177308-0d34-0410-b5e6-96231b3b80d8
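As a rough illustration of the split, here is a minimal before/after sketch in
LLVM IR. The function, value, and block names are hypothetical and the snippet
only shows the shape of the rewrite; indvars performs it only on
induction-variable users whose overflow check guards every use of the sum.

Before: the sum is only available as the intrinsic's first result, so SCEV
cannot reason about it.

  declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)

  define i32 @guarded_add(i32 %a, i32 %b) {
  entry:
    %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
    %sum = extractvalue { i32, i1 } %res, 0
    %ovf = extractvalue { i32, i1 } %res, 1
    br i1 %ovf, label %trap, label %cont

  cont:                        ; %cont has a single predecessor
    ret i32 %sum               ; every use of %sum is dominated by %cont

  trap:
    unreachable
  }

After the split, the sum is a plain nsw add that SCEV can analyze, while the
intrinsic and its branch are kept so ISel can recombine them; the old
extractvalue of the sum becomes dead:

  entry:
    %sum.split = add nsw i32 %a, %b   ; new add; its uses are guarded by the check
    %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
    %ovf = extractvalue { i32, i1 } %res, 1
    br i1 %ovf, label %trap, label %cont

  cont:
    ret i32 %sum.split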
-rw-r--r--  include/llvm/Transforms/Utils/SimplifyIndVar.h    17
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp           14
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp            82
-rw-r--r--  test/Transforms/IndVarSimplify/overflowcheck.ll    56
4 files changed, 165 insertions(+), 4 deletions(-)
diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h
index 7e97e218fb..3c3de467c4 100644
--- a/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -22,6 +22,7 @@
namespace llvm {
class CastInst;
+class DominatorTree;
class IVUsers;
class Loop;
class LPPassManager;
@@ -31,9 +32,25 @@ class ScalarEvolution;
/// Interface for visiting interesting IV users that are recognized but not
/// simplified by this utility.
class IVVisitor {
+protected:
+ const DominatorTree *DT;
+ bool ShouldSplitOverflowIntrinsics;
+
virtual void anchor();
public:
+ IVVisitor(): DT(NULL), ShouldSplitOverflowIntrinsics(false) {}
virtual ~IVVisitor() {}
+
+ const DominatorTree *getDomTree() const { return DT; }
+
+ bool shouldSplitOverflowInstrinsics() const {
+ return ShouldSplitOverflowIntrinsics;
+ }
+ void setSplitOverflowIntrinsics() {
+ ShouldSplitOverflowIntrinsics = true;
+ assert(DT && "Splitting overflow intrinsics requires a DomTree.");
+ }
+
virtual void visitCast(CastInst *Cast) = 0;
};
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 235aaaa6f8..c291f68bd6 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -63,6 +63,9 @@ static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
+static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
+ cl::desc("Reduce live induction variables."));
+
namespace {
class IndVarSimplify : public LoopPass {
LoopInfo *LI;
@@ -643,8 +646,11 @@ namespace {
WideIVInfo WI;
WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
- const DataLayout *TData) :
- SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
+ const DataLayout *TData, const DominatorTree *DTree):
+ SE(SCEV), TD(TData) {
+ DT = DTree;
+ WI.NarrowIV = NarrowIV;
+ }
// Implement the interface used by simplifyUsersOfIV.
virtual void visitCast(CastInst *Cast);
@@ -1114,7 +1120,9 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- WideIVVisitor WIV(CurrIV, SE, TD);
+ WideIVVisitor WIV(CurrIV, SE, TD, DT);
+ if (ReduceLiveIVs)
+ WIV.setSplitOverflowIntrinsics();
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index bf3442aeaa..d1f6c5c62a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -18,12 +18,16 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -75,6 +79,9 @@ namespace {
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
+
+ Instruction *splitOverflowIntrinsic(Instruction *IVUser,
+ const DominatorTree *DT);
};
}
@@ -263,6 +270,71 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return true;
}
+/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
+/// analysis and optimization.
+///
+/// \return A new value representing the non-overflowing add if possible,
+/// otherwise return the original value.
+Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
+ const DominatorTree *DT) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
+ if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
+ return IVUser;
+
+ // Find a branch guarded by the overflow check.
+ BranchInst *Branch = 0;
+ Instruction *AddVal = 0;
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI) {
+ if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
+ if (ExtractInst->getNumIndices() != 1)
+ continue;
+ if (ExtractInst->getIndices()[0] == 0)
+ AddVal = ExtractInst;
+ else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
+ Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
+ }
+ }
+ if (!AddVal || !Branch)
+ return IVUser;
+
+ BasicBlock *ContinueBB = Branch->getSuccessor(1);
+ if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
+ return IVUser;
+
+ // Check if all users of the add are provably NSW.
+ bool AllNSW = true;
+ for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
+ UI != E; ++UI) {
+ if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
+ BasicBlock *UseBB = UseInst->getParent();
+ if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
+ UseBB = PHI->getIncomingBlock(UI);
+ if (!DT->dominates(ContinueBB, UseBB)) {
+ AllNSW = false;
+ break;
+ }
+ }
+ }
+ if (!AllNSW)
+ return IVUser;
+
+ // Go for it...
+ IRBuilder<> Builder(IVUser);
+ Instruction *AddInst = dyn_cast<Instruction>(
+ Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
+
+ // The caller expects the new add to have the same form as the intrinsic. The
+ // IV operand position must be the same.
+ assert((AddInst->getOpcode() == Instruction::Add &&
+ AddInst->getOperand(0) == II->getOperand(0)) &&
+ "Bad add instruction created from overflow intrinsic.");
+
+ AddVal->replaceAllUsesWith(AddInst);
+ DeadInsts.push_back(AddVal);
+ return AddInst;
+}
+
/// pushIVUsers - Add all uses of Def to the current IV's worklist.
///
static void pushIVUsers(
@@ -334,8 +406,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
while (!SimpleIVUsers.empty()) {
std::pair<Instruction*, Instruction*> UseOper =
SimpleIVUsers.pop_back_val();
+ Instruction *UseInst = UseOper.first;
+
// Bypass back edges to avoid extra work.
- if (UseOper.first == CurrIV) continue;
+ if (UseInst == CurrIV) continue;
+
+ if (V && V->shouldSplitOverflowInstrinsics()) {
+ UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree());
+ if (!UseInst)
+ continue;
+ }
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {
diff --git a/test/Transforms/IndVarSimplify/overflowcheck.ll b/test/Transforms/IndVarSimplify/overflowcheck.ll
new file mode 100644
index 0000000000..2603f363ab
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/overflowcheck.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; CHECK-LABEL: @addwithoverflow
+; CHECK-LABEL: loop1:
+; CHECK-NOT: zext
+; CHECK: add nsw
+; CHECK: @llvm.sadd.with.overflow
+; CHECK-LABEL: loop2:
+; CHECK-NOT: extractvalue
+; CHECK: add nuw nsw
+; CHECK: @llvm.sadd.with.overflow
+; CHECK-LABEL: loop3:
+; CHECK-NOT: extractvalue
+; CHECK: ret
+define i64 @addwithoverflow(i32 %n, i64* %a) {
+entry:
+ br label %loop0
+
+loop0:
+ %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ]
+ %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ]
+ %bc = icmp ult i32 %i, %n
+ br i1 %bc, label %loop1, label %exit
+
+loop1:
+ %zxt = zext i32 %i to i64
+ %ofs = shl nuw nsw i64 %zxt, 3
+ %gep = getelementptr i64* %a, i64 %zxt
+ %v = load i64* %gep, align 8
+ %truncv = trunc i64 %v to i32
+ %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv)
+ %ovflows = extractvalue { i32, i1 } %adds, 1
+ br i1 %ovflows, label %exit, label %loop2
+
+loop2:
+ %addsval = extractvalue { i32, i1 } %adds, 0
+ %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1)
+ %i1check = extractvalue { i32, i1 } %i1, 1
+ br i1 %i1check, label %exit, label %loop3
+
+loop3:
+ %i1val = extractvalue { i32, i1 } %i1, 0
+ %test = icmp slt i32 %i1val, %n
+ br i1 %test, label %return, label %loop0
+
+return:
+ %ret = zext i32 %addsval to i64
+ ret i64 %ret
+
+exit:
+ unreachable
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)