summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDevang Patel <dpatel@apple.com>2008-09-09 21:41:07 +0000
committerDevang Patel <dpatel@apple.com>2008-09-09 21:41:07 +0000
commitd22a849282c45bbf7eb1734c274294d81e49e3a8 (patch)
tree524d3ef9f5fdb159b4d64473f2d9ee4a6bb5cee4
parentbd6dc7a086142fec46b28b8c1f4e75a0d9500f6d (diff)
downloadllvm-d22a849282c45bbf7eb1734c274294d81e49e3a8.tar.gz
llvm-d22a849282c45bbf7eb1734c274294d81e49e3a8.tar.bz2
llvm-d22a849282c45bbf7eb1734c274294d81e49e3a8.tar.xz
If the loop induction variable is always sign- or zero-extended, then
extend the type of the induction variable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56017 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp119
-rw-r--r--test/Transforms/IndVarsSimplify/2008-09-02-IVType.ll58
2 files changed, 176 insertions, 1 deletions
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index bfda9cda17..76a07d4601 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -93,6 +93,8 @@ namespace {
void RewriteLoopExitValues(Loop *L, SCEV *IterationCount);
void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
+
+ void OptimizeCanonicalIVType(Loop *L);
};
}
@@ -597,7 +599,122 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
#endif
DeleteTriviallyDeadInstructions(DeadInsts);
-
+ OptimizeCanonicalIVType(L);
assert(L->isLCSSAForm());
return Changed;
}
+
+/// OptimizeCanonicalIVType - If every use of the loop's canonical induction
+/// variable, other than its own increment, is a sign or zero extension to
+/// one common type, rewrite the loop to use an induction variable of that
+/// extended type so the extension instructions can be removed.
+///
+/// Nothing is changed unless the loop has a canonical induction variable, a
+/// constant iteration count that fits the original type, and an increment
+/// whose only users are the PHI node and one integer compare with a constant.
+void IndVarSimplify::OptimizeCanonicalIVType(Loop *L) {
+  PHINode *PH = L->getCanonicalInductionVariable();
+  if (!PH) return;
+
+  // Check loop iteration count; only a known constant count is handled.
+  SCEVHandle IC = SE->getIterationCount(L);
+  if (isa<SCEVCouldNotCompute>(IC)) return;
+  SCEVConstant *IterationCount = dyn_cast<SCEVConstant>(IC);
+  if (!IterationCount) return;
+
+  // IncomingEdge is the PHI operand index fed from outside the loop: it is 1
+  // when operand 0 comes from a block inside the loop, 0 otherwise. BackEdge
+  // is the other operand. This assumes the PHI has exactly two incoming
+  // values.
+  unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
+  unsigned BackEdge = IncomingEdge^1;
+
+  // Check IV uses. If all IV uses are either SEXT or ZEXT (except
+  // IV increment instruction) then this IV is suitable for this
+  // transformation.
+  bool isSEXT = false;
+  BinaryOperator *Incr = NULL;
+  const Type *NewType = NULL;
+  for (Value::use_iterator UI = PH->use_begin(), UE = PH->use_end();
+       UI != UE; ++UI) {
+    const Type *CandidateType = NULL;
+    if (ZExtInst *ZI = dyn_cast<ZExtInst>(UI))
+      CandidateType = ZI->getDestTy();
+    else if (SExtInst *SI = dyn_cast<SExtInst>(UI)) {
+      CandidateType = SI->getDestTy();
+      isSEXT = true;
+    }
+    else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UI)) {
+      // The only binary operator allowed to use the IV is the increment
+      // that feeds the PHI node along the back edge.
+      if (PH->getIncomingValue(BackEdge) == BO) {
+        Incr = BO;
+        continue;
+      }
+    }
+    // Any other kind of user disqualifies the IV.
+    if (!CandidateType) {
+      NewType = NULL;
+      break;
+    }
+    // All extensions must agree on a single destination type.
+    if (!NewType)
+      NewType = CandidateType;
+    else if (NewType != CandidateType) {
+      NewType = NULL;
+      break;
+    }
+  }
+
+  // IV uses are not suitable then avoid this transformation.
+  if (!NewType || !Incr)
+    return;
+
+  // IV increment instruction must have exactly two uses, one is loop exit
+  // condition and second is the IV (phi node) itself. The increment is
+  // erased at the end of this function, so any additional user would be
+  // left referring to a deleted instruction.
+  if (!Incr->hasNUses(2)) return;
+  ICmpInst *Exit = NULL;
+  for (Value::use_iterator II = Incr->use_begin(), IE = Incr->use_end();
+       II != IE; ++II) {
+    if (PH == *II) continue;
+    Exit = dyn_cast<ICmpInst>(*II);
+    break;
+  }
+  if (!Exit) return;
+
+  // Find the constant the increment is compared against. The replacement
+  // compare built below always has the new increment as its left operand,
+  // so when the constant is the left operand of the original compare the
+  // predicate has to be swapped to keep the same meaning.
+  ICmpInst::Predicate Pred = Exit->getPredicate();
+  ConstantInt *EV = dyn_cast<ConstantInt>(Exit->getOperand(1));
+  if (!EV) {
+    EV = dyn_cast<ConstantInt>(Exit->getOperand(0));
+    if (!EV) return;
+    Pred = Exit->getSwappedPredicate();
+  }
+
+  // Check iteration count max value to avoid loops that wrap around IV.
+  // NOTE(review): getZExtValue() is used on both values; confirm the
+  // iteration count can never be wider than 64 bits here.
+  APInt ICount = IterationCount->getValue()->getValue();
+  if (ICount.isNegative()) return;
+  uint32_t BW = PH->getType()->getPrimitiveSizeInBits();
+  APInt Max = (isSEXT ? APInt::getSignedMaxValue(BW) : APInt::getMaxValue(BW));
+  if (ICount.getZExtValue() > Max.getZExtValue()) return;
+
+  // Extend IV type.
+
+  SCEVExpander Rewriter(*SE, *LI);
+  Value *NewIV = Rewriter.getOrInsertCanonicalInductionVariable(L,NewType);
+  PHINode *NewPH = cast<PHINode>(NewIV);
+  // Locate the back edge on the new PHI node itself rather than reusing the
+  // old PHI node's operand index; the two nodes need not list their
+  // incoming blocks in the same order.
+  unsigned NewBackEdge = L->contains(NewPH->getIncomingBlock(0)) ? 0 : 1;
+  Instruction *NewIncr =
+    cast<Instruction>(NewPH->getIncomingValue(NewBackEdge));
+
+  // Replace all SEXT or ZEXT uses. The users are copied out first because
+  // erasing them modifies the use list being walked.
+  SmallVector<Instruction *, 4> PHUses;
+  for (Value::use_iterator UI = PH->use_begin(), UE = PH->use_end();
+       UI != UE; ++UI) {
+    Instruction *I = cast<Instruction>(UI);
+    PHUses.push_back(I);
+  }
+  while (!PHUses.empty()){
+    Instruction *Use = PHUses.back(); PHUses.pop_back();
+    if (Incr == Use) continue;
+
+    SE->deleteValueFromRecords(Use);
+    Use->replaceAllUsesWith(NewIV);
+    Use->eraseFromParent();
+  }
+
+  // Replace exit condition.
+  // NOTE(review): the exit constant is zero-extended even when the IV users
+  // were sign extensions; confirm a negative bound cannot reach this point.
+  ConstantInt *NEV = ConstantInt::get(NewType, EV->getZExtValue());
+  Instruction *NE = new ICmpInst(Pred,
+                                 NewIncr, NEV, "new.exit",
+                                 Exit->getParent()->getTerminator());
+  SE->deleteValueFromRecords(Exit);
+  Exit->replaceAllUsesWith(NE);
+  Exit->eraseFromParent();
+
+  // Remove old IV and increment instructions. Both incoming values of the
+  // old PHI node are dropped; presumably the node itself is deleted once it
+  // has no incoming values left (confirm against removeIncomingValue).
+  SE->deleteValueFromRecords(PH);
+  PH->removeIncomingValue((unsigned)0);
+  PH->removeIncomingValue((unsigned)0);
+  SE->deleteValueFromRecords(Incr);
+  Incr->eraseFromParent();
+}
+
diff --git a/test/Transforms/IndVarsSimplify/2008-09-02-IVType.ll b/test/Transforms/IndVarsSimplify/2008-09-02-IVType.ll
new file mode 100644
index 0000000000..8111cbe3a4
--- /dev/null
+++ b/test/Transforms/IndVarsSimplify/2008-09-02-IVType.ll
@@ -0,0 +1,58 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep sext | count 1
+; ModuleID = '<stdin>'
+
+ %struct.App1Marker = type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>
+ %struct.ComponentInstanceRecord = type <{ [1 x i32] }>
+ %struct.DCPredictors = type { [5 x i16] }
+ %struct.DecodeTable = type { i16, i16, i16, i16, i8**, i8** }
+ %struct.ICMDataProcRecord = type <{ i16 (i8**, i32, i32)*, i32 }>
+ %struct.JPEGBitStream = type { i8*, i32, i32, i32, i32, i32, %struct.App1Marker*, i8*, i32, i16, i16, i32 }
+ %struct.JPEGGlobals = type { [2048 x i8], %struct.JPEGBitStream, i8*, i32, i32, %struct.ComponentInstanceRecord*, %struct.ComponentInstanceRecord*, i32, %struct.OpaqueQTMLMutex*, %struct.Rect, i32, i32, %struct.SharedGlobals, %struct.DCPredictors, i8, i8, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, %struct.YUVGeneralParams, i16, i16, i32, [5 x i16*], [5 x %struct.DecodeTable*], [5 x %struct.DecodeTable*], [5 x i8], [5 x i8], [4 x [65 x i16]], [4 x %struct.DecodeTable], [4 x %struct.DecodeTable], [4 x i8*], [4 x i8*], i16, i16, i32, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, [18 x i8], [18 x i8], [18 x i8], [18 x i8], i32, i32, i8**, i8**, i8, i8, i8, i8, i16, i16, %struct.App1Marker*, i8, i8, i8, i8, i32**, i8*, i16*, i8*, i16*, i8, [3 x i8], i32, [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i16*], [3 x i16*], [3 x i8**], [3 x %struct.DecodeTable*], [3 x %struct.DecodeTable*], [3 x i32], i32, [3 x i16*], i32, i32, i32, [3 x i32], i8, i8, i8, i8, %struct.ICMDataProcRecord*, i32, i32, i8**, i8**, i8**, i8**, i32, i32, i8*, i32, i32, i16*, i16*, i8*, i32, i32, i32, i32, i32, i32, i32, [16 x <2 x i64>], [1280 x i8], i8 }
+ %struct.OpaqueQTMLMutex = type opaque
+ %struct.Rect = type { i16, i16, i16, i16 }
+ %struct.SharedDGlobals = type { %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable }
+ %struct.SharedEGlobals = type { i8**, i8**, i8**, i8** }
+ %struct.SharedGlobals = type { %struct.SharedEGlobals*, %struct.SharedDGlobals* }
+ %struct.YUVGeneralParams = type { i16*, i8*, i8*, i8*, i8*, i8*, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16, i16, [6 x i8], void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16 }
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (i16 (%struct.JPEGGlobals*)* @ExtractBufferedBlocksIgnored to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i16 @ExtractBufferedBlocksIgnored(%struct.JPEGGlobals* %globp) signext nounwind { ; test input: outer loop headed by %bb, inner self-loop %bb8
+entry:
+ %tmp4311 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 70 ; <i32*> [#uses=1]
+ %tmp4412 = load i32* %tmp4311, align 16 ; <i32> [#uses=2]
+ %tmp4613 = icmp sgt i32 %tmp4412, 0 ; <i1> [#uses=1]
+ br i1 %tmp4613, label %bb, label %bb49
+
+bb: ; preds = %bb28, %entry
+ %component.09 = phi i16 [ 0, %entry ], [ %tmp37, %bb28 ] ; <i16> [#uses=2] outer-loop counter, held in i16 and starting at 0
+ %tmp12 = sext i16 %component.09 to i32 ; <i32> [#uses=2] the counter's only user besides its increment %tmp37
+ %tmp6 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 77, i32 %tmp12 ; <i16**> [#uses=2]
+ %tmp7 = load i16** %tmp6, align 4 ; <i16*> [#uses=2]
+ %tmp235 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 71, i32 %tmp12 ; <i32*> [#uses=1]
+ %tmp246 = load i32* %tmp235, align 4 ; <i32> [#uses=2]
+ %tmp267 = icmp sgt i32 %tmp246, 0 ; <i1> [#uses=1]
+ br i1 %tmp267, label %bb8, label %bb28
+
+bb8: ; preds = %bb8, %bb
+ %indvar = phi i32 [ 0, %bb ], [ %indvar.next2, %bb8 ] ; <i32> [#uses=3] inner-loop counter, already i32
+ %theDCTBufferIter.01.rec = shl i32 %indvar, 6 ; <i32> [#uses=1]
+ %tmp10.rec = add i32 %theDCTBufferIter.01.rec, 64 ; <i32> [#uses=1]
+ %tmp10 = getelementptr i16* %tmp7, i32 %tmp10.rec ; <i16*> [#uses=1]
+ %i.02 = trunc i32 %indvar to i16 ; <i16> [#uses=1] inner counter narrowed to i16
+ %tmp13 = add i16 %i.02, 1 ; <i16> [#uses=1]
+ %phitmp = sext i16 %tmp13 to i32 ; <i32> [#uses=1] widened again for the exit compare below
+ %tmp26 = icmp slt i32 %phitmp, %tmp246 ; <i1> [#uses=1] inner-loop bound is the loaded value %tmp246, not a constant
+ %indvar.next2 = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp26, label %bb8, label %bb28
+
+bb28: ; preds = %bb8, %bb
+ %theDCTBufferIter.0.lcssa = phi i16* [ %tmp7, %bb ], [ %tmp10, %bb8 ] ; <i16*> [#uses=1]
+ store i16* %theDCTBufferIter.0.lcssa, i16** %tmp6, align 4
+ %tmp37 = add i16 %component.09, 1 ; <i16> [#uses=2] outer-loop increment, used by the phi and by the sext below
+ %phitmp15 = sext i16 %tmp37 to i32 ; <i32> [#uses=1]
+ %tmp46 = icmp slt i32 %phitmp15, 42 ; <i1> [#uses=1] outer-loop exit test against the constant 42
+ br i1 %tmp46, label %bb, label %bb49
+
+bb49: ; preds = %bb28, %entry
+ ret i16 0
+}