summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/Transforms/Scalar/LoopUnrollPass.cpp 9
-rw-r--r-- test/Transforms/LoopUnroll/X86/partial.ll 47
2 files changed, 53 insertions, 3 deletions
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 3290c6ff45..fc28fd2bdc 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -238,9 +238,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > Threshold) {
- DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << Threshold << "\n");
+ if (TripCount != 1 &&
+ (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) {
+ if (Size > Threshold)
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << Threshold << "\n");
+
bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
if (!AllowPartial && !(Runtime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
diff --git a/test/Transforms/LoopUnroll/X86/partial.ll b/test/Transforms/LoopUnroll/X86/partial.ll
index 15867cbea0..75b9c3fb89 100644
--- a/test/Transforms/LoopUnroll/X86/partial.ll
+++ b/test/Transforms/LoopUnroll/X86/partial.ll
@@ -76,5 +76,52 @@ for.end: ; preds = %vector.body
ret void
}
+define zeroext i16 @test1(i16* nocapture readonly %arr, i32 %n) #0 {
+entry: ; test1: i16 sum of three loads from %arr per iteration; returns 0 when %n == 0
+ %cmp25 = icmp eq i32 %n, 0 ; guard: bypass the loop entirely for %n == 0
+ br i1 %cmp25, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; 64-bit induction variable, starts at 0
+ %reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ] ; running sum carried across iterations
+ %arrayidx = getelementptr inbounds i16* %arr, i64 %indvars.iv
+ %0 = load i16* %arrayidx, align 2 ; first load: %arr[%indvars.iv]
+ %add = add i16 %0, %reduction.026
+ %sext = mul i64 %indvars.iv, 12884901888 ; 12884901888 == 3 << 32
+ %idxprom3 = ashr exact i64 %sext, 32 ; shift back down: low 32 bits of 3 * %indvars.iv, sign-extended
+ %arrayidx4 = getelementptr inbounds i16* %arr, i64 %idxprom3
+ %1 = load i16* %arrayidx4, align 2 ; second load: %arr[%idxprom3]
+ %add7 = add i16 %add, %1
+ %sext28 = mul i64 %indvars.iv, 21474836480 ; 21474836480 == 5 << 32
+ %idxprom10 = ashr exact i64 %sext28, 32 ; shift back down: low 32 bits of 5 * %indvars.iv, sign-extended
+ %arrayidx11 = getelementptr inbounds i16* %arr, i64 %idxprom10
+ %2 = load i16* %arrayidx11, align 2 ; third load: %arr[%idxprom10]
+ %add14 = add i16 %add7, %2 ; new value of the running sum
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow to i32 so it can be compared against %n
+ %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once the incremented counter equals %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %reduction.0.lcssa = phi i16 [ 0, %entry ], [ %add14, %for.body ] ; 0 if the loop was skipped, else the final sum
+ ret i16 %reduction.0.lcssa
+
+; This loop is too large to be partially unrolled (size=16)
+
+; CHECK-LABEL: @test1
+; CHECK: br
+; CHECK: br
+; CHECK: br
+; CHECK: br
+; CHECK-NOT: br
+
+; CHECK-NOUNRL-LABEL: @test1
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL-NOT: br
+}
+
attributes #0 = { nounwind uwtable }