summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp13
-rw-r--r--test/Transforms/LoopVectorize/multi-use-reduction-bug.ll42
2 files changed, 53 insertions, 2 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5e758713ed..f9f6b18940 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4191,13 +4191,22 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
continue;
}
- // Process instructions only once (termination).
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ ReductionInstDesc IgnoredVal(false, 0);
if (VisitedInsts.insert(Usr)) {
if (isa<PHINode>(Usr))
PHIs.push_back(Usr);
else
NonPHIs.push_back(Usr);
- }
+ } else if (!isa<PHINode>(Usr) &&
+ ((!isa<FCmpInst>(Usr) &&
+ !isa<ICmpInst>(Usr) &&
+ !isa<SelectInst>(Usr)) ||
+ !isMinMaxSelectCmpPattern(Usr, IgnoredVal).IsReduction))
+ return false;
+
// Remember that we completed the cycle.
if (Usr == Phi)
FoundStartPHI = true;
diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
new file mode 100644
index 0000000000..5fc5ed55a9
--- /dev/null
+++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
@@ -0,0 +1,42 @@
+; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; We must not vectorize this loop. %add55 is not a reduction: its value is
+; used multiple times.
+
+; PR18526
+
+; CHECK: multiple_use_of_value
+; CHECK-NOT: <2 x i32>
+
+define void @multiple_use_of_value() { ; PR18526 reproducer: this loop must stay scalar
+entry:
+ %n = alloca i32, align 4
+ %k7 = alloca i32, align 4
+ %nf = alloca i32, align 4
+ %0 = load i32* %k7, align 4
+ %.neg1 = sub i32 0, %0 ; negation computed once, outside the loop
+ %n.promoted = load i32* %n, align 4
+ %nf.promoted = load i32* %nf, align 4
+ br label %for.body
+
+for.body:
+ %inc107 = phi i32 [ undef, %entry ], [ %inc10, %for.body ] ; loop counter, advanced by %inc10
+ %inc6 = phi i32 [ %nf.promoted, %entry ], [ undef, %for.body ]
+ %add55 = phi i32 [ %n.promoted, %entry ], [ %add5, %for.body ] ; reduction candidate, fed back from %add5
+ %.neg2 = sub i32 0, %inc6
+ %add.neg = add i32 0, %add55 ; first use of %add55 inside the loop
+ %add4.neg = add i32 %add.neg, %.neg1
+ %sub = add i32 %add4.neg, %.neg2
+ %add5 = add i32 %sub, %add55 ; second use of %add55 in the same chain, so it is not a valid reduction
+ %inc10 = add i32 %inc107, 1
+ %cmp = icmp ult i32 %inc10, 61 ; keep looping while %inc10 is below 61 (unsigned)
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ %add5.lcssa = phi i32 [ %add5, %for.body ] ; single-entry phi carrying %add5 out of the loop
+ store i32 %add5.lcssa, i32* %n, align 4 ; final value is written back to %n
+ ret void
+}