summaryrefslogtreecommitdiff
path: root/test/Transforms/LoopVectorize/reduction.ll
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-05-07 21:55:37 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-05-07 21:55:37 +0000
commit280e1df8585e62f0c801de8e4b625a7e73178d85 (patch)
treefeb0e6301bebf0284f1d2d0aa79110d117ffecf2 /test/Transforms/LoopVectorize/reduction.ll
parent88535dda9050377cc52dfffa20664484b0bb9f7f (diff)
downloadllvm-280e1df8585e62f0c801de8e4b625a7e73178d85.tar.gz
llvm-280e1df8585e62f0c801de8e4b625a7e73178d85.tar.bz2
llvm-280e1df8585e62f0c801de8e4b625a7e73178d85.tar.xz
LoopVectorizer: Improve reduction variable identification
The two nested loops were confusing and also conservative in identifying reduction variables. This patch replaces them with a worklist-based approach. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181369 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/LoopVectorize/reduction.ll')
-rw-r--r--test/Transforms/LoopVectorize/reduction.ll119
1 files changed, 119 insertions, 0 deletions
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 08b7b27e42..286b736c92 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -323,3 +323,122 @@ for.end: ; preds = %for.body, %entry
%x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
ret i32 %x.0.lcssa
}
+
+; We can vectorize conditional reductions with multi-input phis.
+; CHECK: reduction_conditional
+; CHECK: fadd <4 x float>
+
+define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) { ; conditionally adds A[i] or B[i] into a sum seeded with %S; %C is not referenced in the body
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] ; i: starts at 0, advanced by 1 in %for.inc
+ %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] ; running sum: %S on entry, merged value from %for.inc afterwards
+ %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv ; &A[i]
+ %0 = load float* %arrayidx, align 4 ; %0 = A[i]
+ %arrayidx2 = getelementptr inbounds float* %B, i64 %indvars.iv ; &B[i]
+ %1 = load float* %arrayidx2, align 4 ; %1 = B[i]
+ %cmp3 = fcmp ogt float %0, %1 ; A[i] > B[i] (ordered compare)
+ br i1 %cmp3, label %if.then, label %for.inc ; false edge goes straight to the merge, sum unchanged
+
+if.then:
+ %cmp6 = fcmp ogt float %1, 1.000000e+00 ; B[i] > 1.0
+ br i1 %cmp6, label %if.then8, label %if.else
+
+if.then8:
+ %add = fadd fast float %sum.033, %0 ; sum + A[i]
+ br label %for.inc
+
+if.else:
+ %cmp14 = fcmp ogt float %0, 2.000000e+00 ; A[i] > 2.0
+ br i1 %cmp14, label %if.then16, label %for.inc ; false edge goes to the merge, sum unchanged
+
+if.then16:
+ %add19 = fadd fast float %sum.033, %1 ; sum + B[i]
+ br label %for.inc
+
+for.inc:
+ %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ] ; four-input merge: every incoming value is an fadd of %sum.033 or %sum.033 itself
+ %indvars.iv.next = add i64 %indvars.iv, 1 ; i + 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; exit test is done on the 32-bit truncation of the counter
+ %exitcond = icmp ne i32 %lftr.wideiv, 128 ; keep looping until the truncated counter reaches 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ %sum.1.lcssa = phi float [ %sum.1, %for.inc ] ; single-input phi carrying the final sum out of the loop
+ ret float %sum.1.lcssa
+}
+
+; We can't vectorize reductions with phi inputs from outside the reduction.
+; CHECK: noreduction_phi
+; CHECK-NOT: fadd <4 x float>
+define float @noreduction_phi(float* %A, float* %B, float* %C, float %S) { ; conditional sum whose merge phi also receives a constant 0.0 on one edge; %C is not referenced in the body
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] ; i: starts at 0, advanced by 1 in %for.inc
+ %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] ; running value: %S on entry, merged value from %for.inc afterwards
+ %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv ; &A[i]
+ %0 = load float* %arrayidx, align 4 ; %0 = A[i]
+ %arrayidx2 = getelementptr inbounds float* %B, i64 %indvars.iv ; &B[i]
+ %1 = load float* %arrayidx2, align 4 ; %1 = B[i]
+ %cmp3 = fcmp ogt float %0, %1 ; A[i] > B[i] (ordered compare)
+ br i1 %cmp3, label %if.then, label %for.inc ; false edge goes straight to the merge, value unchanged
+
+if.then:
+ %cmp6 = fcmp ogt float %1, 1.000000e+00 ; B[i] > 1.0
+ br i1 %cmp6, label %if.then8, label %if.else
+
+if.then8:
+ %add = fadd fast float %sum.033, %0 ; sum + A[i]
+ br label %for.inc
+
+if.else:
+ %cmp14 = fcmp ogt float %0, 2.000000e+00 ; A[i] > 2.0
+ br i1 %cmp14, label %if.then16, label %for.inc ; false edge is the one that supplies 0.0 to the merge phi
+
+if.then16:
+ %add19 = fadd fast float %sum.033, %1 ; sum + B[i]
+ br label %for.inc
+
+for.inc:
+ %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ 0.000000e+00, %if.else ], [ %sum.033, %for.body ] ; the %if.else input is the constant 0.0, not a value derived from %sum.033 -- the "phi input from outside the reduction"
+ %indvars.iv.next = add i64 %indvars.iv, 1 ; i + 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; exit test is done on the 32-bit truncation of the counter
+ %exitcond = icmp ne i32 %lftr.wideiv, 128 ; keep looping until the truncated counter reaches 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ %sum.1.lcssa = phi float [ %sum.1, %for.inc ] ; single-input phi carrying the final value out of the loop
+ ret float %sum.1.lcssa
+}
+
+; We can't vectorize reductions that feed another header PHI.
+; CHECK: noredux_header_phi
+; CHECK-NOT: fadd <4 x float>
+
+define float @noredux_header_phi(float* %A, float* %B, float* %C, float %S) { ; two accumulators where the first one's update also feeds the second; only %B and %S are referenced in the body
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 0, advanced by 1 each iteration
+ %sum2.09 = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ] ; second accumulator, starts at 0.0
+ %sum.08 = phi float [ %S, %entry ], [ %add, %for.body ] ; first accumulator, seeded with %S
+ %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv ; &B[i]
+ %0 = load float* %arrayidx, align 4 ; %0 = B[i]
+ %add = fadd fast float %sum.08, %0 ; first accumulator + B[i]
+ %add1 = fadd fast float %sum2.09, %add ; second accumulator + %add: %add is used here as well as by its own header phi %sum.08
+ %indvars.iv.next = add i64 %indvars.iv, 1 ; i + 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; exit test is done on the 32-bit truncation of the counter
+ %exitcond = icmp ne i32 %lftr.wideiv, 128 ; keep looping until the truncated counter reaches 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ %add1.lcssa = phi float [ %add1, %for.body ] ; final value of the second accumulator
+ %add.lcssa = phi float [ %add, %for.body ] ; final value of the first accumulator
+ %add2 = fadd fast float %add.lcssa, %add1.lcssa ; returned result combines both accumulators
+ ret float %add2
+}