summaryrefslogtreecommitdiff
path: root/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-05-15 01:44:30 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-05-15 01:44:30 +0000
commit1386692ef64d3151da8986589eadf0c58aba5c50 (patch)
tree0a0d7b40736b61bca369b8658fa8a669659dc97c /test/Transforms/LoopVectorize
parenta88d974ce274152d2f8f28660ba277906bde2384 (diff)
downloadllvm-1386692ef64d3151da8986589eadf0c58aba5c50.tar.gz
llvm-1386692ef64d3151da8986589eadf0c58aba5c50.tar.bz2
llvm-1386692ef64d3151da8986589eadf0c58aba5c50.tar.xz
LoopVectorize: Hoist conditional loads if possible
InstCombine can be uncooperative to vectorization and sink loads into conditional blocks. This prevents vectorization. Undo this optimization if there are unconditional memory accesses to the same addresses in the loop. radar://13815763 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181860 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/LoopVectorize')
-rw-r--r--test/Transforms/LoopVectorize/hoist-loads.ll69
1 files changed, 69 insertions, 0 deletions
diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll
new file mode 100644
index 0000000000..fad17350cd
--- /dev/null
+++ b/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -0,0 +1,69 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+; Make sure we can vectorize in the presence of hoistable conditional loads.
+; CHECK: hoist_cond_load
+; CHECK: load <2 x float>
+
+define void @hoist_cond_load() {
+entry:
+ br label %for.body
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+ %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
+ %0 = load float* %arrayidx2, align 4
+ %cmp3 = fcmp oeq float %0, 0.000000e+00
+ br i1 %cmp3, label %if.end9, label %if.else
+
+if.else:
+ %1 = load float* %arrayidx, align 4
+ br label %if.end9
+
+if.end9:
+ %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
+ store float %tmp.0, float* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; However, we can't hoist loads whose address we have not seen unconditionally
+; accessed.
+; CHECK: dont_hoist_cond_load
+; CHECK-NOT: load <2 x float>
+
+define void @dont_hoist_cond_load() {
+entry:
+ br label %for.body
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+ %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
+ %0 = load float* %arrayidx2, align 4
+ %cmp3 = fcmp oeq float %0, 0.000000e+00
+ br i1 %cmp3, label %if.end9, label %if.else
+
+if.else:
+ %1 = load float* %arrayidx, align 4
+ br label %if.end9
+
+if.end9:
+ %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
+ store float %tmp.0, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}