author    Arnold Schwaighofer <aschwaighofer@apple.com>  2013-07-11 15:21:55 +0000
committer Arnold Schwaighofer <aschwaighofer@apple.com>  2013-07-11 15:21:55 +0000
commit    11eb51e23935e22e1cb7b346c45713e8c9169c84 (patch)
tree      5a2fd7b42be639e60baad696a2a46aae78f2ceb1 /test/Transforms/LoopVectorize
parent    12fa02841ca1f2115d38aadd3e5d7afe08c65337 (diff)
LoopVectorize: Vectorize all accesses in address space zero with unit stride
We can vectorize these accesses because, had the address computation wrapped around the address space, the unvectorized code would have had to access a pointer value of zero, which is undefined behavior in address space zero according to LLVM IR semantics. (Thank you, Duncan, for pointing this out to me.)

Fixes PR16592.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186088 91177308-0d34-0410-b5e6-96231b3b80d8
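As a C-level sketch (my reconstruction, not part of the commit), the new test below corresponds roughly to the following loop; the unit-stride accesses through address-space-0 pointers are what let the vectorizer assume the address computation does not wrap:

/* Hypothetical C source for the @safe test: if &A[i] or &B[i] ever
 * wrapped around address space 0, the scalar loop would dereference
 * address 0, which is undefined behavior there, so the vectorizer
 * may proceed without runtime wrap checks. */
void safe(float *A, float *B, float K) {
  for (int i = 0; i < 64; ++i)
    A[i] = A[i] * (B[i] + K);
}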
Diffstat (limited to 'test/Transforms/LoopVectorize')
-rw-r--r-- test/Transforms/LoopVectorize/safegep.ll | 61
1 file changed, 61 insertions(+), 0 deletions(-)
diff --git a/test/Transforms/LoopVectorize/safegep.ll b/test/Transforms/LoopVectorize/safegep.ll
new file mode 100644
index 0000000000..46ec28beea
--- /dev/null
+++ b/test/Transforms/LoopVectorize/safegep.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+
+; We can vectorize this code because, if the address computation wrapped,
+; a load from address 0 would take place, which is undefined behaviour in
+; address space 0 according to LLVM IR semantics.
+
+; PR16592
+
+; CHECK: safe
+; CHECK: <4 x float>
+
+define void @safe(float* %A, float* %B, float %K) {
+entry:
+ br label %"<bb 3>"
+
+"<bb 3>":
+ %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
+ %pp3 = getelementptr float* %A, i32 %i_15
+ %D.1396_10 = load float* %pp3, align 4
+ %pp24 = getelementptr float* %B, i32 %i_15
+ %D.1398_15 = load float* %pp24, align 4
+ %D.1399_17 = fadd float %D.1398_15, %K
+ %D.1400_18 = fmul float %D.1396_10, %D.1399_17
+ store float %D.1400_18, float* %pp3, align 4
+ %i_19 = add nsw i32 %i_15, 1
+ %exitcond = icmp ne i32 %i_19, 64
+ br i1 %exitcond, label %"<bb 3>", label %return
+
+return:
+ ret void
+}
+
+; In a non-default address space we don't have this rule.
+
+; CHECK: notsafe
+; CHECK-NOT: <4 x float>
+
+define void @notsafe(float addrspace(5) * %A, float* %B, float %K) {
+entry:
+ br label %"<bb 3>"
+
+"<bb 3>":
+ %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
+ %pp3 = getelementptr float addrspace(5) * %A, i32 %i_15
+ %D.1396_10 = load float addrspace(5) * %pp3, align 4
+ %pp24 = getelementptr float* %B, i32 %i_15
+ %D.1398_15 = load float* %pp24, align 4
+ %D.1399_17 = fadd float %D.1398_15, %K
+ %D.1400_18 = fmul float %D.1396_10, %D.1399_17
+ store float %D.1400_18, float addrspace(5) * %pp3, align 4
+ %i_19 = add nsw i32 %i_15, 1
+ %exitcond = icmp ne i32 %i_19, 64
+ br i1 %exitcond, label %"<bb 3>", label %return
+
+return:
+ ret void
+}
+
+