diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2013-09-11 05:09:42 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2013-09-11 05:09:42 +0000 |
commit | 11250c1194830aa4cec72788dcd04f06cfe33f50 (patch) | |
tree | 565b97822af39171d0bed699dfdbd42ab43e3cce /test/Transforms/LoopStrengthReduce | |
parent | f834dce7c7d13af85be5bc8b789c1d7793db8a58 (diff) | |
download | llvm-11250c1194830aa4cec72788dcd04f06cfe33f50.tar.gz llvm-11250c1194830aa4cec72788dcd04f06cfe33f50.tar.bz2 llvm-11250c1194830aa4cec72788dcd04f06cfe33f50.tar.xz |
Teach loop-idiom about address space pointer sizes
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190491 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/LoopStrengthReduce')
3 files changed, 200 insertions, 0 deletions
diff --git a/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll new file mode 100644 index 0000000000..6333291aa6 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s +; CHECK: bb1: +; CHECK: load double addrspace(1)* [[IV:%[^,]+]] +; CHECK: store double {{.*}}, double addrspace(1)* [[IV]] + +; CHECK-NOT: cast +; Make sure the GEP has the right index type +; CHECK: getelementptr double addrspace(1)* [[IV]], i16 1 +; CHECK: br {{.*}} label %bb1 + +; Make sure the GEP has the right index type +; CHECK: getelementptr double addrspace(1)* {{.*}}, i16 + + +; This test tests several things. The load and store should use the +; same address instead of having it computed twice, and SCEVExpander should +; be able to reconstruct the full getelementptr, despite it having a few +; obstacles set in its way. +; We only check that the inner loop (bb1-bb2) is "reduced" because LSR +; currently only operates on inner loops. 
+ +target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64" + +define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind { +entry: + %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1] + br i1 %tmp, label %bb.nph3, label %return + +bb.nph: ; preds = %bb2.preheader + %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb2, %bb.nph + %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3] + %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1] + %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1] + %z0 = add i64 %tmp3, 5203 + %tmp5 = getelementptr double addrspace(1)* %p, i64 %z0 ; <double addrspace(1)*> [#uses=1] + %tmp6 = load double addrspace(1)* %tmp5, align 8 ; <double> [#uses=1] + %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1] + %z1 = add i64 %tmp4, 5203 + %tmp8 = getelementptr double addrspace(1)* %p, i64 %z1 ; <double addrspace(1)*> [#uses=1] + store double %tmp7, double addrspace(1)* %tmp8, align 8 + %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2] + br label %bb2 + +bb2: ; preds = %bb1 + %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1] + br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge + +bb2.bb3_crit_edge: ; preds = %bb2 + br label %bb3 + +bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge + %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2] + br label %bb4 + +bb4: ; preds = %bb3 + %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1] + br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge + +bb4.return_crit_edge: ; preds = %bb4 + br label %bb4.return_crit_edge.split + +bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge + br label %return + +bb.nph3: ; preds = %entry + %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1] + %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1] + %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1] + %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1] + %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1] + %tmp18 = mul i64 %tmp17, %o ; 
<i64> [#uses=1] + %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1] + br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split + +bb.nph3.split: ; preds = %bb.nph3 + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph3.split, %bb4 + %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3] + br i1 true, label %bb.nph, label %bb3 + +return: ; preds = %bb4.return_crit_edge.split, %entry + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/address-space-loop.ll b/test/Transforms/LoopStrengthReduce/address-space-loop.ll new file mode 100644 index 0000000000..9c1b213b59 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/address-space-loop.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s + +; LSR shouldn't consider %t8 to be an interesting user of %t6, and it +; should be able to form pretty GEPs. + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Copy of uglygep with a different address space +; This tests that expandAddToGEP uses the right smaller integer type for +; another address space +define void @Z4() nounwind { +; CHECK-LABEL: @Z4( +bb: + br label %bb3 + +bb1: ; preds = %bb3 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb1 + %t = add i16 %t4, 1 ; <i16> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3] + br label %bb1 + +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0 +; Hoist %t2 computation outside the loop. 
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4 +; CHECK-NEXT: br label %bb14 +bb10: ; preds = %bb9 + %t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1] + %t3 = add i16 %t4, 16 ; <i16> [#uses=1] + br label %bb14 + +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef +; Fold %t3's add within the address. +; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4 +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 +bb14: ; preds = %bb14, %bb10 + %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t2 + %t6 = load float addrspace(1)* addrspace(1)* undef + %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1] + %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t9 + br label %bb14 +} + diff --git a/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll new file mode 100644 index 0000000000..2c65261f57 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +; LSR shouldn't consider %t8 to be an interesting user of %t6, and it +; should be able to form pretty GEPs. 
+ +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Copy of uglygep with a different address space +; This tests that expandAddToGEP uses the right smaller integer type for +; another address space +define void @Z4() nounwind { +; CHECK: define void @Z4 +bb: + br label %bb3 + +bb1: ; preds = %bb3 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb1 + %t = add i16 %t4, 1 ; <i16> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3] + br label %bb1 + +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0 +; Hoist %t2 computation outside the loop. +; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4 +; CHECK-NEXT: br label %bb14 +bb10: ; preds = %bb9 + %t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1] + %t3 = add i16 %t4, 16 ; <i16> [#uses=1] + br label %bb14 + +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef +; Fold %t3's add within the address. 
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4 +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 +bb14: ; preds = %bb14, %bb10 + %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t2 + %t6 = load float addrspace(1)* addrspace(1)* undef + %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1] + %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t9 + br label %bb14 +} + |