diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2013-09-11 05:09:42 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2013-09-11 05:09:42 +0000 |
commit | 11250c1194830aa4cec72788dcd04f06cfe33f50 (patch) | |
tree | 565b97822af39171d0bed699dfdbd42ab43e3cce /test/Transforms/LoopStrengthReduce | |
parent | f834dce7c7d13af85be5bc8b789c1d7793db8a58 (diff) | |
download | llvm-11250c1194830aa4cec72788dcd04f06cfe33f50.tar.gz llvm-11250c1194830aa4cec72788dcd04f06cfe33f50.tar.bz2 llvm-11250c1194830aa4cec72788dcd04f06cfe33f50.tar.xz |
Teach loop-idiom about address space pointer sizes
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190491 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/LoopStrengthReduce')
3 files changed, 200 insertions, 0 deletions
diff --git a/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll new file mode 100644 index 0000000000..6333291aa6 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s +; CHECK: bb1: +; CHECK: load double addrspace(1)* [[IV:%[^,]+]] +; CHECK: store double {{.*}}, double addrspace(1)* [[IV]] + +; CHECK-NOT: cast +; Make sure the GEP has the right index type +; CHECK: getelementptr double addrspace(1)* [[IV]], i16 1 +; CHECK: br {{.*}} label %bb1 + +; Make sure the GEP has the right index type +; CHECK: getelementptr double addrspace(1)* {{.*}}, i16 + + +; This test tests several things. The load and store should use the +; same address instead of having it computed twice, and SCEVExpander should +; be able to reconstruct the full getelementptr, despite it having a few +; obstacles set in its way. +; We only check that the inner loop (bb1-bb2) is "reduced" because LSR +; currently only operates on inner loops. 
+ +target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64" + +define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind { +entry: + %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1] + br i1 %tmp, label %bb.nph3, label %return + +bb.nph: ; preds = %bb2.preheader + %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb2, %bb.nph + %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3] + %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1] + %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1] + %z0 = add i64 %tmp3, 5203 + %tmp5 = getelementptr double addrspace(1)* %p, i64 %z0 ; <double addrspace(1)*> [#uses=1] + %tmp6 = load double addrspace(1)* %tmp5, align 8 ; <double> [#uses=1] + %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1] + %z1 = add i64 %tmp4, 5203 + %tmp8 = getelementptr double addrspace(1)* %p, i64 %z1 ; <double addrspace(1)*> [#uses=1] + store double %tmp7, double addrspace(1)* %tmp8, align 8 + %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2] + br label %bb2 + +bb2: ; preds = %bb1 + %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1] + br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge + +bb2.bb3_crit_edge: ; preds = %bb2 + br label %bb3 + +bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge + %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2] + br label %bb4 + +bb4: ; preds = %bb3 + %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1] + br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge + +bb4.return_crit_edge: ; preds = %bb4 + br label %bb4.return_crit_edge.split + +bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge + br label %return + +bb.nph3: ; preds = %entry + %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1] + %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1] + %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1] + %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1] + %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1] + %tmp18 = mul i64 %tmp17, %o ; 
<i64> [#uses=1] + %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1] + br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split + +bb.nph3.split: ; preds = %bb.nph3 + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph3.split, %bb4 + %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3] + br i1 true, label %bb.nph, label %bb3 + +return: ; preds = %bb4.return_crit_edge.split, %entry + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/address-space-loop.ll b/test/Transforms/LoopStrengthReduce/address-space-loop.ll new file mode 100644 index 0000000000..9c1b213b59 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/address-space-loop.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s + +; LSR shouldn't consider %t8 to be an interesting user of %t6, and it +; should be able to form pretty GEPs. + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Copy of uglygep with a different address space +; This tests that expandAddToGEP uses the right smaller integer type for +; another address space +define void @Z4() nounwind { +; CHECK-LABEL: @Z4( +bb: + br label %bb3 + +bb1: ; preds = %bb3 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb1 + %t = add i16 %t4, 1 ; <i16> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3] + br label %bb1 + +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0 +; Hoist %t2 computation outside the loop. 
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4 +; CHECK-NEXT: br label %bb14 +bb10: ; preds = %bb9 + %t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1] + %t3 = add i16 %t4, 16 ; <i16> [#uses=1] + br label %bb14 + +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef +; Fold %t3's add within the address. +; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4 +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 +bb14: ; preds = %bb14, %bb10 + %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t2 + %t6 = load float addrspace(1)* addrspace(1)* undef + %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1] + %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t9 + br label %bb14 +} + diff --git a/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll new file mode 100644 index 0000000000..2c65261f57 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +; LSR shouldn't consider %t8 to be an interesting user of %t6, and it +; should be able to form pretty GEPs. 
+ +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Copy of uglygep with a different address space +; This tests that expandAddToGEP uses the right smaller integer type for +; another address space +define void @Z4() nounwind { +; CHECK: define void @Z4 +bb: + br label %bb3 + +bb1: ; preds = %bb3 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb1 + %t = add i16 %t4, 1 ; <i16> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3] + br label %bb1 + +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0 +; Hoist %t2 computation outside the loop. +; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4 +; CHECK-NEXT: br label %bb14 +bb10: ; preds = %bb9 + %t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1] + %t3 = add i16 %t4, 16 ; <i16> [#uses=1] + br label %bb14 + +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef +; Fold %t3's add within the address. 
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4 +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 +bb14: ; preds = %bb14, %bb10 + %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t2 + %t6 = load float addrspace(1)* addrspace(1)* undef + %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1] + %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t9 + br label %bb14 +} + |