diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2014-02-26 08:25:02 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2014-02-26 08:25:02 +0000 |
commit | 5b95cec37c7370c5afb4f05c161f1f269f660fe2 (patch) | |
tree | 766123b290718be77735b7e2a37ce5d2af4dfece /test | |
parent | 38e90e3de1c05ccfd469275c08d403f638cdcce1 (diff) | |
download | llvm-5b95cec37c7370c5afb4f05c161f1f269f660fe2.tar.gz llvm-5b95cec37c7370c5afb4f05c161f1f269f660fe2.tar.bz2 llvm-5b95cec37c7370c5afb4f05c161f1f269f660fe2.tar.xz |
[SROA] Teach SROA how to handle pointers from address spaces other than
the default.
Based on the patch by Matt Arsenault, D1764!
I switched one place to use the more direct pointer type to compute the
desired address space, and I reworked the memcpy rewriting section to
reflect significant refactorings that this patch helped inspire.
Thanks to several of the folks who helped review and improve the patch
as well.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202247 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/Transforms/SROA/address-spaces.ll | 68 | ||||
-rw-r--r-- | test/Transforms/SROA/basictest.ll | 21 | ||||
-rw-r--r-- | test/Transforms/SROA/vector-promotion.ll | 47 |
3 files changed, 135 insertions, 1 deletions
diff --git a/test/Transforms/SROA/address-spaces.ll b/test/Transforms/SROA/address-spaces.ll new file mode 100644 index 0000000000..847f2851bb --- /dev/null +++ b/test/Transforms/SROA/address-spaces.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1) + + +; Make sure an illegal bitcast isn't introduced +define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) { +; CHECK-LABEL: @test_address_space_1_1( +; CHECK: load <2 x i64> addrspace(1)* %a, align 2 +; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 +; CHECK: ret void + %aa = alloca <2 x i64>, align 16 + %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* + %aaptr = bitcast <2 x i64>* %aa to i8* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false) + %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)* + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false) + ret void +} + +define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) { +; CHECK-LABEL: @test_address_space_1_0( +; CHECK: load <2 x i64> addrspace(1)* %a, align 2 +; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2 +; CHECK: ret void + %aa = alloca <2 x i64>, align 16 + %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* + %aaptr = bitcast <2 x i64>* %aa to i8* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false) + %bptr = bitcast i16* %b to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false) + ret void +} + +define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) { +; CHECK-LABEL: @test_address_space_0_1( +; CHECK: load <2 x i64>* %a, align 2 +; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 +; CHECK: ret void + %aa = alloca <2 x i64>, align 16 + %aptr = bitcast <2 x i64>* %a to i8* + %aaptr = bitcast <2 x i64>* %aa to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false) + %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)* + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false) + ret void +} + +%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] } + +; Function Attrs: nounwind +define void @copy_struct([5 x i64] %in.coerce) { +; CHECK-LABEL: @copy_struct( +; CHECK-NOT: memcpy +for.end: + %in = alloca %struct.struct_test_27.0.13, align 8 + %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]* + store [5 x i64] %in.coerce, [5 x i64]* %0, align 8 + %scevgep9 = getelementptr %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0 + %scevgep910 = bitcast i32* %scevgep9 to i8* + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* undef, i8* %scevgep910, i32 16, i32 4, i1 false) + ret void +} + diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index a3bf15fb7a..efd46da797 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -sroa -S | FileCheck %s ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" declare void @llvm.lifetime.start(i64, i8* nocapture) declare void @llvm.lifetime.end(i64, i8* nocapture) @@ -404,6 +404,7 @@ entry: } declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) nounwind declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind @@ -1150,6 +1151,24 @@ entry: ; CHECK: ret } +define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) { +; Make sure this the right address space pointer is used for type check. +; CHECK-LABEL: @PR14105_as1( + +entry: + %a = alloca { [16 x i8] }, align 8 +; CHECK: alloca [16 x i8], align 8 + + %gep = getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i64 -1 +; CHECK-NEXT: getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i64 0 + + %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)* + %cast2 = bitcast { [16 x i8 ] }* %a to i8* + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i32 8, i1 true) + ret void +; CHECK: ret +} + define void @PR14465() { ; Ensure that we don't crash when analyzing a alloca larger than the maximum ; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1). diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll index 4f084214d3..9c9f6a1d08 100644 --- a/test/Transforms/SROA/vector-promotion.ll +++ b/test/Transforms/SROA/vector-promotion.ll @@ -150,6 +150,53 @@ entry: ; CHECK-NEXT: ret } +declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) nounwind + +; Same as test4 with a different sized address space pointer source. +define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) { +; CHECK-LABEL: @test4_as1( +entry: + %a = alloca [2 x <4 x i32>] +; CHECK-NOT: alloca + + %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0 + store <4 x i32> %x, <4 x i32>* %a.x + %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1 + store <4 x i32> %y, <4 x i32>* %a.y +; CHECK-NOT: store + + %a.y.cast = bitcast <4 x i32>* %a.y to i8* + %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i32 1, i1 false) +; CHECK-NOT: memcpy + + %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 + %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* + %z.tmp1 = getelementptr inbounds <4 x i32> addrspace(1)* %z, i16 0, i16 2 + %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i32 1, i1 false) + %tmp1 = load i32* %a.tmp1 + %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3 + %tmp2 = load i32* %a.tmp2 + %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 + %tmp3 = load i32* %a.tmp3 +; CHECK-NOT: memcpy +; CHECK: %[[load:.*]] = load <4 x i32> addrspace(1)* %z +; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32> addrspace(1)* %z, i64 0, i64 2 +; CHECK-NEXT: %[[element_load:.*]] = load i32 addrspace(1)* %[[gep]] +; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2 +; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2 +; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3 +; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0 + + %tmp4 = add i32 %tmp1, %tmp2 + %tmp5 = add i32 %tmp3, %tmp4 + ret i32 %tmp5 +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: ret +} + define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) { ; CHECK-LABEL: @test5( ; The same as the above, but with reversed source and destination for the |