diff options
author | Pete Cooper <peter_cooper@apple.com> | 2012-06-17 03:58:26 +0000 |
---|---|---|
committer | Pete Cooper <peter_cooper@apple.com> | 2012-06-17 03:58:26 +0000 |
commit | 80f020a34ac60c7552c8e9b9c1f4b84c63941b57 (patch) | |
tree | 91021bdb245066182be157c59e32c53b5ab2c2a6 /test | |
parent | e04690e092fdc4d27a8642775892293f4ae6ede3 (diff) | |
download | llvm-80f020a34ac60c7552c8e9b9c1f4b84c63941b57.tar.gz llvm-80f020a34ac60c7552c8e9b9c1f4b84c63941b57.tar.bz2 llvm-80f020a34ac60c7552c8e9b9c1f4b84c63941b57.tar.xz |
Now that SROA can form alloca's for dynamic vector accesses, further improve it to be able to replace operations on these vector alloca's with insert/extract element insts
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158623 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/Transforms/ScalarRepl/dynamic-vector-gep.ll | 95 |
1 files changed, 62 insertions, 33 deletions
diff --git a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll index af7f4398d1..565cd76164 100644 --- a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll +++ b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll @@ -4,12 +4,14 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 target triple = "x86_64-apple-darwin10.0.0" ; CHECK: @test1 -; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float> -; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float> -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]] +; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float> +; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]] +; CHECK: memset +; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2 ; Split the array but don't replace the memset with an insert ; element as its not a constant offset. +; The load, however, can be replaced with an extract element. define float @test1(i32 %idx1, i32 %idx2) { entry: %0 = alloca [4 x <4 x float>] @@ -23,13 +25,8 @@ entry: } ; CHECK: @test2 -; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float> -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]] -; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx1 -; CHECK: store float 1.000000e+00, float* %ptr1 -; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx2 -; CHECK: %ret = load float* %ptr2 -; CHECK: ret float %ret +; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1 +; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2 ; Do SROA on the array when it has dynamic vector reads and writes. define float @test2(i32 %idx1, i32 %idx2) { @@ -61,13 +58,34 @@ entry: ret float %ret } -; CHECK: @test4 +; CHECK: test4 +; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1 +; CHECK: extractelement <16 x float> %0, i32 %idx2 + +; Don't do SROA on a dynamically indexed vector when it spans +; more than one array element of the alloca array it is within. +; However, unlike test3, the store is on the vector type +; so SROA will convert the large alloca into the large vector +; type and do all accesses with insert/extract element +define float @test4(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>* + store <16 x float> zeroinitializer, <16 x float>* %bigvec + %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: @test5 ; CHECK: %0 = alloca [4 x <4 x float>] ; CHECK-NOT: alloca ; Don't do SROA as the is a second dynamically indexed array ; which may span multiple elements of the alloca. -define float @test4(i32 %idx1, i32 %idx2) { +define float @test5(i32 %idx1, i32 %idx2) { entry: %0 = alloca [4 x <4 x float>] store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 @@ -80,15 +98,9 @@ entry: ret float %ret } -; CHECK: test5 -; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float> -; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float> -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]] -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc1]] -; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc0]], i32 0, i32 %idx1 -; CHECK: store float 1.000000e+00, float* %ptr1 -; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc1]], i32 0, i32 %idx2 -; CHECK: %ret = load float* %ptr2 +; CHECK: test6 +; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1 +; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2 %vector.pair = type { %vector.anon, %vector.anon } %vector.anon = type { %vector } @@ -99,7 +111,7 @@ entry: ; the original GEP, just the indices it needs to get to the correct offset of ; some type, not necessarily the dynamic vector. ; This test makes sure we don't have this crash. -define float @test5(i32 %idx1, i32 %idx2) { +define float @test6(i32 %idx1, i32 %idx2) { entry: %0 = alloca %vector.pair store %vector.pair zeroinitializer, %vector.pair* %0 @@ -110,21 +122,15 @@ entry: ret float %ret } -; CHECK: test6 -; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float> -; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float> -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]] -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc1]] -; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc0]], i32 0, i32 %idx1 -; CHECK: store float 1.000000e+00, float* %ptr1 -; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc1]], i32 0, i32 %idx2 -; CHECK: %ret = load float* %ptr2 +; CHECK: test7 +; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1 +; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2 %array.pair = type { [2 x %array.anon], %array.anon } %array.anon = type { [2 x %vector] } -; This is the same as test5 and tests the same crash, but on arrays. -define float @test6(i32 %idx1, i32 %idx2) { +; This is the same as test6 and tests the same crash, but on arrays. +define float @test7(i32 %idx1, i32 %idx2) { entry: %0 = alloca %array.pair store %array.pair zeroinitializer, %array.pair* %0 @@ -135,4 +141,27 @@ entry: ret float %ret } +; CHECK: test8 +; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1 +; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]] +; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2 +; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]] + +; Do SROA on the vector when it has dynamic vector reads and writes +; from a non-zero offset. +define float @test8(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca <4 x float> + store <4 x float> zeroinitializer, <4 x float>* %0 + %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1 + %ptr2 = bitcast float* %ptr1 to <3 x float>* + %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1 + store float 1.0, float* %ptr3 + %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2 + %ptr5 = bitcast float* %ptr4 to <2 x float>* + %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2 + %ret = load float* %ptr6 + ret float %ret +} + declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) |