summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorPete Cooper <peter_cooper@apple.com>2012-06-15 18:07:29 +0000
committerPete Cooper <peter_cooper@apple.com>2012-06-15 18:07:29 +0000
commitcbf73908f1d4cc1cb7104bfd451af30c177cb7a5 (patch)
treeefdcd3d1241379aec44f3a38002c4c7e9912d5ec /test
parent2f135d4c14d26d00deaa72fd0f36731968161e5e (diff)
downloadllvm-cbf73908f1d4cc1cb7104bfd451af30c177cb7a5.tar.gz
llvm-cbf73908f1d4cc1cb7104bfd451af30c177cb7a5.tar.bz2
llvm-cbf73908f1d4cc1cb7104bfd451af30c177cb7a5.tar.xz
Allow SROA to split up an array of vectors into multiple vectors, even when the vectors are dynamically indexed
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158529 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--test/Transforms/ScalarRepl/dynamic-vector-gep.ll83
1 files changed, 83 insertions, 0 deletions
diff --git a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
new file mode 100644
index 0000000000..48a0da9dab
--- /dev/null
+++ b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: @test1
+; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
+; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
+
+; Split the array but don't replace the memset with an insert
+; element as its not a constant offset.
+define float @test1(i32 %idx1, i32 %idx2) {
+entry:
+ %0 = alloca [4 x <4 x float>]
+ store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
+ %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
+ %cast = bitcast float* %ptr1 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
+ %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
+ %ret = load float* %ptr2
+ ret float %ret
+}
+
+; CHECK: @test2
+; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
+; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx1
+; CHECK: store float 1.000000e+00, float* %ptr1
+; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx2
+; CHECK: %ret = load float* %ptr2
+; CHECK: ret float %ret
+
+; Do SROA on the array when it has dynamic vector reads and writes.
+define float @test2(i32 %idx1, i32 %idx2) {
+entry:
+ %0 = alloca [4 x <4 x float>]
+ store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
+ %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
+ store float 1.0, float* %ptr1
+ %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
+ %ret = load float* %ptr2
+ ret float %ret
+}
+
+; CHECK: test3
+; CHECK: %0 = alloca [4 x <4 x float>]
+; CHECK-NOT: alloca
+
+; Don't do SROA on a dynamically indexed vector when it spans
+; more than one array element of the alloca array it is within.
+define float @test3(i32 %idx1, i32 %idx2) {
+entry:
+ %0 = alloca [4 x <4 x float>]
+ store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
+ %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
+ %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
+ store float 1.0, float* %ptr1
+ %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
+ %ret = load float* %ptr2
+ ret float %ret
+}
+
+; CHECK: @test4
+; CHECK: %0 = alloca [4 x <4 x float>]
+; CHECK-NOT: alloca
+
+; Don't do SROA as the is a second dynamically indexed array
+; which may span multiple elements of the alloca.
+define float @test4(i32 %idx1, i32 %idx2) {
+entry:
+ %0 = alloca [4 x <4 x float>]
+ store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
+ %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
+ %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
+ %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
+ store float 1.0, float* %ptr1
+ %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
+ %ret = load float* %ptr4
+ ret float %ret
+}
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)