diff options
author | Jim Grosbach <grosbach@apple.com> | 2013-07-08 18:18:52 +0000 |
---|---|---|
committer | Jim Grosbach <grosbach@apple.com> | 2013-07-08 18:18:52 +0000 |
commit | dc2d418dd29ad9396aea06f2b72c9a7d29b30940 (patch) | |
tree | fee86f8ce5d436cac3a36e32d6fa71637efb781c /test | |
parent | 66f464ee266b31bb02058c49a5abe3a6b77f080b (diff) | |
download | llvm-dc2d418dd29ad9396aea06f2b72c9a7d29b30940.tar.gz llvm-dc2d418dd29ad9396aea06f2b72c9a7d29b30940.tar.bz2 llvm-dc2d418dd29ad9396aea06f2b72c9a7d29b30940.tar.xz |
ARM: Improve codegen for generic vselect.
Fall back to by-element inserts rather than building the vector up on the stack.
rdar://14351991
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185846 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/vext.ll | 23 | ||||
-rw-r--r-- | test/CodeGen/ARM/vselect_imax.ll | 46 |
2 files changed, 39 insertions, 30 deletions
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index f404eb8be5..ef22a3ba53 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -136,20 +136,26 @@ define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; We should ignore a build_vector with more than two sources. ; Use illegal <32 x i16> type to produce such a shuffle after legalizing types. -; Try to look for fallback to stack expansion. +; Try to look for fallback to by-element inserts. define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind { ;CHECK: test_multisource: -;CHECK: vst1.16 +;CHECK: vmov.16 [[REG:d[0-9]+]][0] +;CHECK: vmov.16 [[REG]][1] +;CHECK: vmov.16 [[REG]][2] +;CHECK: vmov.16 [[REG]][3] %tmp1 = load <32 x i16>* %B %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24> ret <4 x i16> %tmp2 } ; We don't handle shuffles using more than half of a 128-bit vector. -; Again, test for fallback to stack expansion +; Again, test for fallback to by-element inserts. define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind { ;CHECK: test_largespan: -;CHECK: vst1.16 +;CHECK: vmov.16 [[REG:d[0-9]+]][0] +;CHECK: vmov.16 [[REG]][1] +;CHECK: vmov.16 [[REG]][2] +;CHECK: vmov.16 [[REG]][3] %tmp1 = load <8 x i16>* %B %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ret <4 x i16> %tmp2 @@ -160,7 +166,14 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind { ; lowering loop can result otherwise). 
define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: test_illegal: -;CHECK: vst1.16 +;CHECK: vmov.16 [[REG:d[0-9]+]][0] +;CHECK: vmov.16 [[REG]][1] +;CHECK: vmov.16 [[REG]][2] +;CHECK: vmov.16 [[REG]][3] +;CHECK: vmov.16 [[REG2:d[0-9]+]][0] +;CHECK: vmov.16 [[REG2]][1] +;CHECK: vmov.16 [[REG2]][2] +;CHECK: vmov.16 [[REG2]][3] %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9> diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll index 7e79d6c68c..9744f4dde8 100644 --- a/test/CodeGen/ARM/vselect_imax.ll +++ b/test/CodeGen/ARM/vselect_imax.ll @@ -1,3 +1,4 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; Make sure that ARM backend with NEON handles vselect. @@ -20,11 +21,8 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2, %v0 = load %T0_10* %loadaddr %v1 = load %T0_10* %loadaddr2 %c = icmp slt %T0_10 %v0, %v1 -; CHECK: vst1 -; CHECK: vst1 -; CHECK: vst1 -; CHECK: vst1 -; CHECK: vld +; CHECK: vbsl +; CHECK: vbsl ; COST: func_blend10 ; COST: cost of 40 {{.*}} select %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1 @@ -39,10 +37,8 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2, %v0 = load %T0_14* %loadaddr %v1 = load %T0_14* %loadaddr2 %c = icmp slt %T0_14 %v0, %v1 -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb +; CHECK: vbsl +; CHECK: vbsl ; COST: func_blend14 ; COST: cost of 41 {{.*}} select %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1 @@ -54,13 +50,11 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2, ; CHECK: func_blend15: define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2, %T1_15* %blend, %T0_15* %storeaddr) { +; CHECK: vbsl +; CHECK: vbsl %v0 = load %T0_15* %loadaddr 
%v1 = load %T0_15* %loadaddr2 %c = icmp slt %T0_15 %v0, %v1 -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb ; COST: func_blend15 ; COST: cost of 82 {{.*}} select %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1 @@ -72,13 +66,11 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2, ; CHECK: func_blend18: define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2, %T1_18* %blend, %T0_18* %storeaddr) { +; CHECK: vbsl +; CHECK: vbsl %v0 = load %T0_18* %loadaddr %v1 = load %T0_18* %loadaddr2 %c = icmp slt %T0_18 %v0, %v1 -; CHECK: strh -; CHECK: strh -; CHECK: strh -; CHECK: strh ; COST: func_blend18 ; COST: cost of 19 {{.*}} select %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1 @@ -90,13 +82,13 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2, ; CHECK: func_blend19: define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2, %T1_19* %blend, %T0_19* %storeaddr) { +; CHECK: vbsl +; CHECK: vbsl +; CHECK: vbsl +; CHECK: vbsl %v0 = load %T0_19* %loadaddr %v1 = load %T0_19* %loadaddr2 %c = icmp slt %T0_19 %v0, %v1 -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb ; COST: func_blend19 ; COST: cost of 50 {{.*}} select %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1 @@ -108,13 +100,17 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2, ; CHECK: func_blend20: define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2, %T1_20* %blend, %T0_20* %storeaddr) { +; CHECK: vbsl +; CHECK: vbsl +; CHECK: vbsl +; CHECK: vbsl +; CHECK: vbsl +; CHECK: vbsl +; CHECK: vbsl +; CHECK: vbsl %v0 = load %T0_20* %loadaddr %v1 = load %T0_20* %loadaddr2 %c = icmp slt %T0_20 %v0, %v1 -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb ; COST: func_blend20 ; COST: cost of 100 {{.*}} select %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1 |