diff options
author | Chad Rosier <mcrosier@apple.com> | 2012-04-09 20:32:02 +0000 |
---|---|---|
committer | Chad Rosier <mcrosier@apple.com> | 2012-04-09 20:32:02 +0000 |
commit | 7f354557089d93cc681c440f9e064c906e1fbd58 (patch) | |
tree | 2e210b2ea7b3828fefea848ead70dcd2736098c9 /test/CodeGen/ARM/vrev.ll | |
parent | f31ceaf8b7b1c5092fe35e20732711f5e1074118 (diff) | |
download | llvm-7f354557089d93cc681c440f9e064c906e1fbd58.tar.gz llvm-7f354557089d93cc681c440f9e064c906e1fbd58.tar.bz2 llvm-7f354557089d93cc681c440f9e064c906e1fbd58.tar.xz |
When performing a truncating store, it's possible to rearrange the data
in-register, such that we can use a single vector store rather than a
series of scalar stores.
For func_4_8, the generated code
vldr d16, LCPI0_0
vmov d17, r0, r1
vadd.i16 d16, d17, d16
vmov.u16 r0, d16[3]
strb r0, [r2, #3]
vmov.u16 r0, d16[2]
strb r0, [r2, #2]
vmov.u16 r0, d16[1]
strb r0, [r2, #1]
vmov.u16 r0, d16[0]
strb r0, [r2]
bx lr
becomes
vldr d16, LCPI0_0
vmov d17, r0, r1
vadd.i16 d16, d17, d16
vuzp.8 d16, d17
vst1.32 {d16[0]}, [r2, :32]
bx lr
I'm not fond of how this combine pessimizes 2012-03-13-DAGCombineBug.ll,
but I couldn't think of a way to judiciously apply this combine.
This
ldrh r0, [r0, #4]
strh r0, [r1]
becomes
vldr d16, [r0]
vmov.u16 r0, d16[2]
vmov.32 d16[0], r0
vuzp.16 d16, d17
vst1.32 {d16[0]}, [r1, :32]
PR11158
rdar://10703339
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154340 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/ARM/vrev.ll')
-rw-r--r-- | test/CodeGen/ARM/vrev.ll | 6 |
1 files changed, 2 insertions, 4 deletions
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll index e154334970..122ec0357f 100644 --- a/test/CodeGen/ARM/vrev.ll +++ b/test/CodeGen/ARM/vrev.ll @@ -149,12 +149,10 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind { } ; The type <2 x i16> is legalized to <2 x i32> and need to be trunc-stored -; to <2 x i16> when stored to memory. Currently ARM scalarizes these stores. -; See PR 11158 +; to <2 x i16> when stored to memory. define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp { ; CHECK: test_vrev64: -; CHECK: vst1.16 -; CHECK: vst1.16 +; CHECK: vst1.32 entry: %0 = bitcast <4 x i16>* %source to <8 x i16>* %tmp2 = load <8 x i16>* %0, align 4 |