diff options
author | James Molloy <james.molloy@arm.com> | 2012-09-06 09:55:02 +0000 |
---|---|---|
committer | James Molloy <james.molloy@arm.com> | 2012-09-06 09:55:02 +0000 |
commit | ba8562af4440753ba6175ccd54d71f79f5c4f3dc (patch) | |
tree | b3a682afcaec10c5546b6c31f0e192659dc56b66 /test | |
parent | 486270aee6ffd2a0c3c2333a8a0091c29f037aae (diff) | |
download | llvm-ba8562af4440753ba6175ccd54d71f79f5c4f3dc.tar.gz llvm-ba8562af4440753ba6175ccd54d71f79f5c4f3dc.tar.bz2 llvm-ba8562af4440753ba6175ccd54d71f79f5c4f3dc.tar.xz |
Improve codegen for BUILD_VECTORs on ARM.
If we have a BUILD_VECTOR that is mostly a constant splat, it is often better to splat that constant then insertelement the non-constant lanes instead of insertelementing every lane from an undef base.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163304 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/vdup.ll | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll index 05332e4d8c..a8c224b438 100644 --- a/test/CodeGen/ARM/vdup.ll +++ b/test/CodeGen/ARM/vdup.ll @@ -261,3 +261,37 @@ define void @redundantVdup(<8 x i8>* %ptr) nounwind { store <8 x i8> %2, <8 x i8>* %ptr, align 8 ret void } + +define <4 x i32> @tdupi(i32 %x, i32 %y) { +;CHECK: tdupi +;CHECK: vdup.32 + %1 = insertelement <4 x i32> undef, i32 %x, i32 0 + %2 = insertelement <4 x i32> %1, i32 %x, i32 1 + %3 = insertelement <4 x i32> %2, i32 %x, i32 2 + %4 = insertelement <4 x i32> %3, i32 %y, i32 3 + ret <4 x i32> %4 +} + +define <4 x float> @tdupf(float %x, float %y) { +;CHECK: tdupf +;CHECK: vdup.32 + %1 = insertelement <4 x float> undef, float %x, i32 0 + %2 = insertelement <4 x float> %1, float %x, i32 1 + %3 = insertelement <4 x float> %2, float %x, i32 2 + %4 = insertelement <4 x float> %3, float %y, i32 3 + ret <4 x float> %4 +} + +; This test checks that when splatting an element from a vector into another, +; the value isn't moved out to GPRs first. +define <4 x i32> @tduplane(<4 x i32> %invec) { +;CHECK: tduplane +;CHECK-NOT: vmov {{.*}}, d16[1] +;CHECK: vdup.32 {{.*}}, d16[1] + %in = extractelement <4 x i32> %invec, i32 1 + %1 = insertelement <4 x i32> undef, i32 %in, i32 0 + %2 = insertelement <4 x i32> %1, i32 %in, i32 1 + %3 = insertelement <4 x i32> %2, i32 %in, i32 2 + %4 = insertelement <4 x i32> %3, i32 255, i32 3 + ret <4 x i32> %4 +} |