diff options
author | Tim Northover <tnorthover@apple.com> | 2014-02-10 14:04:07 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-02-10 14:04:07 +0000 |
commit | 9ed30bb2303dc4676af9892f780a14a019d030c6 (patch) | |
tree | a2c854b5dc79975549dcdbe0498d187458061488 /test | |
parent | 5a2ae984073242d922850d23c859cc0439b54401 (diff) | |
download | llvm-9ed30bb2303dc4676af9892f780a14a019d030c6.tar.gz llvm-9ed30bb2303dc4676af9892f780a14a019d030c6.tar.bz2 llvm-9ed30bb2303dc4676af9892f780a14a019d030c6.tar.xz |
ARM: use LLVM IR to represent the vshrn operation

vshrn is just the combination of a right shift and a truncate (and the limits
on the immediate value actually mean the signedness of the shift doesn't
matter). Using that representation allows us to get rid of an ARM-specific
intrinsic, share more code with AArch64 and hopefully get better code out of
the mid-end optimisers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201085 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/reg_sequence.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/ARM/vshrn.ll | 47 |
2 files changed, 43 insertions, 12 deletions
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 25484f4848..b245674c3c 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -34,9 +34,11 @@ entry:
   %12 = sext <4 x i16> %11 to <4 x i32> ; <<4 x i32>> [#uses=1]
   %13 = mul <4 x i32> %1, %9 ; <<4 x i32>> [#uses=1]
   %14 = mul <4 x i32> %3, %12 ; <<4 x i32>> [#uses=1]
-  %15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
-  %16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
-  %17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
+  %15 = lshr <4 x i32> %13, <i32 12, i32 12, i32 12, i32 12>
+  %trunc_15 = trunc <4 x i32> %15 to <4 x i16>
+  %16 = lshr <4 x i32> %14, <i32 12, i32 12, i32 12, i32 12>
+  %trunc_16 = trunc <4 x i32> %16 to <4 x i16>
+  %17 = shufflevector <4 x i16> %trunc_15, <4 x i16> %trunc_16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
   %18 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1]
   tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1)
   ret void
diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll
index 40a94fee0d..cc936be829 100644
--- a/test/CodeGen/ARM/vshrn.ll
+++ b/test/CodeGen/ARM/vshrn.ll
@@ -4,29 +4,58 @@ define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshrns8:
 ;CHECK: vshrn.i16
   %tmp1 = load <8 x i16>* %A
-  %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
-  ret <8 x i8> %tmp2
+  %tmp2 = lshr <8 x i16> %tmp1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+  ret <8 x i8> %tmp3
 }
 
 define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshrns16:
 ;CHECK: vshrn.i32
   %tmp1 = load <4 x i32>* %A
-  %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
-  ret <4 x i16> %tmp2
+  %tmp2 = ashr <4 x i32> %tmp1, <i32 16, i32 16, i32 16, i32 16>
+  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+  ret <4 x i16> %tmp3
 }
 
 define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshrns32:
 ;CHECK: vshrn.i64
   %tmp1 = load <2 x i64>* %A
-  %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
-  ret <2 x i32> %tmp2
+  %tmp2 = ashr <2 x i64> %tmp1, <i64 32, i64 32>
+  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
+  ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vshrns8_bad(<8 x i16>* %A) nounwind {
+; CHECK-LABEL: vshrns8_bad:
+; CHECK: vshr.s16
+; CHECK: vmovn.i16
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = ashr <8 x i16> %tmp1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+  ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vshrns16_bad(<4 x i32>* %A) nounwind {
+; CHECK-LABEL: vshrns16_bad:
+; CHECK: vshr.u32
+; CHECK: vmovn.i32
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = lshr <4 x i32> %tmp1, <i32 17, i32 17, i32 17, i32 17>
+  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+  ret <4 x i16> %tmp3
 }
 
-declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+define <2 x i32> @vshrns32_bad(<2 x i64>* %A) nounwind {
+; CHECK-LABEL: vshrns32_bad:
+; CHECK: vshr.u64
+; CHECK: vmovn.i64
+  %tmp1 = load <2 x i64>* %A
+  %tmp2 = lshr <2 x i64> %tmp1, <i64 33, i64 33>
+  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
+  ret <2 x i32> %tmp3
+}
 
 define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vrshrns8: