From 2825b8f0ea989e0d50f4287970f1461a1050ffb5 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Mon, 10 Feb 2014 03:16:22 +0000 Subject: [AArch64]Implement the copy of two FPR8 registers by using FMOVss of two FPR32 registers in copyPhysReg. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201061 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 10 ++++++++++ test/CodeGen/AArch64/neon-copy.ll | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index ba185a436f..9c27f82d2b 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -132,6 +132,16 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(16); return; } + } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) { + // The copy of two FPR8 registers is implemented by the copy of two FPR32 + const TargetRegisterInfo *TRI = &getRegisterInfo(); + unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8, + &AArch64::FPR32RegClass); + unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8, + &AArch64::FPR32RegClass); + BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst) + .addReg(Src); + return; } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) { // The copy of two FPR16 registers is implemented by the copy of two FPR32 const TargetRegisterInfo *TRI = &getRegisterInfo(); diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll index 8d4a3388bb..b4d55df529 100644 --- a/test/CodeGen/AArch64/neon-copy.ll +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -1291,6 +1291,19 @@ entry: ret <2 x i64> %vecinit2 } +declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>) + +; This case tests the copy of two FPR8 registers, which is implemented by fmov +; of two FPR32 registers. +define <1 x i8> @test_copy_FPR8_FPR8(<1 x i8> %a, <1 x i8> %b) { +; CHECK-LABEL: test_copy_FPR8_FPR8: +; CHECK: usqadd b1, b0 +; CHECK-NEXT: fmov s0, s1 +entry: + %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %b, <1 x i8> %a) + ret <1 x i8> %vsqadd2.i +} + declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) { -- cgit v1.2.3