diff options
author | Tim Northover <Tim.Northover@arm.com> | 2012-08-30 10:17:45 +0000 |
---|---|---|
committer | Tim Northover <Tim.Northover@arm.com> | 2012-08-30 10:17:45 +0000 |
commit | c4a32e6596f3974a6c00322db1f5f31ea448bd58 (patch) | |
tree | fdb97641f761269861dedbe3e73b56ac986ad10e | |
parent | f52f6b9ecab7458d382e6cf431278a47a7ffbd8f (diff) | |
download | llvm-c4a32e6596f3974a6c00322db1f5f31ea448bd58.tar.gz llvm-c4a32e6596f3974a6c00322db1f5f31ea448bd58.tar.bz2 llvm-c4a32e6596f3974a6c00322db1f5f31ea448bd58.tar.xz |
Add support for moving pure S-register to NEON pipeline if desired
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162898 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 73 | ||||
-rw-r--r-- | test/CodeGen/ARM/domain-conv-vmovs.ll | 64 |
2 files changed, 135 insertions, 2 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 41d0c570bf..7b7b6e3395 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3377,7 +3377,8 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { // converted. if (Subtarget.isCortexA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR)) + MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); // No other instructions can be swizzled, so just determine their domain. @@ -3490,10 +3491,78 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { .addReg(DReg, RegState::Undef) .addReg(SrcReg) .addImm(Lane)); - + // The destination must be marked as set. MIB.addReg(DstReg, RegState::Define | RegState::Implicit); break; + case ARM::VMOVS: { + if (Domain != ExeNEON) + break; + + // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; + DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); + DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); + + if (DSrc == DDst) { + // Destination can be: + // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VDUPLN32d)); + AddDefaultPred(MIB.addReg(DDst, RegState::Define) + .addReg(DDst, RegState::Undef) + .addImm(SrcLane)); + + // Neither the source or the destination are naturally represented any + // more, so add them in manually. + MIB.addReg(DstReg, RegState::Implicit | RegState::Define); + MIB.addReg(SrcReg, RegState::Implicit); + break; + } + + // In general there's no single instruction that can perform an S <-> S + // move in NEON space, but a pair of VEXT instructions *can* do the + // job. It turns out that the VEXTs needed will only use DSrc once, with + // the position based purely on the combination of lane-0 and lane-1 + // involved. For example + // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 + // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 + // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 + // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 + // + // Pattern of the MachineInstrs is: + // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) + MachineInstrBuilder NewMIB; + NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::VEXTd32), DDst); + NewMIB.addReg(SrcLane == 1 && DstLane == 1 ? DSrc : DDst, RegState::Undef); + NewMIB.addReg(SrcLane == 0 && DstLane == 0 ? DSrc : DDst, RegState::Undef); + NewMIB.addImm(1); + AddDefaultPred(NewMIB); + + if (SrcLane == DstLane) + NewMIB.addReg(SrcReg, RegState::Implicit); + + MI->setDesc(get(ARM::VEXTd32)); + MIB.addReg(DDst, RegState::Define); + MIB.addReg(SrcLane == 1 && DstLane == 0 ? DSrc : DDst, RegState::Undef); + MIB.addReg(SrcLane == 0 && DstLane == 1 ? DSrc : DDst, RegState::Undef); + MIB.addImm(1); + AddDefaultPred(MIB); + + if (SrcLane != DstLane) + MIB.addReg(SrcReg, RegState::Implicit); + + // As before, the original destination is no longer represented, add it + // implicitly. + MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + break; + } } } diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll new file mode 100644 index 0000000000..f1cd9f5840 --- /dev/null +++ b/test/CodeGen/ARM/domain-conv-vmovs.ll @@ -0,0 +1,64 @@ +; RUN: llc -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s + +define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane0to0: + %vec = insertelement <2 x float> %in, float %arg, i32 0 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d0, #1 +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane0to1: + %vec = insertelement <2 x float> %in, float %arg, i32 1 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vext.32 d1, d1, d0, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane1to0: + %vec = insertelement <2 x float> %in, float %arg, i32 0 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vext.32 d1, d0, d1, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane1to1: + %vec = insertelement <2 x float> %in, float %arg, i32 1 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d0, d1, #1 +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + + +define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) { +; CHECK: test_vmovs_via_vdup: + + ; Do an operation (which will end up NEON because of +neonfp) to convince the + ; execution-domain pass that NEON is a good thing to use. + %res = fadd float %ret, %ret + ; It makes sense for LLVM to do the addition in d0 here, because it's going + ; to be returned. This means it will want a "vmov s0, s1": +; CHECK: vdup.32 d0, d0[1] + + ret float %res +} + |