diff options
Diffstat (limited to 'lib/Target/PowerPC/PPCInstrInfo.cpp')
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.cpp | 199 |
1 files changed, 199 insertions, 0 deletions
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index d9907a0baf..408dd06b22 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -165,6 +165,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case PPC::RESTORE_CR: case PPC::RESTORE_CRBIT: case PPC::LVX: + case PPC::LXVD2X: case PPC::RESTORE_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -190,6 +191,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case PPC::SPILL_CR: case PPC::SPILL_CRBIT: case PPC::STVX: + case PPC::STXVD2X: case PPC::SPILL_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -655,6 +657,47 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { + // We can end up with self copies and similar things as a result of VSX copy + // legalization. Promote (or just ignore) them here. 
+ const TargetRegisterInfo *TRI = &getRegisterInfo(); + if (PPC::F8RCRegClass.contains(DestReg) && + PPC::VSLRCRegClass.contains(SrcReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass); + + if (SrcReg == SuperReg) + return; + + DestReg = SuperReg; + } else if (PPC::VRRCRegClass.contains(DestReg) && + PPC::VSHRCRegClass.contains(SrcReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass); + + if (SrcReg == SuperReg) + return; + + DestReg = SuperReg; + } else if (PPC::F8RCRegClass.contains(SrcReg) && + PPC::VSLRCRegClass.contains(DestReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass); + + if (DestReg == SuperReg) + return; + + SrcReg = SuperReg; + } else if (PPC::VRRCRegClass.contains(SrcReg) && + PPC::VSHRCRegClass.contains(DestReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass); + + if (DestReg == SuperReg) + return; + + SrcReg = SuperReg; + } + unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; @@ -666,6 +709,14 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::MCRF; else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::VOR; + else if (PPC::VSRCRegClass.contains(DestReg, SrcReg)) + // FIXME: There are really two different ways this can be done, and we + // should pick the better one depending on the situation: + // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only + // issue in VSU pipeline 0. + // 2. xvmovdp/xvmovsp: This has higher latency (on the P7), 6 cycles, but + // can go to either pipeline. 
+ Opc = PPC::XXLOR; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else @@ -731,6 +782,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); NonRI = true; + } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(TM.getSubtargetImpl()->isDarwin() && "VRSAVE only needs spill/restore on Darwin"); @@ -818,6 +875,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), FrameIdx)); NonRI = true; + } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(TM.getSubtargetImpl()->isDarwin() && "VRSAVE only needs spill/restore on Darwin"); @@ -1485,6 +1546,144 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } } + +#undef DEBUG_TYPE +#define DEBUG_TYPE "ppc-vsx-copy" + +namespace llvm { + void initializePPCVSXCopyPass(PassRegistry&); +} + +namespace { + // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers + // (Altivec and scalar floating-point registers), we need to transform the + // copies into subregister copies with other restrictions. 
+  struct PPCVSXCopy : public MachineFunctionPass {
+    static char ID;
+    PPCVSXCopy() : MachineFunctionPass(ID) {
+      initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+    }
+
+    // Both pointers are (re)assigned at the top of runOnMachineFunction.
+    const PPCTargetMachine *TM;
+    const PPCInstrInfo *TII;
+
+    // Class-membership test usable on either kind of register: a virtual
+    // register matches when the class recorded for it in MRI is RC or one of
+    // RC's subclasses; any other register matches when RC contains it.
+    bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+                      MachineRegisterInfo &MRI) {
+      if (TargetRegisterInfo::isVirtualRegister(Reg))
+        return RC->hasSubClassEq(MRI.getRegClass(Reg));
+
+      return RC->contains(Reg);
+    }
+
+    // Convenience wrappers around IsRegInClass for the three register
+    // classes this pass distinguishes.
+    bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+    }
+
+    bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+    }
+
+    bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+    }
+
+protected:
+    // Rewrites every full COPY in MBB that has exactly one VSX-class operand.
+    // Returns true if at least one copy was rewritten.
+    bool processBlock(MachineBasicBlock &MBB) {
+      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+      bool Changed = false;
+
+      MachineBasicBlock::iterator It = MBB.begin(), End = MBB.end();
+      for (; It != End; ++It) {
+        MachineInstr *Copy = It;
+        if (!Copy->isFullCopy())
+          continue;
+
+        MachineOperand &DstMO = Copy->getOperand(0);
+        MachineOperand &SrcMO = Copy->getOperand(1);
+
+        // Only copies that cross the VSX / non-VSX boundary need work.
+        const bool DstIsVSX = IsVSReg(DstMO.getReg(), MRI);
+        const bool SrcIsVSX = IsVSReg(SrcMO.getReg(), MRI);
+        if (DstIsVSX == SrcIsVSX)
+          continue;
+
+        if (DstIsVSX) {
+          // Copy *into* a VSX register from a non-VSX register.
+          const bool SrcIsVR = IsVRReg(SrcMO.getReg(), MRI);
+          assert((IsF8Reg(SrcMO.getReg(), MRI) || SrcIsVR) &&
+                 "Unknown source for a VSX copy");
+
+          const TargetRegisterClass *HalfRC =
+            SrcIsVR ? &PPC::VSHRCRegClass : &PPC::VSLRCRegClass;
+          const unsigned SubIdx = SrcIsVR ? PPC::sub_128 : PPC::sub_64;
+
+          // Wrap the source in a SUBREG_TO_REG placed just before the copy.
+          // The immediate is 1 rather than 0 because the high bits are not
+          // implicitly cleared.
+          unsigned NewVReg = MRI.createVirtualRegister(HalfRC);
+          BuildMI(MBB, Copy, Copy->getDebugLoc(),
+                  TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+            .addImm(1)
+            .addOperand(SrcMO)
+            .addImm(SubIdx);
+
+          // The original copy now reads the freshly created register.
+          SrcMO.setReg(NewVReg);
+        } else {
+          // Copy *out of* a VSX register into a non-VSX register.
+          const bool DstIsVR = IsVRReg(DstMO.getReg(), MRI);
+          assert((IsF8Reg(DstMO.getReg(), MRI) || DstIsVR) &&
+                 "Unknown destination for a VSX copy");
+
+          const TargetRegisterClass *HalfRC =
+            DstIsVR ? &PPC::VSHRCRegClass : &PPC::VSLRCRegClass;
+
+          // First move the value into a new register of the matching VSX
+          // subclass...
+          unsigned NewVReg = MRI.createVirtualRegister(HalfRC);
+          BuildMI(MBB, Copy, Copy->getDebugLoc(),
+                  TII->get(TargetOpcode::COPY), NewVReg)
+            .addOperand(SrcMO);
+
+          // ...then turn the original copy into a subregister extraction
+          // from that new register.
+          SrcMO.setReg(NewVReg);
+          SrcMO.setSubReg(DstIsVR ? PPC::sub_128 : PPC::sub_64);
+        }
+
+        Changed = true;
+      }
+
+      return Changed;
+    }
+
+public:
+    // Caches the target machine and instruction info for MF, then runs
+    // processBlock over every basic block. Returns true if any block changed.
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+      TII = TM->getInstrInfo();
+
+      bool Changed = false;
+
+      MachineFunction::iterator BI = MF.begin();
+      while (BI != MF.end()) {
+        // Step the iterator before the block is processed.
+        MachineBasicBlock &Block = *BI;
+        ++BI;
+        Changed |= processBlock(Block);
+      }
+
+      return Changed;
+    }
+
+    // Adds nothing beyond what MachineFunctionPass itself declares.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+                "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+
 #undef DEBUG_TYPE
 #define DEBUG_TYPE "ppc-early-ret"
 STATISTIC(NumBCLR, "Number of early conditional returns"); |