From 824a70a384988aebbe3b46254a3631e81a8f0690 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 27 Oct 2011 21:21:05 +0000 Subject: Avoid partial CPSR dependency from loop backedges. rdar://10357570 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143145 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Thumb2SizeReduction.cpp | 67 ++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 24 deletions(-) (limited to 'lib/Target/ARM/Thumb2SizeReduction.cpp') diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 89a155c5a7..e5fc8b4fdd 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -146,7 +146,8 @@ namespace { /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. DenseMap ReduceOpcodeMap; - bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use); + bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use, + bool IsSelfLoop); bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, bool is2Addr, ARMCC::CondCodes Pred, @@ -157,19 +158,21 @@ namespace { bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, - MachineInstr *CPSRDef); + MachineInstr *CPSRDef, bool IsSelfLoop); /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address /// instruction. bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef); + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop); /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit /// non-two-address instruction. bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef); + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop); /// ReduceMBB - Reduce width of instructions in the specified basic block. bool ReduceMBB(MachineBasicBlock &MBB); @@ -210,10 +213,17 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { /// In this case it would have been ok to narrow the mul.w to muls since there /// are indirect RAW dependency between the muls and the mul.w bool -Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) { - if (!Def || !STI->avoidCPSRPartialUpdate()) +Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use, + bool FirstInSelfLoop) { + // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder). + if (!STI->avoidCPSRPartialUpdate()) return false; + if (!Def) + // If this BB loops back to itself, conservatively avoid narrowing the + // first instruction that does partial flag update. + return FirstInSelfLoop; + SmallSet Defs; for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { const MachineOperand &MO = Def->getOperand(i); @@ -476,15 +486,16 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::t2ADDri) { // If the source register is SP, try to reduce to tADDrSPi, otherwise // it's a normal reduce. if (MI->getOperand(1).getReg() != ARM::SP) { - if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } // Try to reduce to tADDrSPi. unsigned Imm = MI->getOperand(2).getImm(); @@ -535,12 +546,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, switch (Opc) { default: break; case ARM::t2ADDSri: { - if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; // fallthrough } case ARM::t2ADDSrr: - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } } break; @@ -552,13 +563,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, case ARM::t2UXTB: case ARM::t2UXTH: if (MI->getOperand(2).getImm() == 0) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); break; case ARM::t2MOVi16: // Can convert only 'pure' immediate operands, not immediates obtained as // globals' addresses. if (MI->getOperand(1).isImm()) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); break; case ARM::t2CMPrr: { // Try to reduce to the lo-reg only version first. Why there are two @@ -568,9 +579,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, // source insn opcode. So for now, we hack a local entry record to use. static const ReduceEntry NarrowEntry = { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; - if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef)) + if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } } return false; @@ -579,7 +590,8 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; @@ -637,7 +649,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && - canAddPseudoFlagDep(CPSRDef, MI)) + canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop)) return false; // Add the 16-bit instruction. @@ -674,7 +686,8 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; @@ -727,7 +740,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && - canAddPseudoFlagDep(CPSRDef, MI)) + canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop)) return false; // Add the 16-bit instruction. @@ -818,6 +831,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineInstr *CPSRDef = 0; + // If this BB loops back to itself, conservatively avoid narrowing the + // first instruction that does partial flag update. + bool IsSelfLoop = MBB.isSuccessor(&MBB); MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; for (; MII != E; MII = NextMII) { @@ -832,7 +848,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { const ReduceEntry &Entry = ReduceTable[OPI->second]; // Ignore "special" cases for now. if (Entry.Special) { - if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -842,7 +858,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Try to transform to a 16-bit two-address instruction. if (Entry.NarrowOpc2 && - ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -851,7 +867,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Try to transform to a 16-bit non-two-address instruction. if (Entry.NarrowOpc1 && - ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -861,12 +877,15 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { ProcessNext: bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); - if (MI->getDesc().isCall()) + if (MI->getDesc().isCall()) { // Calls don't really set CPSR. CPSRDef = 0; - else if (DefCPSR) + IsSelfLoop = false; + } else if (DefCPSR) { // This is the last CPSR defining instruction. CPSRDef = MI; + IsSelfLoop = false; + } } return Modified; -- cgit v1.2.3