From 7eb0d8148e1210d9e31ab471477de47b53bab117 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 9 Apr 2013 22:58:37 +0000 Subject: Allow PPC B and BLR to be if-converted into some predicated forms This enables us to form predicated branches (which are the same conditional branches we had before) and also a larger set of predicated returns (including instructions like bdnzlr which is a conditional return and loop-counter decrement all in one). At the moment, if conversion does not capture all possible opportunities. A simple example is provided in early-ret2.ll, where if conversion forms one predicated return, and then the PPCEarlyReturn pass picks up the other one. So, at least for now, we'll keep both mechanisms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179134 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstr64Bit.td | 9 +++ lib/Target/PowerPC/PPCInstrInfo.cpp | 137 ++++++++++++++++++++++++++++++++ lib/Target/PowerPC/PPCInstrInfo.h | 47 +++++++++++ lib/Target/PowerPC/PPCInstrInfo.td | 7 ++ lib/Target/PowerPC/PPCTargetMachine.cpp | 8 ++ test/CodeGen/PowerPC/bdzlr.ll | 63 +++++++++++++++ test/CodeGen/PowerPC/early-ret2.ll | 26 ++++++ 7 files changed, 297 insertions(+) create mode 100644 test/CodeGen/PowerPC/bdzlr.ll create mode 100644 test/CodeGen/PowerPC/early-ret2.ll diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 584829110c..a3049d4aea 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -83,8 +83,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), "bdnz $dst">; } + + let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in { + def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + "bdzlr", BrB, []>; + def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + "bdnzlr", BrB, []>; + } } + + let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in 
{ // Convenient aliases for call instructions let Uses = [RM] in { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 219d8931a2..51bc4f23cc 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -876,6 +876,143 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, return true; } +bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const { + unsigned OpC = MI->getOpcode(); + switch (OpC) { + default: + return false; + case PPC::BCC: + case PPC::BCLR: + case PPC::BDZLR: + case PPC::BDZLR8: + case PPC::BDNZLR: + case PPC::BDNZLR8: + return true; + } +} + +bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) + return false; + + // Conditional branch is a special case. + if (MI->isBranch() && !MI->isBarrier()) + return true; + + return !isPredicated(MI); +} + +bool PPCInstrInfo::PredicateInstruction( + MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + unsigned OpC = MI->getOpcode(); + if (OpC == PPC::BLR) { + if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + MI->setDesc(get(Pred[0].getImm() ? + (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : + (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); + } else { + MI->setDesc(get(PPC::BCLR)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); + } + + return true; + } else if (OpC == PPC::B) { + if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + MI->setDesc(get(Pred[0].getImm() ? + (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : + (isPPC64 ? 
PPC::BDZ8 : PPC::BDZ))); + } else { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BCC)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()) + .addMBB(MBB); + } + + return true; + } + + return false; +} + +bool PPCInstrInfo::SubsumesPredicate( + const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + assert(Pred1.size() == 2 && "Invalid PPC first predicate"); + assert(Pred2.size() == 2 && "Invalid PPC second predicate"); + + // No subsumption for CTR predicates or for different condition registers. + if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR || + Pred1[1].getReg() != Pred2[1].getReg()) + return false; + + PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm(); + PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm(); + + if (P1 == P2) + return true; + + // Does P1 subsume P2, e.g. GE subsumes GT. + if (P1 == PPC::PRED_LE && + (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ)) + return true; + if (P1 == PPC::PRED_GE && + (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ)) + return true; + + return false; +} + +bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + // Note: At the present time, the contents of Pred from this function is + // unused by IfConversion. This implementation follows ARM by pushing the + // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of + // predicate, instructions defining CTR or CTR8 are also included as + // predicate-defining instructions. 
+ + const TargetRegisterClass *RCs[] = + { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass, + &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass }; + + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + for (unsigned c = 0; c != sizeof(RCs) / sizeof(RCs[0]) && !Found; ++c) { + const TargetRegisterClass *RC = RCs[c]; + for (TargetRegisterClass::iterator I = RC->begin(), + IE = RC->end(); I != IE; ++I) { + if ((MO.isRegMask() && MO.clobbersPhysReg(*I)) || + (MO.isReg() && MO.isDef() && MO.getReg() == *I)) { + Pred.push_back(MO); + Found = true; + } + } + } + } + + return Found; +} + +bool PPCInstrInfo::isPredicable(MachineInstr *MI) const { + unsigned OpC = MI->getOpcode(); + switch (OpC) { + default: + return false; + case PPC::B: + case PPC::BLR: + return true; + } +} + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index fe3b84c9b2..a6ab617407 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -160,6 +160,53 @@ public: virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, unsigned Reg, MachineRegisterInfo *MRI) const; + // If conversion by predication (only supported by some branch instructions). + // All of the profitability checks always return true; it is always + // profitable to use the predicated branches. 
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const { + return true; + } + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + const BranchProbability &Probability) const { + return true; + } + + virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + const BranchProbability + &Probability) const { + return true; + } + + virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + return false; + } + + // Predication support. + bool isPredicated(const MachineInstr *MI) const; + + virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; + + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + + virtual bool isPredicable(MachineInstr *MI) const; + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. 
/// diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 37b1a0b19a..11969fed85 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -518,6 +518,13 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), "b${cond:cc}lr ${cond:reg}", BrB, []>; + + let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { + def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + "bdzlr", BrB, []>; + def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + "bdnzlr", BrB, []>; + } } let Defs = [CTR], Uses = [CTR] in { diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index caf5c98306..14dc794195 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -93,6 +93,7 @@ public: virtual bool addPreRegAlloc(); virtual bool addILPOpts(); virtual bool addInstSelector(); + virtual bool addPreSched2(); virtual bool addPreEmitPass(); }; } // namespace @@ -123,6 +124,13 @@ bool PPCPassConfig::addInstSelector() { return false; } +bool PPCPassConfig::addPreSched2() { + if (getOptLevel() != CodeGenOpt::None) + addPass(&IfConverterID); + + return true; +} + bool PPCPassConfig::addPreEmitPass() { if (getOptLevel() != CodeGenOpt::None) addPass(createPPCEarlyReturnPass()); diff --git a/test/CodeGen/PowerPC/bdzlr.ll b/test/CodeGen/PowerPC/bdzlr.ll new file mode 100644 index 0000000000..1d3b31ba10 --- /dev/null +++ b/test/CodeGen/PowerPC/bdzlr.ll @@ -0,0 +1,63 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.lua_TValue.17.692 = type { %union.Value.16.691, i32 } +%union.Value.16.691 = type { 
%union.GCObject.15.690* } +%union.GCObject.15.690 = type { %struct.lua_State.14.689 } +%struct.lua_State.14.689 = type { %union.GCObject.15.690*, i8, i8, i8, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.global_State.10.685*, %struct.CallInfo.11.686*, i32*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.CallInfo.11.686*, %struct.CallInfo.11.686*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State.14.689*, %struct.lua_Debug.12.687*)*, %struct.lua_TValue.17.692, %struct.lua_TValue.17.692, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.lua_longjmp.13.688*, i64 } +%struct.global_State.10.685 = type { %struct.stringtable.0.675, i8* (i8*, i8*, i64, i64)*, i8*, i8, i8, i32, %union.GCObject.15.690*, %union.GCObject.15.690**, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.Mbuffer.1.676, i64, i64, i64, i64, i32, i32, i32 (%struct.lua_State.14.689*)*, %struct.lua_TValue.17.692, %struct.lua_State.14.689*, %struct.UpVal.3.678, [9 x %struct.Table.7.682*], [17 x %union.TString.9.684*] } +%struct.stringtable.0.675 = type { %union.GCObject.15.690**, i32, i32 } +%struct.Mbuffer.1.676 = type { i8*, i64, i64 } +%struct.UpVal.3.678 = type { %union.GCObject.15.690*, i8, i8, %struct.lua_TValue.17.692*, %union.anon.2.677 } +%union.anon.2.677 = type { %struct.lua_TValue.17.692 } +%struct.Table.7.682 = type { %union.GCObject.15.690*, i8, i8, i8, i8, %struct.Table.7.682*, %struct.lua_TValue.17.692*, %struct.Node.6.681*, %struct.Node.6.681*, %union.GCObject.15.690*, i32 } +%struct.Node.6.681 = type { %struct.lua_TValue.17.692, %union.TKey.5.680 } +%union.TKey.5.680 = type { %struct.anon.0.4.679 } +%struct.anon.0.4.679 = type { %union.Value.16.691, i32, %struct.Node.6.681* } +%union.TString.9.684 = type { %struct.anon.1.8.683 } +%struct.anon.1.8.683 = type { %union.GCObject.15.690*, i8, i8, i8, i32, i64 } +%struct.CallInfo.11.686 = type { %struct.lua_TValue.17.692*, 
%struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, i32*, i32, i32 } +%struct.lua_Debug.12.687 = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 } +%struct.lua_longjmp.13.688 = type opaque + +define void @lua_xmove(i32 signext %n) #0 { +entry: + br i1 undef, label %for.end, label %if.end + +if.end: ; preds = %entry + br i1 undef, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %if.end + br label %for.body + +for.body: ; preds = %for.body.for.body_crit_edge, %for.body.lr.ph + %0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ] + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ] + %tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1 + %1 = load i32* %tt, align 4, !tbaa !0 + store i32 %1, i32* undef, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge + +for.body.for.body_crit_edge: ; preds = %for.body + %.pre = load %struct.lua_TValue.17.692** undef, align 8, !tbaa !3 + br label %for.body + +for.end: ; preds = %for.body, %if.end, %entry + ret void + +; CHECK: @lua_xmove +; CHECK: bnelr +; CHECK: bnelr +; CHECK: bdzlr +} + +attributes #0 = { nounwind } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"any pointer", metadata !1} diff --git a/test/CodeGen/PowerPC/early-ret2.ll b/test/CodeGen/PowerPC/early-ret2.ll new file mode 100644 index 0000000000..bedd16ce4d --- /dev/null +++ b/test/CodeGen/PowerPC/early-ret2.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +target datalayout = 
"E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @_Z8example3iPiS_() #0 { +entry: + br i1 undef, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry + br i1 undef, label %while.end, label %while.body + +while.body: ; preds = %while.body, %while.body.lr.ph + br i1 false, label %while.end, label %while.body, !llvm.vectorizer.already_vectorized !0 + +while.end: ; preds = %while.body, %while.body.lr.ph, %entry + ret void + +; CHECK: @_Z8example3iPiS_ +; CHECK: bnelr +; CHECK: bnelr +} + +attributes #0 = { noinline nounwind } + +!0 = metadata !{} + -- cgit v1.2.3