summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2013-04-09 22:58:37 +0000
committerHal Finkel <hfinkel@anl.gov>2013-04-09 22:58:37 +0000
commit7eb0d8148e1210d9e31ab471477de47b53bab117 (patch)
tree59cbf016ac5c78bc44d43e66bc48073584217500
parent58ddf528927a57c4f92ef12513bfef3422318b77 (diff)
downloadllvm-7eb0d8148e1210d9e31ab471477de47b53bab117.tar.gz
llvm-7eb0d8148e1210d9e31ab471477de47b53bab117.tar.bz2
llvm-7eb0d8148e1210d9e31ab471477de47b53bab117.tar.xz
Allow PPC B and BLR to be if-converted into some predicated forms
This enables us to form predicated branches (which are the same conditional branches we had before) and also a larger set of predicated returns (including instructions like bdnzlr, which is a conditional return and loop-counter decrement all in one). At the moment, if-conversion does not capture all possible opportunities. A simple example is provided in early-ret2.ll, where if-conversion forms one predicated return, and then the PPCEarlyReturn pass picks up the other one. So, at least for now, we'll keep both mechanisms.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179134 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td9
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp137
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h47
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td7
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp8
-rw-r--r--test/CodeGen/PowerPC/bdzlr.ll63
-rw-r--r--test/CodeGen/PowerPC/early-ret2.ll26
7 files changed, 297 insertions, 0 deletions
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 584829110c..a3049d4aea 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -83,8 +83,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
"bdnz $dst">;
}
+
+ let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
+ def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ "bdzlr", BrB, []>;
+ def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ "bdnzlr", BrB, []>;
+ }
}
+
+
let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 219d8931a2..51bc4f23cc 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -876,6 +876,143 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
return true;
}
+// Report whether MI is one of the opcodes this target already treats as
+// predicated: the conditional branch (BCC), the conditional return (BCLR),
+// and the BDZLR/BDNZLR returns in both their 32- and 64-bit forms.
+bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case PPC::BCC:
+  case PPC::BCLR:
+  case PPC::BDZLR:
+  case PPC::BDZLR8:
+  case PPC::BDNZLR:
+  case PPC::BDNZLR8:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Report whether MI is a terminator that still counts as unpredicated.
+// Non-terminators never qualify. A branch that is not a barrier (i.e. a
+// conditional branch) always qualifies; any other terminator qualifies only
+// when isPredicated() rejects it.
+bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+  if (!MI->isTerminator())
+    return false;
+
+  // Conditional branches are special-cased: they are reported as
+  // unpredicated terminators regardless of what isPredicated() says.
+  const bool IsConditionalBranch = MI->isBranch() && !MI->isBarrier();
+  return IsConditionalBranch || !isPredicated(MI);
+}
+
+// Rewrite an unconditional branch (B) or return (BLR) in place into its
+// predicated form.
+//
+// Pred[0] is read as an immediate and Pred[1] as a register. When Pred[1] is
+// CTR/CTR8 the instruction becomes one of the BDZ*/BDNZ* opcodes (a non-zero
+// Pred[0] selects the BDNZ flavour) and no operands are added. Otherwise the
+// instruction becomes BCLR/BCC with the predicate immediate and register
+// appended (and, for BCC, the original branch target re-appended last).
+//
+// Returns true if MI was rewritten, false for any other opcode.
+bool PPCInstrInfo::PredicateInstruction(
+                     MachineInstr *MI,
+                     const SmallVectorImpl<MachineOperand> &Pred) const {
+  const unsigned OpC = MI->getOpcode();
+  const bool IsReturn = OpC == PPC::BLR;
+  if (!IsReturn && OpC != PPC::B)
+    return false;
+
+  const unsigned PredReg = Pred[1].getReg();
+
+  if (PredReg == PPC::CTR8 || PredReg == PPC::CTR) {
+    // Counter-based predicate: only the opcode changes.
+    const bool BranchIfNonZero = Pred[0].getImm() != 0;
+    const bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
+    unsigned NewOpC;
+    if (IsReturn) {
+      if (BranchIfNonZero)
+        NewOpC = isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR;
+      else
+        NewOpC = isPPC64 ? PPC::BDZLR8 : PPC::BDZLR;
+    } else {
+      if (BranchIfNonZero)
+        NewOpC = isPPC64 ? PPC::BDNZ8 : PPC::BDNZ;
+      else
+        NewOpC = isPPC64 ? PPC::BDZ8 : PPC::BDZ;
+    }
+
+    MI->setDesc(get(NewOpC));
+    return true;
+  }
+
+  if (IsReturn) {
+    // Conditional return: append the predicate operands.
+    MI->setDesc(get(PPC::BCLR));
+    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+      .addImm(Pred[0].getImm())
+      .addReg(PredReg);
+    return true;
+  }
+
+  // Conditional branch: the target must come after the predicate operands,
+  // so pull it off first and re-add it at the end.
+  MachineBasicBlock *Target = MI->getOperand(0).getMBB();
+  MI->RemoveOperand(0);
+
+  MI->setDesc(get(PPC::BCC));
+  MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+    .addImm(Pred[0].getImm())
+    .addReg(PredReg)
+    .addMBB(Target);
+  return true;
+}
+
+// Decide whether the first predicate subsumes the second.
+//
+// Both predicates must have exactly two operands. A predicate whose register
+// operand is CTR/CTR8 neither subsumes nor is subsumed. Otherwise only the
+// condition codes (operand 0) are compared: equal codes subsume each other,
+// LE subsumes LT and EQ, and GE subsumes GT and EQ.
+bool PPCInstrInfo::SubsumesPredicate(
+                     const SmallVectorImpl<MachineOperand> &Pred1,
+                     const SmallVectorImpl<MachineOperand> &Pred2) const {
+  assert(Pred1.size() == 2 && "Invalid PPC first predicate");
+  assert(Pred2.size() == 2 && "Invalid PPC second predicate");
+
+  const unsigned FirstReg = Pred1[1].getReg();
+  if (FirstReg == PPC::CTR8 || FirstReg == PPC::CTR)
+    return false;
+
+  const unsigned SecondReg = Pred2[1].getReg();
+  if (SecondReg == PPC::CTR8 || SecondReg == PPC::CTR)
+    return false;
+
+  const PPC::Predicate Outer = static_cast<PPC::Predicate>(Pred1[0].getImm());
+  const PPC::Predicate Inner = static_cast<PPC::Predicate>(Pred2[0].getImm());
+
+  if (Outer == Inner)
+    return true;
+
+  // Does Outer subsume Inner, e.g. GE subsumes GT.
+  switch (Outer) {
+  case PPC::PRED_LE:
+    return Inner == PPC::PRED_LT || Inner == PPC::PRED_EQ;
+  case PPC::PRED_GE:
+    return Inner == PPC::PRED_GT || Inner == PPC::PRED_EQ;
+  default:
+    return false;
+  }
+}
+
+// Report whether MI defines (or, through a register mask, clobbers) a
+// register that can act as a predicate, appending each matching operand to
+// Pred. Returns true if at least one such operand was found.
+bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
+                                    std::vector<MachineOperand> &Pred) const {
+  // Note: At the present time, the contents of Pred from this function are
+  // unused by IfConversion. This implementation follows ARM by pushing the
+  // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
+  // predicate, instructions defining CTR or CTR8 are also included as
+  // predicate-defining instructions.
+
+  const TargetRegisterClass *RCs[] =
+    { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
+      &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
+
+  // Derive the bound from the table itself. A hard-coded bound of 2 stopped
+  // after the two CR classes, so the CTR/CTR8 classes listed above were never
+  // consulted and counter-defining instructions were not reported.
+  const unsigned NumRCs = sizeof(RCs) / sizeof(RCs[0]);
+
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    // Once any operand has matched, no further register classes are scanned.
+    for (unsigned c = 0; c < NumRCs && !Found; ++c) {
+      const TargetRegisterClass *RC = RCs[c];
+      for (TargetRegisterClass::iterator I = RC->begin(),
+           IE = RC->end(); I != IE; ++I) {
+        if ((MO.isRegMask() && MO.clobbersPhysReg(*I)) ||
+            (MO.isReg() && MO.isDef() && MO.getReg() == *I)) {
+          Pred.push_back(MO);
+          Found = true;
+        }
+      }
+    }
+  }
+
+  return Found;
+}
+
+// Only the unconditional branch (B) and the unconditional return (BLR) are
+// reported as predicable; every other opcode yields false.
+bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
+  const unsigned OpC = MI->getOpcode();
+  return OpC == PPC::B || OpC == PPC::BLR;
+}
+
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index fe3b84c9b2..a6ab617407 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -160,6 +160,53 @@ public:
virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const;
+ // If conversion by predication (only supported by some branch instructions).
+ // All of the profitability checks always return true; it is always
+ // profitable to use the predicated branches.
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ return true;
+ }
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const {
+ return true;
+ }
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const {
+ return true;
+ }
+
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ return false;
+ }
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const;
+
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 37b1a0b19a..11969fed85 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -518,6 +518,13 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isReturn = 1, Uses = [LR, RM] in
def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
"b${cond:cc}lr ${cond:reg}", BrB, []>;
+
+ let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
+ def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ "bdzlr", BrB, []>;
+ def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ "bdnzlr", BrB, []>;
+ }
}
let Defs = [CTR], Uses = [CTR] in {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index caf5c98306..14dc794195 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -93,6 +93,7 @@ public:
virtual bool addPreRegAlloc();
virtual bool addILPOpts();
virtual bool addInstSelector();
+ virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
} // namespace
@@ -123,6 +124,13 @@ bool PPCPassConfig::addInstSelector() {
return false;
}
+// Pre-sched2 hook: schedule the if-converter whenever optimization is
+// enabled. Always returns true.
+bool PPCPassConfig::addPreSched2() {
+  const bool Optimizing = getOptLevel() != CodeGenOpt::None;
+  if (Optimizing)
+    addPass(&IfConverterID);
+
+  return true;
+}
+
bool PPCPassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createPPCEarlyReturnPass());
diff --git a/test/CodeGen/PowerPC/bdzlr.ll b/test/CodeGen/PowerPC/bdzlr.ll
new file mode 100644
index 0000000000..1d3b31ba10
--- /dev/null
+++ b/test/CodeGen/PowerPC/bdzlr.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.lua_TValue.17.692 = type { %union.Value.16.691, i32 }
+%union.Value.16.691 = type { %union.GCObject.15.690* }
+%union.GCObject.15.690 = type { %struct.lua_State.14.689 }
+%struct.lua_State.14.689 = type { %union.GCObject.15.690*, i8, i8, i8, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.global_State.10.685*, %struct.CallInfo.11.686*, i32*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.CallInfo.11.686*, %struct.CallInfo.11.686*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State.14.689*, %struct.lua_Debug.12.687*)*, %struct.lua_TValue.17.692, %struct.lua_TValue.17.692, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.lua_longjmp.13.688*, i64 }
+%struct.global_State.10.685 = type { %struct.stringtable.0.675, i8* (i8*, i8*, i64, i64)*, i8*, i8, i8, i32, %union.GCObject.15.690*, %union.GCObject.15.690**, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.Mbuffer.1.676, i64, i64, i64, i64, i32, i32, i32 (%struct.lua_State.14.689*)*, %struct.lua_TValue.17.692, %struct.lua_State.14.689*, %struct.UpVal.3.678, [9 x %struct.Table.7.682*], [17 x %union.TString.9.684*] }
+%struct.stringtable.0.675 = type { %union.GCObject.15.690**, i32, i32 }
+%struct.Mbuffer.1.676 = type { i8*, i64, i64 }
+%struct.UpVal.3.678 = type { %union.GCObject.15.690*, i8, i8, %struct.lua_TValue.17.692*, %union.anon.2.677 }
+%union.anon.2.677 = type { %struct.lua_TValue.17.692 }
+%struct.Table.7.682 = type { %union.GCObject.15.690*, i8, i8, i8, i8, %struct.Table.7.682*, %struct.lua_TValue.17.692*, %struct.Node.6.681*, %struct.Node.6.681*, %union.GCObject.15.690*, i32 }
+%struct.Node.6.681 = type { %struct.lua_TValue.17.692, %union.TKey.5.680 }
+%union.TKey.5.680 = type { %struct.anon.0.4.679 }
+%struct.anon.0.4.679 = type { %union.Value.16.691, i32, %struct.Node.6.681* }
+%union.TString.9.684 = type { %struct.anon.1.8.683 }
+%struct.anon.1.8.683 = type { %union.GCObject.15.690*, i8, i8, i8, i32, i64 }
+%struct.CallInfo.11.686 = type { %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, i32*, i32, i32 }
+%struct.lua_Debug.12.687 = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
+%struct.lua_longjmp.13.688 = type opaque
+
+define void @lua_xmove(i32 signext %n) #0 {
+entry:
+ br i1 undef, label %for.end, label %if.end
+
+if.end: ; preds = %entry
+ br i1 undef, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %if.end
+ br label %for.body
+
+for.body: ; preds = %for.body.for.body_crit_edge, %for.body.lr.ph
+ %0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
+ %tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
+ %1 = load i32* %tt, align 4, !tbaa !0
+ store i32 %1, i32* undef, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge
+
+for.body.for.body_crit_edge: ; preds = %for.body
+ %.pre = load %struct.lua_TValue.17.692** undef, align 8, !tbaa !3
+ br label %for.body
+
+for.end: ; preds = %for.body, %if.end, %entry
+ ret void
+
+; CHECK: @lua_xmove
+; CHECK: bnelr
+; CHECK: bnelr
+; CHECK: bdzlr
+}
+
+attributes #0 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/PowerPC/early-ret2.ll b/test/CodeGen/PowerPC/early-ret2.ll
new file mode 100644
index 0000000000..bedd16ce4d
--- /dev/null
+++ b/test/CodeGen/PowerPC/early-ret2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @_Z8example3iPiS_() #0 {
+entry:
+ br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph: ; preds = %entry
+ br i1 undef, label %while.end, label %while.body
+
+while.body: ; preds = %while.body, %while.body.lr.ph
+ br i1 false, label %while.end, label %while.body, !llvm.vectorizer.already_vectorized !0
+
+while.end: ; preds = %while.body, %while.body.lr.ph, %entry
+ ret void
+
+; CHECK: @_Z8example3iPiS_
+; CHECK: bnelr
+; CHECK: bnelr
+}
+
+attributes #0 = { noinline nounwind }
+
+!0 = metadata !{}
+