diff options
30 files changed, 2371 insertions, 165 deletions
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 8281b5ca03..b1c480b4aa 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -149,6 +149,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, case PPC::PRED_NU: O << "nu"; return; + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Invalid predicate code"); } @@ -184,6 +187,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, case PPC::PRED_NU_PLUS: O << "+"; return; + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Invalid predicate code"); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 63facc5446..c2987b641c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -42,6 +42,10 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS; case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS; case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS; + + // Simple predicates for single condition-register bits. 
+ case PPC::PRED_BIT_SET: return PPC::PRED_BIT_UNSET; + case PPC::PRED_BIT_UNSET: return PPC::PRED_BIT_SET; } llvm_unreachable("Unknown PPC branch opcode!"); } @@ -72,6 +76,10 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS; case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS; case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS; + + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Unknown PPC branch opcode!"); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index d498c2f8f4..c140973776 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -48,7 +48,12 @@ namespace PPC { PRED_GT_PLUS = (1 << 5) | 15, PRED_NE_PLUS = (2 << 5) | 7, PRED_UN_PLUS = (3 << 5) | 15, - PRED_NU_PLUS = (3 << 5) | 7 + PRED_NU_PLUS = (3 << 5) | 7, + + // When dealing with individual condition-register bits, we have simple set + // and unset predicates. + PRED_BIT_SET = -1, + PRED_BIT_UNSET = -2 }; /// Invert the specified predicate. != -> ==, < -> >=. 
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 6611498378..b27004d1ef 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -51,6 +51,8 @@ def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", "Enable 64-bit registers usage for ppc32 [beta]">; +def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true", + "Use condition-register bits individually">; def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", "Enable Altivec instructions">; def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 3e608ca8f6..9276211de7 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -115,6 +115,9 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock *Dest = 0; if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) Dest = I->getOperand(2).getMBB(); + else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) && + !I->getOperand(1).isImm()) + Dest = I->getOperand(1).getMBB(); else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && !I->getOperand(0).isImm()) @@ -166,6 +169,12 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. 
BuildMI(MBB, I, dl, TII->get(PPC::BCC)) .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); + } else if (I->getOpcode() == PPC::BC) { + unsigned CRBit = I->getOperand(0).getReg(); + BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2); + } else if (I->getOpcode() == PPC::BCn) { + unsigned CRBit = I->getOperand(0).getReg(); + BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BDNZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2); } else if (I->getOpcode() == PPC::BDNZ8) { diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index e8e7f4c2d2..7852d895b5 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -15,6 +15,8 @@ /// CCIfSubtarget - Match if the current subtarget has a feature F. class CCIfSubtarget<string F, CCAction A> : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; +class CCIfNotSubtarget<string F, CCAction A> + : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; //===----------------------------------------------------------------------===// // Return Value Calling Convention @@ -23,7 +25,8 @@ class CCIfSubtarget<string F, CCAction A> // Return-value convention for PowerPC def RetCC_PPC : CallingConv<[ // On PPC64, integer return values are always promoted to i64 - CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, + CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, + CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>, CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, @@ -46,6 +49,7 @@ def RetCC_PPC : CallingConv<[ // Only handle ints and floats. All ints are promoted to i64. // Vector types and quadword ints are not handled. 
def CC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i1], CCPromoteToType<i64>>, CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, CCIfType<[i32], CCPromoteToType<i64>>, @@ -58,6 +62,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[ // and multiple register returns are "supported" to avoid compile // errors, but none are handled by the fast selector. def RetCC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i1], CCPromoteToType<i64>>, CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, CCIfType<[i32], CCPromoteToType<i64>>, @@ -73,6 +78,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_PPC32_SVR4_Common : CallingConv<[ + CCIfType<[i1], CCPromoteToType<i32>>, + // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 07e3b64f7a..b7ad34dd25 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -740,6 +740,9 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return false; MVT SrcVT = SrcEVT.getSimpleVT(); + if (SrcVT == MVT::i1 && PPCSubTarget.useCRBits()) + return false; + // See if operand 2 is an immediate encodeable in the compare. // FIXME: Operands are not in canonical order at -O0, so an immediate // operand in position 1 is a lost opportunity for now. We are @@ -1203,7 +1206,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // Skip vector arguments for now, as well as long double and // uint128_t, and anything that isn't passed in a register. 
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 || !VA.isRegLoc() || VA.needsCustom()) return false; @@ -1995,6 +1998,15 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget.useCRBits()) { + const ConstantInt *CI = cast<ConstantInt>(C); + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) @@ -2160,6 +2172,15 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { if (Opc != ISD::Constant) return 0; + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget.useCRBits()) { + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Imm == 0 ? 
PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index e714281bdf..b9eecbe16f 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -34,6 +35,10 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +// FIXME: Remove this once the bug has been fixed! +cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", +cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); + namespace llvm { void initializePPCDAGToDAGISelPass(PassRegistry&); } @@ -181,6 +186,9 @@ namespace { private: SDNode *SelectSETCC(SDNode *N); + + void PeepholePPC64(); + void PeepholdCROps(); }; } @@ -715,7 +723,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); - if (isInt32Immediate(N->getOperand(1), Imm)) { + if (!PPCSubTarget.useCRBits() && + isInt32Immediate(N->getOperand(1), Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. 
// setcc op, 0 @@ -845,6 +854,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } } + if (PPCSubTarget.useCRBits()) + return 0; + bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); SDValue CCReg = SelectCC(LHS, RHS, CC, dl); @@ -959,8 +971,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; } - case ISD::SETCC: - return SelectSETCC(N); + case ISD::SETCC: { + SDNode *SN = SelectSETCC(N); + if (SN) + return SN; + break; + } case PPCISD::GlobalBaseReg: return getGlobalBaseReg(); @@ -1193,11 +1209,39 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + // FIXME: Remove this once the ANDI glue bug is fixed: + case PPCISD::ANDIo_1_EQ_BIT: + case PPCISD::ANDIo_1_GT_BIT: { + if (!ANDIGlueBug) + break; + + EVT InVT = N->getOperand(0).getValueType(); + assert((InVT == MVT::i64 || InVT == MVT::i32) && + "Invalid input type for ANDIo_1_EQ_BIT"); + + unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo; + SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, + N->getOperand(0), + CurDAG->getTargetConstant(1, InVT)), 0); + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue SRIdxVal = + CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? + PPC::sub_eq : PPC::sub_gt, MVT::i32); + + return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, + CR0Reg, SRIdxVal, + SDValue(AndI.getNode(), 1) /* glue */); + } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); + // If this is a select of i1 operands, we'll pattern match it. + if (PPCSubTarget.useCRBits() && + N->getOperand(0).getValueType() == MVT::i1) + break; + // Handle the setcc cases here. 
select_cc lhs, 0, 1, 0, cc if (!isPPC64) if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) @@ -1216,6 +1260,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); + + if (N->getValueType(0) == MVT::i1) { + // An i1 select is: (c & t) | (!c & f). + bool Inv; + unsigned Idx = getCRIdxForSetCC(CC, Inv); + + unsigned SRI; + switch (Idx) { + default: llvm_unreachable("Invalid CC index"); + case 0: SRI = PPC::sub_lt; break; + case 1: SRI = PPC::sub_gt; break; + case 2: SRI = PPC::sub_eq; break; + case 3: SRI = PPC::sub_un; break; + } + + SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); + + SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, + CCBit, CCBit), 0); + SDValue C = Inv ? NotCCBit : CCBit, + NotC = Inv ? CCBit : NotCCBit; + + SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, + C, N->getOperand(2)), 0); + SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, + NotC, N->getOperand(3)), 0); + + return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); + } + unsigned BROpc = getPredicateForSetCC(CC); unsigned SelectCCOp; @@ -1258,8 +1332,30 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::BR_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); + unsigned PCC = getPredicateForSetCC(CC); + + if (N->getOperand(2).getValueType() == MVT::i1) { + unsigned Opc; + bool Swap; + switch (PCC) { + default: llvm_unreachable("Unexpected Boolean-operand predicate"); + case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; + case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; + case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; + case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; + case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; + case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; + } + + SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, + 
N->getOperand(Swap ? 3 : 2), + N->getOperand(Swap ? 2 : 3)), 0); + return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, + BitComp, N->getOperand(4), N->getOperand(0)); + } + SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); - SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode, + SDValue Ops[] = { getI32Imm(PCC), CondCode, N->getOperand(4), N->getOperand(0) }; return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4); } @@ -1404,6 +1500,364 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { if (TM.getOptLevel() == CodeGenOpt::None) return; + PeepholePPC64(); + PeepholdCROps(); +} + +void PPCDAGToDAGISel::PeepholdCROps() { + bool IsModified; + do { + IsModified = false; + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) { + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); + if (!MachineNode || MachineNode->use_empty()) + continue; + SDNode *ResNode = MachineNode; + + bool Op1Set = false, Op1Unset = false, + Op1Not = false, + Op2Set = false, Op2Unset = false, + Op2Not = false; + + unsigned Opcode = MachineNode->getMachineOpcode(); + switch (Opcode) { + default: break; + case PPC::CRAND: + case PPC::CRNAND: + case PPC::CROR: + case PPC::CRXOR: + case PPC::CRNOR: + case PPC::CREQV: + case PPC::CRANDC: + case PPC::CRORC: { + SDValue Op = MachineNode->getOperand(1); + if (Op.isMachineOpcode()) { + if (Op.getMachineOpcode() == PPC::CRSET) + Op2Set = true; + else if (Op.getMachineOpcode() == PPC::CRUNSET) + Op2Unset = true; + else if (Op.getMachineOpcode() == PPC::CRNOR && + Op.getOperand(0) == Op.getOperand(1)) + Op2Not = true; + } + } // fallthrough + case PPC::BC: + case PPC::BCn: + case PPC::SELECT_I4: + case PPC::SELECT_I8: + case PPC::SELECT_F4: + case PPC::SELECT_F8: + case PPC::SELECT_VRRC: { + SDValue Op = MachineNode->getOperand(0); + if (Op.isMachineOpcode()) { + if (Op.getMachineOpcode() == PPC::CRSET) + Op1Set = true; + else if (Op.getMachineOpcode() == 
PPC::CRUNSET) + Op1Unset = true; + else if (Op.getMachineOpcode() == PPC::CRNOR && + Op.getOperand(0) == Op.getOperand(1)) + Op1Not = true; + } + } + break; + } + + switch (Opcode) { + default: break; + case PPC::CRAND: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // x & x = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Set) + // 1 & y = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Set) + // x & 1 = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset || Op2Unset) + // x & 0 = 0 & y = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Not) + // ~x & y = andc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0). + getOperand(0)); + else if (Op2Not) + // x & ~y = andc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + break; + case PPC::CRNAND: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // nand(x, x) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Set) + // nand(1, y) -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Set) + // nand(x, 1) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Unset || Op2Unset) + // nand(x, 0) = nand(0, y) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Not) + // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // nand(x, ~y) = ~x | y = orc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1). + getOperand(0), + MachineNode->getOperand(0)); + break; + case PPC::CROR: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // x | x = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Set || Op2Set) + // x | 1 = 1 | y = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Unset) + // 0 | y = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Unset) + // x | 0 = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // ~x | y = orc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0). 
+ getOperand(0)); + else if (Op2Not) + // x | ~y = orc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). + getOperand(0)); + break; + case PPC::CRXOR: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // xor(x, x) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // xor(1, y) -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Set) + // xor(x, 1) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Unset) + // xor(0, y) = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Unset) + // xor(x, 0) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // xor(~x, y) = eqv(x, y) + ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // xor(x, ~y) = eqv(x, y) + ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + break; + case PPC::CRNOR: + if (Op1Set || Op2Set) + // nor(1, y) -> 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Unset) + // nor(0, y) = ~y -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Unset) + // nor(x, 0) = ~x + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Not) + // nor(~x, y) = andc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // nor(x, ~y) = andc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1). + getOperand(0), + MachineNode->getOperand(0)); + break; + case PPC::CREQV: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // eqv(x, x) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // eqv(1, y) = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Set) + // eqv(x, 1) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset) + // eqv(0, y) = ~y -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Unset) + // eqv(x, 0) = ~x + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Not) + // eqv(~x, y) = xor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). 
+ getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // eqv(x, ~y) = xor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). + getOperand(0)); + break; + case PPC::CRANDC: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // andc(x, x) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // andc(1, y) = ~y + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op1Unset || Op2Set) + // andc(0, y) = andc(x, 1) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op2Unset) + // andc(x, 0) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // andc(~x, y) = ~(x | y) = nor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // andc(x, ~y) = x & y + ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + break; + case PPC::CRORC: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // orc(x, x) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set || Op2Unset) + // orc(1, y) = orc(x, 0) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op2Set) + // orc(x, 1) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset) + // orc(0, y) = ~y + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op1Not) + // orc(~x, y) = ~(x & y) = nand(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // orc(x, ~y) = x | y + ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). + getOperand(0)); + break; + case PPC::SELECT_I4: + case PPC::SELECT_I8: + case PPC::SELECT_F4: + case PPC::SELECT_F8: + case PPC::SELECT_VRRC: + if (Op1Set) + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op1Unset) + ResNode = MachineNode->getOperand(2).getNode(); + else if (Op1Not) + ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), + SDLoc(MachineNode), + MachineNode->getValueType(0), + MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(2), + MachineNode->getOperand(1)); + break; + case PPC::BC: + case PPC::BCn: + if (Op1Not) + ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : + PPC::BC, + SDLoc(MachineNode), + MVT::Other, + MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1), + MachineNode->getOperand(2)); + // FIXME: Handle Op1Set, Op1Unset here too. 
+ break; + } + + if (ResNode != MachineNode) { + DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); + DEBUG(MachineNode->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(ResNode->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + ReplaceUses(MachineNode, ResNode); + IsModified = true; + } + } + if (IsModified) + CurDAG->RemoveDeadNodes(); + } while (IsModified); +} + +void PPCDAGToDAGISel::PeepholePPC64() { // These optimizations are currently supported only for 64-bit SVR4. if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) return; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b8bf3bda80..90bd867987 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -46,6 +46,9 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); +// FIXME: Remove this once the bug has been fixed! +extern cl::opt<bool> ANDIGlueBug; + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -94,6 +97,34 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); + if (Subtarget->useCRBits()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, + isPPC64 ? 
MVT::i64 : MVT::i32); + + // PowerPC does not support direct load / store of condition registers + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + + // FIXME: Remove this once the ANDI glue bug is fixed: + if (ANDIGlueBug) + setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); + + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setTruncStoreAction(MVT::i64, MVT::i1, Expand); + setTruncStoreAction(MVT::i32, MVT::i1, Expand); + setTruncStoreAction(MVT::i16, MVT::i1, Expand); + setTruncStoreAction(MVT::i8, MVT::i1, Expand); + + addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); + } + // This is used in the ppcf128->int sequence. Note it has different semantics // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); @@ -191,21 +222,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); - // PowerPC does not have Select - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::i64, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); + if (!Subtarget->useCRBits()) { + // PowerPC does not have Select + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + } // PowerPC wants to turn select_cc of FP into fsel when possible. 
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit - setOperationAction(ISD::SETCC, MVT::i32, Custom); + if (!Subtarget->useCRBits()) + setOperationAction(ISD::SETCC, MVT::i32, Custom); // PowerPC does not have BRCOND which requires SetCC - setOperationAction(ISD::BRCOND, MVT::Other, Expand); + if (!Subtarget->useCRBits()) + setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -445,7 +480,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); - setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT, MVT::v4i32, + Subtarget->useCRBits() ? Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); @@ -522,9 +558,20 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); + if (Subtarget->useCRBits()) + setTargetDAGCombine(ISD::BRCOND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + if (Subtarget->useCRBits()) { + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::SELECT_CC); + } + // Use reciprocal estimates. 
if (TM.Options.UnsafeFPMath) { setTargetDAGCombine(ISD::FDIV); @@ -545,6 +592,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); } + // With 32 condition bits, we don't need to sink (and duplicate) compares + // aggressively in CodeGenPrep. + if (Subtarget->useCRBits()) + setHasMultipleConditionRegisters(); + setMinFunctionAlignment(2); if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); @@ -689,7 +741,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) - return MVT::i32; + return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32; return VT.changeVectorElementTypeToInteger(); } @@ -1924,7 +1976,7 @@ static const uint16_t *GetFPR() { /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { - unsigned ArgSize = ArgVT.getSizeInBits()/8; + unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -2022,6 +2074,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); + case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; @@ -2041,14 +2094,18 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, + ValVT == MVT::i1 ? MVT::i32 : ValVT); + + if (ValVT == MVT::i1) + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); InVals.push_back(ArgValue); } else { // Argument stored in memory. 
assert(VA.isMemLoc()); - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + unsigned ArgSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable); @@ -2184,7 +2241,7 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); + return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } // Set the size that is at least reserved in caller of this function. Tail @@ -2267,7 +2324,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; - unsigned ObjSize = ObjectVT.getSizeInBits()/8; + unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); @@ -2386,13 +2443,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); @@ -2726,7 +2784,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. 
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); @@ -3885,7 +3943,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); // Promote integers to 64-bit values. - if (Arg.getValueType() == MVT::i32) { + if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); @@ -4009,6 +4067,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != NumGPRs) { @@ -4694,6 +4753,55 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, Op.getOperand(0), Op.getOperand(1)); } +SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && + "Custom lowering only for i1 loads"); + + // First, load 8 bits into 32 bits, then truncate to 1 bit. + + SDLoc dl(Op); + LoadSDNode *LD = cast<LoadSDNode>(Op); + + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand *MMO = LD->getMemOperand(); + + SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, + BasePtr, MVT::i8, MMO); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); + + SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; + return DAG.getMergeValues(Ops, 2, dl); +} + +SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(1).getValueType() == MVT::i1 && + "Custom lowering only for i1 stores"); + + // First, zero extend to 32 bits, then use a truncating store to 8 bits. 
+ + SDLoc dl(Op); + StoreSDNode *ST = cast<StoreSDNode>(Op); + + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + SDValue Value = ST->getValue(); + MachineMemOperand *MMO = ST->getMemOperand(); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); + return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); +} + +// FIXME: Remove this once the ANDI glue bug is fixed: +SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && + "Custom lowering only for i1 results"); + + SDLoc DL(Op); + return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, + Op.getOperand(0)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -5793,6 +5901,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, @@ -6358,9 +6469,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8)) { + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8)) { SmallVector<MachineOperand, 2> Cond; - Cond.push_back(MI->getOperand(4)); + if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8) + Cond.push_back(MI->getOperand(4)); + else + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 
Cond.push_back(MI->getOperand(1)); DebugLoc dl = MI->getDebugLoc(); @@ -6372,9 +6489,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_VRRC) { - - + MI->getOpcode() == PPC::SELECT_CC_VRRC || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8 || + MI->getOpcode() == PPC::SELECT_F4 || + MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_VRRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. @@ -6388,7 +6508,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); @@ -6403,8 +6522,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + if (MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8 || + MI->getOpcode() == PPC::SELECT_F4 || + MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_VRRC) { + BuildMI(BB, dl, TII->get(PPC::BC)) + .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + } else { + unsigned SelectPred = MI->getOperand(4).getImm(); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + } // copy0MBB: // %FalseValue = ... 
@@ -6727,6 +6856,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); + } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT || + MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { + unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? + PPC::ANDIo8 : PPC::ANDIo; + bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? + &PPC::GPRCRegClass : + &PPC::G8RCRegClass); + + DebugLoc dl = MI->getDebugLoc(); + BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) + .addReg(MI->getOperand(1).getReg()).addImm(1); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), + MI->getOperand(0).getReg()) + .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -6981,6 +7131,467 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { return false; } +SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + + assert(PPCSubTarget.useCRBits() && + "Expecting to be tracking CR bits"); + // If we're tracking CR bits, we need to be careful that we don't have: + // trunc(binary-ops(zext(x), zext(y))) + // or + // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) + // such that we're unnecessarily moving things into GPRs when it would be + // better to keep them in CR bits. + + // Note that trunc here can be an actual i1 trunc, or can be the effective + // truncation that comes from a setcc or select_cc. 
+ if (N->getOpcode() == ISD::TRUNCATE && + N->getValueType(0) != MVT::i1) + return SDValue(); + + if (N->getOperand(0).getValueType() != MVT::i32 && + N->getOperand(0).getValueType() != MVT::i64) + return SDValue(); + + if (N->getOpcode() == ISD::SETCC || + N->getOpcode() == ISD::SELECT_CC) { + // If we're looking at a comparison, then we need to make sure that the + // high bits (all except for the first) don't affect the result. + ISD::CondCode CC = + cast<CondCodeSDNode>(N->getOperand( + N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); + unsigned OpBits = N->getOperand(0).getValueSizeInBits(); + + if (ISD::isSignedIntSetCC(CC)) { + if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || + DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) + return SDValue(); + } else if (ISD::isUnsignedIntSetCC(CC)) { + if (!DAG.MaskedValueIsZero(N->getOperand(0), + APInt::getHighBitsSet(OpBits, OpBits-1)) || + !DAG.MaskedValueIsZero(N->getOperand(1), + APInt::getHighBitsSet(OpBits, OpBits-1))) + return SDValue(); + } else { + // This is neither a signed nor an unsigned comparison, just make sure + // that the high bits are equal. + APInt Op1Zero, Op1One; + APInt Op2Zero, Op2One; + DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One); + DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One); + + // We don't really care about what is known about the first bit (if + // anything), so clear it in all masks prior to comparing them. + Op1Zero.clearBit(0); Op1One.clearBit(0); + Op2Zero.clearBit(0); Op2One.clearBit(0); + + if (Op1Zero != Op2Zero || Op1One != Op2One) + return SDValue(); + } + } + + // We now know that the higher-order bits are irrelevant, we just need to + // make sure that all of the intermediate operations are bit operations, and + // all inputs are extensions. 
+ if (N->getOperand(0).getOpcode() != ISD::AND && + N->getOperand(0).getOpcode() != ISD::OR && + N->getOperand(0).getOpcode() != ISD::XOR && + N->getOperand(0).getOpcode() != ISD::SELECT && + N->getOperand(0).getOpcode() != ISD::SELECT_CC && + N->getOperand(0).getOpcode() != ISD::TRUNCATE && + N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && + N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + + if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && + N->getOperand(1).getOpcode() != ISD::AND && + N->getOperand(1).getOpcode() != ISD::OR && + N->getOperand(1).getOpcode() != ISD::XOR && + N->getOperand(1).getOpcode() != ISD::SELECT && + N->getOperand(1).getOpcode() != ISD::SELECT_CC && + N->getOperand(1).getOpcode() != ISD::TRUNCATE && + N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && + N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + + SmallVector<SDValue, 4> Inputs; + SmallVector<SDValue, 8> BinOps, PromOps; + SmallPtrSet<SDNode *, 16> Visited; + + for (unsigned i = 0; i < 2; ++i) { + if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && + N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || + isa<ConstantSDNode>(N->getOperand(i))) + Inputs.push_back(N->getOperand(i)); + else + BinOps.push_back(N->getOperand(i)); + + if (N->getOpcode() == ISD::TRUNCATE) + break; + } + + // Visit all inputs, collect all binary operations (and, or, xor and + // select) that are all fed by extensions. + while (!BinOps.empty()) { + SDValue BinOp = BinOps.back(); + BinOps.pop_back(); + + if (!Visited.insert(BinOp.getNode())) + continue; + + PromOps.push_back(BinOp); + + for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { + // The condition of the select is not promoted. 
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0) + continue; + if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) + continue; + + if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && + BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || + isa<ConstantSDNode>(BinOp.getOperand(i))) { + Inputs.push_back(BinOp.getOperand(i)); + } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || + BinOp.getOperand(i).getOpcode() == ISD::OR || + BinOp.getOperand(i).getOpcode() == ISD::XOR || + BinOp.getOperand(i).getOpcode() == ISD::SELECT || + BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || + BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || + BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { + BinOps.push_back(BinOp.getOperand(i)); + } else { + // We have an input that is not an extension or another binary + // operation; we'll abort this transformation. + return SDValue(); + } + } + } + + // Make sure that this is a self-contained cluster of operations (which + // is not quite the same thing as saying that everything has only one + // use). + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa<ConstantSDNode>(Inputs[i])) + continue; + + for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), + UE = Inputs[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + } + } + + for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { + for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), + UE = PromOps[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + } + } + + // Replace all inputs with the extension operand. 
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + // Constants may have users outside the cluster of to-be-promoted nodes, + // and so we need to replace those as we do the promotions. + if (isa<ConstantSDNode>(Inputs[i])) + continue; + else + DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); + } + + // Replace all operations (these are all the same, but have a different + // (i1) return type). DAG.getNode will validate that the types of + // a binary operator match, so go through the list in reverse so that + // we've likely promoted both operands first. Any intermediate truncations or + // extensions disappear. + while (!PromOps.empty()) { + SDValue PromOp = PromOps.back(); + PromOps.pop_back(); + + if (PromOp.getOpcode() == ISD::TRUNCATE || + PromOp.getOpcode() == ISD::SIGN_EXTEND || + PromOp.getOpcode() == ISD::ZERO_EXTEND || + PromOp.getOpcode() == ISD::ANY_EXTEND) { + if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && + PromOp.getOperand(0).getValueType() != MVT::i1) { + // The operand is not yet ready (see comment below). + PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SDValue RepValue = PromOp.getOperand(0); + if (isa<ConstantSDNode>(RepValue)) + RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); + + DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); + continue; + } + + unsigned C; + switch (PromOp.getOpcode()) { + default: C = 0; break; + case ISD::SELECT: C = 1; break; + case ISD::SELECT_CC: C = 2; break; + } + + if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && + PromOp.getOperand(C).getValueType() != MVT::i1) || + (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && + PromOp.getOperand(C+1).getValueType() != MVT::i1)) { + // The to-be-promoted operands of this node have not yet been + // promoted (this should be rare because we're going through the + // list backward, but if one of the operands has several users in + // this cluster of to-be-promoted nodes, it is possible). 
+ PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), + PromOp.getNode()->op_end()); + + // If there are any constant inputs, make sure they're replaced now. + for (unsigned i = 0; i < 2; ++i) + if (isa<ConstantSDNode>(Ops[C+i])) + Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); + + DAG.ReplaceAllUsesOfValueWith(PromOp, + DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, + Ops.data(), Ops.size())); + } + + // Now we're left with the initial truncation itself. + if (N->getOpcode() == ISD::TRUNCATE) + return N->getOperand(0); + + // Otherwise, this is a comparison. The operands to be compared have just + // changed type (to i1), but everything else is the same. + return SDValue(N, 0); +} + +SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + + assert(PPCSubTarget.useCRBits() && + "Expecting to be tracking CR bits"); + // If we're tracking CR bits, we need to be careful that we don't have: + // zext(binary-ops(trunc(x), trunc(y))) + // or + // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) + // such that we're unnecessarily moving things into CR bits that can more + // efficiently stay in GPRs. Note that if we're not certain that the high + // bits are set as required by the final extension, we still may need to do + // some masking to get the proper behavior. 
+ + if (N->getValueType(0) != MVT::i32 && + N->getValueType(0) != MVT::i64) + return SDValue(); + + if (N->getOperand(0).getValueType() != MVT::i1) + return SDValue(); + + if (N->getOperand(0).getOpcode() != ISD::AND && + N->getOperand(0).getOpcode() != ISD::OR && + N->getOperand(0).getOpcode() != ISD::XOR && + N->getOperand(0).getOpcode() != ISD::SELECT && + N->getOperand(0).getOpcode() != ISD::SELECT_CC) + return SDValue(); + + SmallVector<SDValue, 4> Inputs; + SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; + SmallPtrSet<SDNode *, 16> Visited; + + // Visit all inputs, collect all binary operations (and, or, xor and + // select) that are all fed by truncations. + while (!BinOps.empty()) { + SDValue BinOp = BinOps.back(); + BinOps.pop_back(); + + if (!Visited.insert(BinOp.getNode())) + continue; + + PromOps.push_back(BinOp); + + for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { + // The condition of the select is not promoted. + if (BinOp.getOpcode() == ISD::SELECT && i == 0) + continue; + if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) + continue; + + if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || + isa<ConstantSDNode>(BinOp.getOperand(i))) { + Inputs.push_back(BinOp.getOperand(i)); + } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || + BinOp.getOperand(i).getOpcode() == ISD::OR || + BinOp.getOperand(i).getOpcode() == ISD::XOR || + BinOp.getOperand(i).getOpcode() == ISD::SELECT || + BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { + BinOps.push_back(BinOp.getOperand(i)); + } else { + // We have an input that is not a truncation or another binary + // operation; we'll abort this transformation. + return SDValue(); + } + } + } + + // Make sure that this is a self-contained cluster of operations (which + // is not quite the same thing as saying that everything has only one + // use). 
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa<ConstantSDNode>(Inputs[i])) + continue; + + for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), + UE = Inputs[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + } + } + + for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { + for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), + UE = PromOps[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + } + } + + bool ReallyNeedsExt = false; + if (N->getOpcode() != ISD::ANY_EXTEND) { + // If any input is not already known to be sign/zero extended, then + // we'll still need to do that at the end. + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa<ConstantSDNode>(Inputs[i])) + continue; + + unsigned OpBits = + Inputs[i].getOperand(0).getValueSizeInBits(); + if ((N->getOpcode() == ISD::ZERO_EXTEND && + !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), + APInt::getHighBitsSet(OpBits, + OpBits-1))) || + (N->getOpcode() == ISD::SIGN_EXTEND && + DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) != OpBits)) { + ReallyNeedsExt = true; + break; + } + } + } + + // Replace all inputs, either with the truncation operand, or a + // truncation or extension to the final output type. + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + // Constant inputs need to be replaced with the to-be-promoted nodes that + // use them because they might have users outside of the cluster of + // promoted nodes. 
+ if (isa<ConstantSDNode>(Inputs[i])) + continue; + + SDValue InSrc = Inputs[i].getOperand(0); + if (Inputs[i].getValueType() == N->getValueType(0)) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); + else if (N->getOpcode() == ISD::SIGN_EXTEND) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); + else + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); + } + + // Replace all operations (these are all the same, but have a different + // (promoted) return type). DAG.getNode will validate that the types of + // a binary operator match, so go through the list in reverse so that + // we've likely promoted both operands first. + while (!PromOps.empty()) { + SDValue PromOp = PromOps.back(); + PromOps.pop_back(); + + unsigned C; + switch (PromOp.getOpcode()) { + default: C = 0; break; + case ISD::SELECT: C = 1; break; + case ISD::SELECT_CC: C = 2; break; + } + + if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && + PromOp.getOperand(C).getValueType() != N->getValueType(0)) || + (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && + PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { + // The to-be-promoted operands of this node have not yet been + // promoted (this should be rare because we're going through the + // list backward, but if one of the operands has several users in + // this cluster of to-be-promoted nodes, it is possible). + PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), + PromOp.getNode()->op_end()); + + // If this node has constant inputs, then they'll need to be promoted here. 
+ for (unsigned i = 0; i < 2; ++i) { + if (!isa<ConstantSDNode>(Ops[C+i])) + continue; + if (Ops[C+i].getValueType() == N->getValueType(0)) + continue; + + if (N->getOpcode() == ISD::SIGN_EXTEND) + Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + else + Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + } + + DAG.ReplaceAllUsesOfValueWith(PromOp, + DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), + Ops.data(), Ops.size())); + } + + // Now we're left with the initial extension itself. + if (!ReallyNeedsExt) + return N->getOperand(0); + + // To zero extend, just mask off everything except for the first bit. + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), + DAG.getConstant(1, N->getValueType(0))); + + assert(N->getOpcode() == ISD::SIGN_EXTEND && + "Invalid extension type"); + EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); + SDValue ShiftCst = + DAG.getConstant(N->getValueSizeInBits(0)-1, ShiftAmountTy); + return DAG.getNode(ISD::SRA, dl, N->getValueType(0), + DAG.getNode(ISD::SHL, dl, N->getValueType(0), + N->getOperand(0), ShiftCst), ShiftCst); +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { const TargetMachine &TM = getTargetMachine(); @@ -7007,6 +7618,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return N->getOperand(0); } break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + return DAGCombineExtBoolTrunc(N, DCI); + case ISD::TRUNCATE: + case ISD::SETCC: + case ISD::SELECT_CC: + return DAGCombineTruncBoolExt(N, DCI); case ISD::FDIV: { assert(TM.Options.UnsafeFPMath && "Reciprocal estimates require UnsafeFPMath"); @@ -7422,6 +8041,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } break; } + case ISD::BRCOND: { + SDValue Cond = 
N->getOperand(1); + SDValue Target = N->getOperand(2); + + if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && + cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == + Intrinsic::ppc_is_decremented_ctr_nonzero) { + + // We now need to make the intrinsic dead (it cannot be instruction + // selected). + DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); + assert(Cond.getNode()->hasOneUse() && + "Counter decrement has more than one use"); + + return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, + N->getOperand(0), Target); + } + } + break; case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so // that we don't have to do a MFOCRF: instead, branch directly on CR6. This diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index f8eab274d2..1d351c069a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -121,6 +121,12 @@ namespace llvm { /// resultant GPR. Bits corresponding to other CR regs are undefined. MFOCRF, + // FIXME: Remove these once the ANDI glue bug is fixed: + /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the + /// eq or gt bit of CR0 after executing andi. x, 1. This is used to + /// implement truncation of i32 or i64 to i1. + ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. 
EH_SJLJ_SETJMP, @@ -515,6 +521,9 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; @@ -631,6 +640,8 @@ namespace llvm { SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index c5919fdaa3..50b210f41b 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -85,9 +85,16 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, []>, Requires<[In64BitMode]>; - def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, - []>, + def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>, + Requires<[In64BitMode]>; + + def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi), + "bcctr 12, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), + "bcctr 4, $bi, 0", IIC_BrB, []>, Requires<[In64BitMode]>; } } @@ -145,11 +152,19 @@ let isCall = 1, 
PPC970_Unit = 7, Defs = [LR8] in { "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In64BitMode]>; - let isCodeGenOnly = 1 in - def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, - []>, - Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in { + def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>, + Requires<[In64BitMode]>; + + def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), + "bcctrl 12, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), + "bcctrl 4, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + } } } } // Interpretation64Bit @@ -614,6 +629,16 @@ defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA), "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>; +let isCommutable = 1 in { +// RLWIMI can be commuted if the rotate amount is zero. +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA), + (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB, + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", + IIC_IntRotate, []>, PPC970_DGroup_Cracked, + RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; +} + let isSelect = 1 in def ISEL8 : AForm_4<31, 15, (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond), @@ -1020,6 +1045,14 @@ def : Pat<(i64 (anyext i32:$in)), def : Pat<(i32 (trunc i64:$in)), (EXTRACT_SUBREG $in, sub_32)>; +// Implement the 'not' operation with the NOR instruction. +// (we could use the default xori pattern, but nor has lower latency on some +// cores (such as the A2)). +def i64not : OutPatFrag<(ops node:$in), + (NOR8 $in, $in)>; +def : Pat<(not i64:$in), + (i64not $in)>; + // Extending loads with i64 targets. 
def : Pat<(zextloadi1 iaddr:$src), (LBZ8 iaddr:$src)>; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index dae40c0cc2..9c7b17a4c2 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -163,6 +163,19 @@ class BForm_3<bits<6> opcode, bit aa, bit lk, let Inst{31} = lk; } +class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { + bits<5> BI; + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.3 SC-Form class SCForm<bits<6> opcode, bits<1> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, @@ -677,6 +690,12 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk, let BH = 0; } +class XLForm_2_br2<bits<6> opcode, bits<10> xo, bits<5> bo, bit lk, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> + : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> { + let BO = bo; + let BH = 0; +} class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index b15ef1b9a6..18357c30d4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -163,6 +163,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case PPC::LFS: case PPC::LFD: case PPC::RESTORE_CR: + case PPC::RESTORE_CRBIT: case PPC::LVX: case PPC::RESTORE_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the @@ -187,6 +188,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case PPC::STFS: case PPC::STFD: case PPC::SPILL_CR: + case PPC::SPILL_CRBIT: case PPC::STVX: case PPC::SPILL_VRSAVE: // Check for the operands added by addFrameReference (the immediate 
is the @@ -209,7 +211,9 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Normal instructions can be commuted the obvious way. if (MI->getOpcode() != PPC::RLWIMI && - MI->getOpcode() != PPC::RLWIMIo) + MI->getOpcode() != PPC::RLWIMIo && + MI->getOpcode() != PPC::RLWIMI8 && + MI->getOpcode() != PPC::RLWIMI8o) return TargetInstrInfo::commuteInstruction(MI, NewMI); // Cannot commute if it has a non-zero rotate count. @@ -332,6 +336,22 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(1)); return false; + } else if (LastInst->getOpcode() == PPC::BC) { + if (!LastInst->getOperand(1).isMBB()) + return true; + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); + Cond.push_back(LastInst->getOperand(0)); + return false; + } else if (LastInst->getOpcode() == PPC::BCn) { + if (!LastInst->getOperand(1).isMBB()) + return true; + // Block ends with fall-through condbranch. 
+ TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); + Cond.push_back(LastInst->getOperand(0)); + return false; } else if (LastInst->getOpcode() == PPC::BDNZ8 || LastInst->getOpcode() == PPC::BDNZ) { if (!LastInst->getOperand(0).isMBB()) @@ -379,6 +399,26 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(SecondLastInst->getOperand(1)); FBB = LastInst->getOperand(0).getMBB(); return false; + } else if (SecondLastInst->getOpcode() == PPC::BC && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(1).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (SecondLastInst->getOpcode() == PPC::BCn && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(1).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 || SecondLastInst->getOpcode() == PPC::BDNZ) && LastInst->getOpcode() == PPC::B) { @@ -436,6 +476,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { --I; } if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC && + I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 0; @@ -448,6 +489,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { if (I == MBB.begin()) return 1; --I; if (I->getOpcode() != PPC::BCC && + I->getOpcode() != PPC::BC && 
I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 1; @@ -477,9 +519,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, BuildMI(&MBB, DL, get(Cond[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_SET) + BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) + BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB); else // Conditional branch BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB); return 1; } @@ -488,9 +534,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, BuildMI(&MBB, DL, get(Cond[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_SET) + BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) + BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB); else BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB); BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } @@ -575,6 +625,8 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; + case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break; + case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; } unsigned FirstReg = SwapOps ? 
FalseReg : TrueReg, @@ -668,39 +720,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { - // FIXME: We use CRi here because there is no mtcrf on a bit. Since the - // backend currently only uses CR1EQ as an individual bit, this should - // not cause any bug. If we need other uses of CR bits, the following - // code may be invalid. - unsigned Reg = 0; - if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || - SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || - SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || - SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || - SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || - SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || - SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || - SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || - SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) - Reg = PPC::CR7; - - return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); - + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CRBIT)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) .addReg(SrcReg, @@ -786,36 +810,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, FrameIdx)); return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { - 
- unsigned Reg = 0; - if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || - DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT || - DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT || - DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT || - DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT || - DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT || - DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT || - DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT || - DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN) - Reg = PPC::CR7; - - return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); - + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CRBIT), DestReg), + FrameIdx)); + return true; } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), FrameIdx)); @@ -1006,9 +1004,17 @@ bool PPCInstrInfo::PredicateInstruction( MI->setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : (isPPC64 ? 
PPC::BDZLR8 : PPC::BDZLR))); - } else { + } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI->setDesc(get(PPC::BCLR)); MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MI->setDesc(get(PPC::BCLRn)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + } else { + MI->setDesc(get(PPC::BCCLR)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .addReg(Pred[1].getReg()); } @@ -1020,6 +1026,22 @@ bool PPCInstrInfo::PredicateInstruction( MI->setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); + } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BC)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()) + .addMBB(MBB); + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BCn)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()) + .addMBB(MBB); } else { MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); MI->RemoveOperand(0); @@ -1039,8 +1061,23 @@ bool PPCInstrInfo::PredicateInstruction( bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8; bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); - MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : - (setLR ? PPC::BCCTRL : PPC::BCCTR))); + + if (Pred[0].getImm() == PPC::PRED_BIT_SET) { + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : + (setLR ? PPC::BCCTRL : PPC::BCCTR))); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + return true; + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) : + (setLR ? 
PPC::BCCTRLn : PPC::BCCTRn))); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + return true; + } + + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) : + (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); MachineInstrBuilder(*MI->getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .addReg(Pred[1].getReg()); @@ -1502,7 +1539,7 @@ protected: if (J->getOperand(2).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR)) + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) .addImm(J->getOperand(0).getImm()) .addReg(J->getOperand(1).getReg()); MachineBasicBlock::iterator K = J--; @@ -1511,6 +1548,20 @@ protected: ++NumBCLR; continue; } + } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) { + if (J->getOperand(1).getMBB() == &ReturnMBB) { + // This is a conditional branch to the return. Replace the branch + // with a bclr. + BuildMI(**PI, J, J->getDebugLoc(), + TII->get(J->getOpcode() == PPC::BC ? + PPC::BCLR : PPC::BCLRn)) + .addReg(J->getOperand(0).getReg()); + MachineBasicBlock::iterator K = J--; + K->eraseFromParent(); + BlockChanged = true; + ++NumBCLR; + continue; + } } else if (J->isBranch()) { if (J->isIndirectBranch()) { if (ReturnMBB.hasAddressTaken()) diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 464e07cfda..14c70a23b0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -290,6 +290,12 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{ + // imm64ZExt32 predicate - True if the i64 immediate fits in a 32-bit + // zero extended field. + return isUInt<32>(Imm); +}]>; + // Some r+i load/store instructions (such as LD, STD, LDU, etc.) 
that require // restricted memrix (4-aligned) constants are alignment sensitive. If these // offsets are hidden behind TOC entries than the values of the lower-order @@ -893,19 +899,44 @@ let usesCustomInserter = 1, // Expanded after instruction selection. def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_VRRC", []>; + + // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition + // register bit directly. + def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond, + gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4", + [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>; + def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond, + g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8", + [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>; + def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond, + f4rc:$T, f4rc:$F), "#SELECT_F4", + [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; + def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond, + f8rc:$T, f8rc:$F), "#SELECT_F8", + [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; + def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond, + vrrc:$T, vrrc:$F), "#SELECT_VRRC", + [(set v4i32:$dst, + (select i1:$cond, v4i32:$T, v4i32:$F))]>; } // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. -let mayStore = 1 in +let mayStore = 1 in { def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F), "#SPILL_CR", []>; +def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F), + "#SPILL_CRBIT", []>; +} // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. 
-let mayLoad = 1 in +let mayLoad = 1 in { def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F), "#RESTORE_CR", []>; +def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F), + "#RESTORE_CRBIT", []>; +} let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in @@ -915,10 +946,16 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, []>; - let isCodeGenOnly = 1 in - def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, - []>; + let isCodeGenOnly = 1 in { + def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi), + "bcctr 12, $bi, 0", IIC_BrB, []>; + def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), + "bcctr 4, $bi, 0", IIC_BrB, []>; + } } } @@ -946,10 +983,26 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">; let isReturn = 1, Uses = [LR, RM] in - def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), + def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>; } + let isCodeGenOnly = 1 in { + let Pattern = [(brcond i1:$bi, bb:$dst)] in + def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), + "bc 12, $bi, $dst">; + + let Pattern = [(brcond (not i1:$bi), bb:$dst)] in + def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), + "bc 4, $bi, $dst">; + + let isReturn = 1, Uses = [LR, RM] in + def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi), + "bclr 12, $bi, 0", IIC_BrB, []>; + def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi), + "bclr 4, $bi, 0", IIC_BrB, []>; + } + let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { def 
BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), "bdzlr", IIC_BrB, []>; @@ -1014,6 +1067,13 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">; + + def BCL : BForm_4<16, 12, 0, 1, (outs), + (ins crbitrc:$bi, condbrtarget:$dst), + "bcl 12, $bi, $dst">; + def BCLn : BForm_4<16, 4, 0, 1, (outs), + (ins crbitrc:$bi, condbrtarget:$dst), + "bcl 4, $bi, $dst">; } } let Uses = [CTR, RM] in { @@ -1021,19 +1081,31 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In32BitMode]>; - let isCodeGenOnly = 1 in - def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, - []>; + let isCodeGenOnly = 1 in { + def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), + "bcctrl 12, $bi, 0", IIC_BrB, []>; + def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), + "bcctrl 4, $bi, 0", IIC_BrB, []>; + } } let Uses = [LR, RM] in { def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins), "blrl", IIC_BrB, []>; - let isCodeGenOnly = 1 in - def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), - "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB, - []>; + let isCodeGenOnly = 1 in { + def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), + "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi), + "bclrl 12, $bi, 0", IIC_BrB, []>; + def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi), + "bclrl 4, $bi, 0", IIC_BrB, []>; + } } let Defs = [CTR], Uses = [CTR, RM] in { def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst), @@ -1840,44 +1912,52 @@ def MCRF : XLForm_3<19, 0, 
(outs crrc:$BF), (ins crrc:$BFA), def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crand $CRD, $CRA, $CRB", IIC_BrCR, []>; + "crand $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>; def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crnand $CRD, $CRA, $CRB", IIC_BrCR, []>; + "crnand $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>; def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "cror $CRD, $CRA, $CRB", IIC_BrCR, []>; + "cror $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>; def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crxor $CRD, $CRA, $CRB", IIC_BrCR, []>; + "crxor $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>; def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crnor $CRD, $CRA, $CRB", IIC_BrCR, []>; + "crnor $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>; def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "creqv $CRD, $CRA, $CRB", IIC_BrCR, []>; + "creqv $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>; def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crandc $CRD, $CRA, $CRB", IIC_BrCR, []>; + "crandc $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>; def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crorc $CRD, $CRA, $CRB", IIC_BrCR, []>; + "crorc $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>; let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), "creqv $dst, $dst, $dst", IIC_BrCR, - []>; + [(set i1:$dst, 1)]>; def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins), "crxor $dst, $dst, $dst", IIC_BrCR, - 
[]>; + [(set i1:$dst, 0)]>; let Defs = [CR1EQ], CRD = 6 in { def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), @@ -2211,8 +2291,10 @@ def : Pat<(i32 imm:$imm), (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; // Implement the 'not' operation with the NOR instruction. -def NOT : Pat<(not i32:$in), - (NOR $in, $in)>; +def i32not : OutPatFrag<(ops node:$in), + (NOR $in, $in)>; +def : Pat<(not i32:$in), + (i32not $in)>; // ADD an arbitrary immediate. def : Pat<(add i32:$in, imm:$imm), @@ -2350,6 +2432,514 @@ def : Pat<(fcopysign f32:$frB, f64:$frA), include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" +def crnot : OutPatFrag<(ops node:$in), + (CRNOR $in, $in)>; +def : Pat<(not i1:$in), + (crnot $in)>; + +// Patterns for arithmetic i1 operations. +def : Pat<(add i1:$a, i1:$b), + (CRXOR $a, $b)>; +def : Pat<(sub i1:$a, i1:$b), + (CRXOR $a, $b)>; +def : Pat<(mul i1:$a, i1:$b), + (CRAND $a, $b)>; + +// We're sometimes asked to materialize i1 -1, which is just 1 in this case +// (-1 is used to mean all bits set). +def : Pat<(i1 -1), (CRSET)>; + +// i1 extensions, implemented in terms of isel. +def : Pat<(i32 (zext i1:$in)), + (SELECT_I4 $in, (LI 1), (LI 0))>; +def : Pat<(i32 (sext i1:$in)), + (SELECT_I4 $in, (LI -1), (LI 0))>; + +def : Pat<(i64 (zext i1:$in)), + (SELECT_I8 $in, (LI8 1), (LI8 0))>; +def : Pat<(i64 (sext i1:$in)), + (SELECT_I8 $in, (LI8 -1), (LI8 0))>; + +// FIXME: We should choose either a zext or a sext based on other constants +// already around. +def : Pat<(i32 (anyext i1:$in)), + (SELECT_I4 $in, (LI 1), (LI 0))>; +def : Pat<(i64 (anyext i1:$in)), + (SELECT_I8 $in, (LI8 1), (LI8 0))>; + +// match setcc on i1 variables. 
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)), + (CRANDC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)), + (CRANDC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)), + (CRORC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)), + (CRORC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)), + (CREQV $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)), + (CRORC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)), + (CRORC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)), + (CRANDC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)), + (CRANDC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)), + (CRXOR $s1, $s2)>; + +// match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE, +// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for +// floating-point types. + +multiclass CRNotPat<dag pattern, dag result> { + def : Pat<pattern, (crnot result)>; + def : Pat<(not pattern), result>; + + // We can also fold the crnot into an extension: + def : Pat<(i32 (zext pattern)), + (SELECT_I4 result, (LI 0), (LI 1))>; + def : Pat<(i32 (sext pattern)), + (SELECT_I4 result, (LI 0), (LI -1))>; + + // We can also fold the crnot into an extension: + def : Pat<(i64 (zext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 1))>; + def : Pat<(i64 (sext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 -1))>; + + // FIXME: We should choose either a zext or a sext based on other constants + // already around. + def : Pat<(i32 (anyext pattern)), + (SELECT_I4 result, (LI 0), (LI 1))>; + + def : Pat<(i64 (anyext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 1))>; +} + +// FIXME: Because of what seems like a bug in TableGen's type-inference code, +// we need to write imm:$imm in the output patterns below, not just $imm, or +// else the resulting matcher will not correctly add the immediate operand +// (making it a register operand instead). + +// extended SETCC. 
+multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag, + OutPatFrag rfrag, OutPatFrag rfrag8> { + def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))), + (rfrag $s1)>; + def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))), + (rfrag8 $s1)>; + def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>; + def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>; + + def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))), + (rfrag $s1)>; + def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))), + (rfrag8 $s1)>; + def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>; + def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>; +} + +// Note that we do all inversions below with i(32|64)not, instead of using +// (xori x, 1) because on the A2 nor has single-cycle latency while xori +// has 2-cycle latency. + +defm : ExtSetCCPat<SETEQ, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (CNTLZW $in), 27, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (CNTLZD $in), 58, 63)> >; + +defm : ExtSetCCPat<SETNE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not (CNTLZD $in)), 58, 63)> >; + +defm : ExtSetCCPat<SETLT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM $in, 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL $in, 1, 63)> >; + +defm : ExtSetCCPat<SETGE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not $in), 1, 63)> >; + +defm : ExtSetCCPat<SETGT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (ANDC (NEG $in), $in), 1, 
31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >; + +defm : ExtSetCCPat<SETLE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >; + +defm : ExtSetCCPat<SETLT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >; + +defm : ExtSetCCPat<SETGE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >; + +defm : ExtSetCCPat<SETGT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not $in), 1, 63)> >; + +defm : ExtSetCCPat<SETLE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM $in, 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL $in, 1, 63)> >; + +// SETCC for i32. 
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +// When the immediate does not fit in 16 bits, the default code would +// materialize the constant, then compare against it, like this: +// lis r2, 4660 +// ori r2, r2, 22136 +// cmpw cr0, r3, r2 +// beq cr0,L6 +// Since we are just comparing for equality, we can emit this instead: +// xoris r0,r3,0x1234 +// cmplwi cr0,r0,0x5678 +// beq cr0,L6 + +def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +def : Pat<(i1 (setcc i32:$s1, 
i32:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +// SETCC for i64. +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; + +// When the immediate does not fit in 16 bits, the default code would +// materialize the constant, then compare against it, like this: +// lis r2, 4660 +// ori r2, r2, 22136 +// cmpd cr0, r3, r2 +// beq cr0,L6 +// Since we are just comparing for equality, we can emit this instead: +// xoris r0,r3,0x1234 +// cmpldi cr0,r0,0x5678 +// beq cr0,L6 + +def : Pat<(i1 (setcc i64:$s1, 
imm64ZExt32:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +// SETCC for f32. Ordered and don't-care forms (SETOLT/SETLT) use the same bit. 
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +// SETCC for f64. Same CR-bit selection as for f32, using FCMPUD. 
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +// match select on i1 variables: +def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), + (CROR (CRAND $cond , $tval), + (CRAND (crnot $cond), $fval))>; + +// match selectcc on i1 variables (signed orderings; a true i1 is -1): +// select (lhs == rhs), tval, fval is: +// ((lhs == rhs) & tval) | (!(lhs == rhs) & fval); lhs < rhs is lhs & !rhs +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), + (CROR (CRAND (CRANDC $lhs, $rhs), $tval), + (CRAND (CRORC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), + (CROR (CRAND (CRORC $lhs, $rhs), $tval), + (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : 
Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)), + (CROR (CRAND (CREQV $lhs, $rhs), $tval), + (CRAND (CRXOR $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), + (CROR (CRAND (CRORC $rhs, $lhs), $tval), + (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), + (CROR (CRAND (CRANDC $rhs, $lhs), $tval), + (CRAND (CRORC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), + (CROR (CRAND (CREQV $lhs, $rhs), $fval), + (CRAND (CRXOR $lhs, $rhs), $tval))>; + +// match selectcc on i1 variables with non-i1 output. +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), + (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), + (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)), + (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), + (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), + (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)), + (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), + (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), + (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)), + (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), + (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), + 
(SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), + (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), + (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), + (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), + (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), + (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), + (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, 
v4i32:$fval, SETEQ)), + (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), + (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), + (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), + (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +let usesCustomInserter = 1 in { +def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDIo_1_EQ_BIT", + [(set i1:$dst, (trunc (not i32:$in)))]>; +def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDIo_1_GT_BIT", + [(set i1:$dst, (trunc i32:$in))]>; + +def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDIo_1_EQ_BIT8", + [(set i1:$dst, (trunc (not i64:$in)))]>; +def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDIo_1_GT_BIT8", + [(set i1:$dst, (trunc i64:$in))]>; +} + +def : Pat<(i1 (not (trunc i32:$in))), + (ANDIo_1_EQ_BIT $in)>; +def : Pat<(i1 (not (trunc i64:$in))), + (ANDIo_1_EQ_BIT8 $in)>; //===----------------------------------------------------------------------===// // PowerPC Instructions used for assembler/disassembler only @@ -2665,14 +3255,14 @@ multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> { (BCCA bibo, CR0, abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"lr"#pm#" $cc", - (BCLR bibo, crrc:$cc)>; + (BCCLR bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lr"#pm, - (BCLR bibo, CR0)>; + (BCCLR bibo, CR0)>; def : InstAlias<"b"#name#"ctr"#pm#" $cc", - (BCCTR bibo, crrc:$cc)>; + (BCCCTR bibo, crrc:$cc)>; def : InstAlias<"b"#name#"ctr"#pm, - (BCCTR bibo, CR0)>; + (BCCCTR bibo, CR0)>; def : InstAlias<"b"#name#"l"#pm#" $cc, $dst", (BCCL bibo, crrc:$cc, condbrtarget:$dst)>; @@ -2685,14 +3275,14 @@ multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> { (BCCLA bibo, CR0, 
abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"lrl"#pm#" $cc", - (BCLRL bibo, crrc:$cc)>; + (BCCLRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lrl"#pm, - (BCLRL bibo, CR0)>; + (BCCLRL bibo, CR0)>; def : InstAlias<"b"#name#"ctrl"#pm#" $cc", - (BCCTRL bibo, crrc:$cc)>; + (BCCCTRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"ctrl"#pm, - (BCCTRL bibo, CR0)>; + (BCCCTRL bibo, CR0)>; } multiclass BranchExtendedMnemonic<string name, int bibo> { defm : BranchExtendedMnemonicPM<name, "", bibo>; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 19ccbfcdb1..f716600f6b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -452,6 +452,134 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +/// Map a condition-register bit register (e.g. CR2EQ) to the CR field that +/// contains it (CR2). Any other register trips the assertion below; with +/// assertions compiled out the function returns 0. +static unsigned getCRFromCRBit(unsigned SrcReg) { + unsigned Reg = 0; + switch (SrcReg) { + case PPC::CR0LT: case PPC::CR0GT: case PPC::CR0EQ: case PPC::CR0UN: + Reg = PPC::CR0; + break; + case PPC::CR1LT: case PPC::CR1GT: case PPC::CR1EQ: case PPC::CR1UN: + Reg = PPC::CR1; + break; + case PPC::CR2LT: case PPC::CR2GT: case PPC::CR2EQ: case PPC::CR2UN: + Reg = PPC::CR2; + break; + case PPC::CR3LT: case PPC::CR3GT: case PPC::CR3EQ: case PPC::CR3UN: + Reg = PPC::CR3; + break; + case PPC::CR4LT: case PPC::CR4GT: case PPC::CR4EQ: case PPC::CR4UN: + Reg = PPC::CR4; + break; + case PPC::CR5LT: case PPC::CR5GT: case PPC::CR5EQ: case PPC::CR5UN: + Reg = PPC::CR5; + break; + case PPC::CR6LT: case PPC::CR6GT: case PPC::CR6EQ: case PPC::CR6UN: + Reg = PPC::CR6; + break; + case PPC::CR7LT: case PPC::CR7GT: case PPC::CR7EQ: case PPC::CR7UN: + Reg = PPC::CR7; + break; + default: + break; + } + + assert(Reg != 0 && "Invalid CR bit register"); + return Reg; +} + +void 
PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // The SPILL_CRBIT pseudo: saves one CR bit to a frame slot via a GPR. + MachineInstr &MI = *II; // ; SPILL_CRBIT <SrcReg>, <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc dl = MI.getDebugLoc(); + + bool LP64 = Subtarget.isPPC64(); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL), + getCRFromCRBit(SrcReg)) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) + .addReg(getCRFromCRBit(SrcReg)); + + // If the saved register wasn't CR0LT, shift the bits left so that the bit to + // store is the first one. Mask all but that bit. + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + + // rlwinm Reg, Reg1, ShiftBits, 0, 0 (ShiftBits = encoding value of SrcReg). + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) + .addReg(Reg1, RegState::Kill) + .addImm(getEncodingValue(SrcReg)) + .addImm(0).addImm(0); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW)) + .addReg(Reg, RegState::Kill), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // The RESTORE_CRBIT pseudo: reloads one CR bit, preserving its siblings. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CRBIT <offset> + // Get the instruction's basic block. 
+ MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc dl = MI.getDebugLoc(); + + bool LP64 = Subtarget.isPPC64(); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_CRBIT does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg); + + unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO) + .addReg(getCRFromCRBit(DestReg)); + + unsigned ShiftBits = getEncodingValue(DestReg); + // rlwimi RegO, Reg, 32-ShiftBits, ShiftBits, ShiftBits (rotate 0 for bit 0). + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO) + .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill) + .addImm(ShiftBits ? 32-ShiftBits : 0) + .addImm(ShiftBits).addImm(ShiftBits); + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), + getCRFromCRBit(DestReg)) + .addReg(RegO, RegState::Kill) + // Make sure we have a use dependency all the way through this + // sequence of instructions. We can't have the other bits in the CR + // modified in between the mfocrf and the mtocrf. + .addReg(getCRFromCRBit(DestReg), RegState::Implicit); + + // Discard the pseudo instruction. + MBB.erase(II); +} + void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const { // Get the instruction. 
@@ -595,6 +716,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else if (OpC == PPC::RESTORE_CR) { lowerCRRestore(II, FrameIndex); return; + } else if (OpC == PPC::SPILL_CRBIT) { + lowerCRBitSpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_CRBIT) { + lowerCRBitRestore(II, FrameIndex); + return; } else if (OpC == PPC::SPILL_VRSAVE) { lowerVRSAVESpilling(II, FrameIndex); return; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 96b0b4bcd8..4871834c26 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -69,6 +69,10 @@ public: unsigned FrameIndex) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const; + void lowerCRBitSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerCRBitRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; void lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; void lowerVRSAVERestore(MachineBasicBlock::iterator II, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index d566e2c3e5..f1ecda198f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -204,17 +204,16 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; -def CRBITRC : RegisterClass<"PPC", [i32], 32, - (add CR0LT, CR0GT, CR0EQ, CR0UN, - CR1LT, CR1GT, CR1EQ, CR1UN, - CR2LT, CR2GT, CR2EQ, CR2UN, +def CRBITRC : RegisterClass<"PPC", [i1], 32, + (add CR2LT, CR2GT, CR2EQ, CR2UN, CR3LT, CR3GT, CR3EQ, CR3UN, CR4LT, CR4GT, CR4EQ, CR4UN, CR5LT, CR5GT, CR5EQ, CR5UN, CR6LT, CR6GT, CR6EQ, CR6UN, - CR7LT, CR7GT, CR7EQ, CR7UN)> -{ - let CopyCost = -1; + CR7LT, CR7GT, CR7EQ, CR7UN, + CR1LT, CR1GT, CR1EQ, CR1UN, + CR0LT, CR0GT, CR0EQ, CR0UN)> { + let Size = 32; } def 
CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index e01683247d..87b7c9f957 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -31,12 +31,24 @@ using namespace llvm; PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit) + const std::string &FS, bool is64Bit, + CodeGenOpt::Level OptLevel) : PPCGenSubtargetInfo(TT, CPU, FS) , IsPPC64(is64Bit) , TargetTriple(TT) { initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); + + std::string FullFS = FS; + + // At -O2 and above, track CR bits as individual registers. + if (OptLevel >= CodeGenOpt::Default) { + if (!FullFS.empty()) + FullFS = "+crbits," + FullFS; + else + FullFS = "+crbits"; + } + + resetSubtargetFeatures(CPU, FullFS); } /// SetJITMode - This is called to inform the subtarget info that we are @@ -73,6 +85,7 @@ void PPCSubtarget::initializeEnvironment() { HasMFOCRF = false; Has64BitSupport = false; Use64BitRegs = false; + UseCRBits = false; HasAltivec = false; HasQPX = false; HasFCPSGN = false; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index ec8c82ad52..a9159fbc73 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -73,6 +73,7 @@ protected: bool HasMFOCRF; bool Has64BitSupport; bool Use64BitRegs; + bool UseCRBits; bool IsPPC64; bool HasAltivec; bool HasQPX; @@ -103,7 +104,8 @@ public: /// of the specified triple. /// PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit); + const std::string &FS, bool is64Bit, + CodeGenOpt::Level OptLevel); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -146,6 +148,10 @@ public: /// has64BitSupport() returns true. 
bool use64BitRegs() const { return Use64BitRegs; } + /// useCRBits - Return true if we should store and manipulate i1 values in + /// the individual condition register bits. + bool useCRBits() const { return UseCRBits; } + /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 2e8d2d67fd..b6dffc205b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -70,7 +70,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool is64Bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64Bit), + Subtarget(TT, CPU, FS, is64Bit, OL), DL(getDataLayoutString(Subtarget)), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), diff --git a/test/CodeGen/PowerPC/bdzlr.ll b/test/CodeGen/PowerPC/bdzlr.ll index e487558e94..29b74c6c8c 100644 --- a/test/CodeGen/PowerPC/bdzlr.ll +++ b/test/CodeGen/PowerPC/bdzlr.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-crbits | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s -check-prefix=CHECK-CRB target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -54,6 +55,12 @@ for.end: ; preds = %for.body, %if.end, ; CHECK: bnelr ; CHECK: bdzlr ; CHECK-NOT: blr + +; CHECK-CRB: @lua_xmove +; CHECK-CRB: bclr 12, +; CHECK-CRB: bclr 12, +; CHECK-CRB: bdzlr +; CHECK-CRB-NOT: blr } attributes #0 = { nounwind } diff --git a/test/CodeGen/PowerPC/crbits.ll 
b/test/CodeGen/PowerPC/crbits.ll new file mode 100644 index 0000000000..998e940e8a --- /dev/null +++ b/test/CodeGen/PowerPC/crbits.ll @@ -0,0 +1,174 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; FIXME: For a number of these we load (1, 0) for the isel into two registers, +; whereas if we reverse the condition, we could use only one register (using ZERO +; for 0 in the isel). + +; Function Attrs: nounwind readnone +define zeroext i1 @test1(float %v1, float %v2) #0 { +entry: + %cmp = fcmp oge float %v1, %v2 + %cmp2 = fcmp ole float %v2, 0.000000e+00 + %and5 = and i1 %cmp, %cmp2 + ret i1 %and5 + +; CHECK-LABEL: @test1 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: li [[REG1:[0-9]+]], 1 +; CHECK-DAG: lfs [[REG2:[0-9]+]], +; CHECK-DAG: li [[REG3:[0-9]+]], 0 +; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]] +; CHECK: crnor +; CHECK: crnor +; CHECK: crand [[REG4:[0-9]+]], +; CHECK: isel 3, [[REG1]], [[REG3]], [[REG4]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define zeroext i1 @test2(float %v1, float %v2) #0 { +entry: + %cmp = fcmp oge float %v1, %v2 + %cmp2 = fcmp ole float %v2, 0.000000e+00 + %xor5 = xor i1 %cmp, %cmp2 + ret i1 %xor5 + +; CHECK-LABEL: @test2 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: li [[REG1:[0-9]+]], 1 +; CHECK-DAG: lfs [[REG2:[0-9]+]], +; CHECK-DAG: li [[REG3:[0-9]+]], 0 +; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]] +; CHECK: crnor +; CHECK: crnor +; CHECK: crxor [[REG4:[0-9]+]], +; CHECK: isel 3, [[REG1]], [[REG3]], [[REG4]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define zeroext i1 @test3(float %v1, float %v2, i32 signext %x) #0 { +entry: + %cmp = fcmp oge float %v1, %v2 + %cmp2 = fcmp ole float %v2, 0.000000e+00 + %cmp4 = icmp ne i32 %x, -2 + %and7 = and i1 %cmp2, %cmp4 + %xor8 = xor i1 %cmp, %and7 + ret i1 %xor8 + +; CHECK-LABEL: @test3 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: li [[REG1:[0-9]+]], 1 +; 
CHECK-DAG: lfs [[REG2:[0-9]+]], +; CHECK-DAG: li [[REG3:[0-9]+]], 0 +; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]] +; CHECK: crnor +; CHECK: crnor +; CHECK: crandc +; CHECK: crxor [[REG4:[0-9]+]], +; CHECK: isel 3, [[REG1]], [[REG3]], [[REG4]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define zeroext i1 @test4(i1 zeroext %v1, i1 zeroext %v2, i1 zeroext %v3) #0 { +entry: + %and8 = and i1 %v1, %v2 + %or9 = or i1 %and8, %v3 + ret i1 %or9 + +; CHECK-LABEL: @test4 +; CHECK: and [[REG1:[0-9]+]], 3, 4 +; CHECK: or 3, [[REG1]], 5 +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define zeroext i1 @test5(i1 zeroext %v1, i1 zeroext %v2, i32 signext %v3) #0 { +entry: + %and6 = and i1 %v1, %v2 + %cmp = icmp ne i32 %v3, -2 + %or7 = or i1 %and6, %cmp + ret i1 %or7 + +; CHECK-LABEL: @test5 +; CHECK-DAG: and [[REG1:[0-9]+]], 3, 4 +; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2 +; CHECK: li [[REG3:[0-9]+]], 1 +; CHECK: andi. {{[0-9]+}}, [[REG1]], 1 +; CHECK: li [[REG4:[0-9]+]], 0 +; CHECK: crorc [[REG5:[0-9]+]], +; CHECK: isel 3, [[REG3]], [[REG4]], [[REG5]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define zeroext i1 @test6(i1 zeroext %v1, i1 zeroext %v2, i32 signext %v3) #0 { +entry: + %cmp = icmp ne i32 %v3, -2 + %or6 = or i1 %cmp, %v2 + %and7 = and i1 %or6, %v1 + ret i1 %and7 + +; CHECK-LABEL: @test6 +; CHECK-DAG: andi. {{[0-9]+}}, 3, 1 +; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2 +; CHECK-DAG: cror [[REG1:[0-9]+]], 1, 1 +; CHECK: andi. {{[0-9]+}}, 4, 1 +; CHECK: li [[REG2:[0-9]+]], 1 +; CHECK: li [[REG3:[0-9]+]], 0 +; CHECK: crorc [[REG4:[0-9]+]], 1, +; CHECK: crand [[REG5:[0-9]+]], [[REG4]], [[REG1]] +; CHECK: isel 3, [[REG2]], [[REG3]], [[REG5]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define signext i32 @test7(i1 zeroext %v2, i32 signext %i1, i32 signext %i2) #0 { +entry: + %cond = select i1 %v2, i32 %i1, i32 %i2 + ret i32 %cond + +; CHECK-LABEL: @test7 +; CHECK: andi. 
{{[0-9]+}}, 3, 1 +; CHECK: isel [[REG1:[0-9]+]], 4, 5, 1 +; CHECK: extsw 3, [[REG1]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define float @test8(i1 zeroext %v2, float %v1, float %v3) #0 { +entry: + %cond = select i1 %v2, float %v1, float %v3 + ret float %cond + +; CHECK-LABEL: @test8 +; CHECK: andi. {{[0-9]+}}, 3, 1 +; CHECK: bclr 12, 1, 0 +; CHECK: fmr 1, 2 +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define signext i32 @test10(i32 signext %v1, i32 signext %v2) #0 { +entry: + %tobool = icmp ne i32 %v1, 0 + %lnot = icmp eq i32 %v2, 0 + %and3 = and i1 %tobool, %lnot + %and = zext i1 %and3 to i32 + ret i32 %and + +; CHECK-LABEL: @test10 +; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0 +; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0 +; CHECK-DAG: li [[REG1:[0-9]+]], 0 +; CHECK-DAG: li [[REG2:[0-9]+]], 1 +; CHECK: crandc [[REG3:[0-9]+]], +; CHECK: isel 3, [[REG2]], [[REG1]], [[REG3]] +; CHECK: blr +} + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/PowerPC/early-ret2.ll b/test/CodeGen/PowerPC/early-ret2.ll index a274e2c265..a8e456fea6 100644 --- a/test/CodeGen/PowerPC/early-ret2.ll +++ b/test/CodeGen/PowerPC/early-ret2.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-crbits | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s -check-prefix=CHECK-CRB target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -17,6 +18,9 @@ while.end: ; preds = %while.body, %while. 
; CHECK: @_Z8example3iPiS_ ; CHECK: bnelr + +; CHECK-CRB: @_Z8example3iPiS_ +; CHECK-CRB: bclr 12, } attributes #0 = { noinline nounwind } diff --git a/test/CodeGen/PowerPC/fold-zero.ll b/test/CodeGen/PowerPC/fold-zero.ll index c7ec6fade5..c1eea43017 100644 --- a/test/CodeGen/PowerPC/fold-zero.ll +++ b/test/CodeGen/PowerPC/fold-zero.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-crbits | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-CRB %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -12,3 +13,13 @@ define i32 @test1(i1 %a, i32 %c) nounwind { ; CHECK: blr } +define i32 @test2(i1 %a, i32 %c) nounwind { + %x = select i1 %a, i32 0, i32 %c + ret i32 %x + +; CHECK-CRB: @test2 +; CHECK-CRB-NOT: li {{[0-9]+}}, 0 +; CHECK-CRB: isel 3, 0, +; CHECK-CRB: blr +} + diff --git a/test/CodeGen/PowerPC/optcmp.ll b/test/CodeGen/PowerPC/optcmp.ll index 35aabfa52c..d929eae206 100644 --- a/test/CodeGen/PowerPC/optcmp.ll +++ b/test/CodeGen/PowerPC/optcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -disable-ppc-cmp-opt=0 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -mattr=-crbits -disable-ppc-cmp-opt=0 | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/test/CodeGen/PowerPC/rlwimi-and.ll b/test/CodeGen/PowerPC/rlwimi-and.ll index 7963249ddf..213363ee81 100644 --- a/test/CodeGen/PowerPC/rlwimi-and.ll +++ b/test/CodeGen/PowerPC/rlwimi-and.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -mattr=-crbits < %s | 
FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-bgq-linux" diff --git a/test/CodeGen/PowerPC/sdag-ppcf128.ll b/test/CodeGen/PowerPC/sdag-ppcf128.ll index 535ece6d3d..c46bc6b22d 100644 --- a/test/CodeGen/PowerPC/sdag-ppcf128.ll +++ b/test/CodeGen/PowerPC/sdag-ppcf128.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mattr=-crbits < %s | FileCheck %s ; ; PR14751: Unsupported type in SelectionDAG::getConstantFP() diff --git a/test/CodeGen/PowerPC/setcc_no_zext.ll b/test/CodeGen/PowerPC/setcc_no_zext.ll index 9b2036e1dc..467e921f74 100644 --- a/test/CodeGen/PowerPC/setcc_no_zext.ll +++ b/test/CodeGen/PowerPC/setcc_no_zext.ll @@ -1,5 +1,9 @@ ; RUN: llc < %s -march=ppc32 | not grep rlwinm +; FIXME: This optimization has temporarily regressed with crbits enabled by +; default at the default CodeOpt level. 
+; XFAIL: * + define i32 @setcc_one_or_zero(i32* %a) { entry: %tmp.1 = icmp ne i32* %a, null ; <i1> [#uses=1] diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll index 731958374e..b7dd78085e 100644 --- a/test/CodeGen/PowerPC/seteq-0.ll +++ b/test/CodeGen/PowerPC/seteq-0.ll @@ -1,9 +1,12 @@ -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ -; RUN: grep "srwi r., r., 5" +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s define i32 @eq0(i32 %a) { %tmp.1 = icmp eq i32 %a, 0 ; <i1> [#uses=1] %tmp.2 = zext i1 %tmp.1 to i32 ; <i32> [#uses=1] ret i32 %tmp.2 + +; CHECK: cntlzw [[REG:r[0-9]+]], r3 +; CHECK: rlwinm r3, [[REG]], 27, 31, 31 +; CHECK: blr } diff --git a/test/CodeGen/PowerPC/subsumes-pred-regs.ll b/test/CodeGen/PowerPC/subsumes-pred-regs.ll index 97ac788164..da637cd254 100644 --- a/test/CodeGen/PowerPC/subsumes-pred-regs.ll +++ b/test/CodeGen/PowerPC/subsumes-pred-regs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=ppc64 | FileCheck %s +; RUN: llc < %s -mcpu=ppc64 -mattr=-crbits | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" |