diff options
21 files changed, 410 insertions, 66 deletions
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index fc3a8b71bd..88fb12064b 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -647,6 +647,46 @@ Release Notes</a>.</h1> <!--=========================================================================--> <h3> +<a name="PowerPC">PowerPC Target Improvements</a> +</h3> + +<div> + +<ul> +<p>Many fixes and changes across LLVM (and Clang) for better compliance with + the 64-bit PowerPC ELF Application Binary Interface, interoperability with + GCC, and overall 64-bit PowerPC support. Some highlights include:</p> +<ul> + <li> MCJIT support added.</li> + <li> PPC64 relocation support and (small code model) TOC handling + added.</li> + <li> Parameter passing and return value fixes (alignment issues, + padding, varargs support, proper register usage, odd-sized + structure support, float support, extension of return values + for i32 return values).</li> + <li> Fixes in spill and reload code for vector registers.</li> + <li> C++ exception handling enabled.</li> + <li> Changes to remediate double-rounding compatibility issues with + respect to GCC behavior.</li> + <li> Refactoring to disentangle ppc64-elf-linux ABI from Darwin + ppc64 ABI support.</li> + <li> Assorted new test cases and test case fixes (endian and word + size issues).</li> + <li> Fixes for big-endian codegen bugs, instruction encodings, and + instruction constraints.</li> + <li> Implemented -integrated-as support.</li> + <li> Additional support for Altivec compare operations.</li> + <li> IBM long double support.</li> +</ul> +<p>There have also been code generation improvements for both 32- and 64-bit + code. 
Instruction scheduling support for the Freescale e500mc and e5500 + cores has been added.</p> +</ul> + +</div> + +<!--=========================================================================--> +<h3> <a name="OtherTS">Other Target Specific Improvements</a> </h3> diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index b676e91eba..2cd267116c 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -457,7 +457,9 @@ enum { R_PPC_REL14 = 11, R_PPC_REL14_BRTAKEN = 12, R_PPC_REL14_BRNTAKEN = 13, - R_PPC_REL32 = 26 + R_PPC_REL32 = 26, + R_PPC_TPREL16_LO = 70, + R_PPC_TPREL16_HA = 72 }; // ELF Relocation types for PPC64 diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 0f260205df..497e000b68 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -84,7 +84,8 @@ namespace { bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs) const; + SmallVector<unsigned,2> &PhysDefs, + bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, SmallVector<unsigned,2> &PhysDefs, @@ -194,31 +195,52 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs) const{ - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + SmallVector<unsigned,2> &PhysDefs, + bool &PhysUseDef) const{ + // First, add all uses to PhysRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) + if (!MO.isReg() || MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; - // If the def is dead, it's ok. But the def may not marked "dead". 
That's - // common since this pass is run before livevariables. We can scan - // forward a few instructions and check if it is obviously dead. - if (MO.isDef() && - (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) - continue; // Reading constant physregs is ok. if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); - if (MO.isDef()) + } + + // Next, collect all defs into PhysDefs. If any is already in PhysRefs + // (which currently contains only uses), set the PhysUseDef flag. + PhysUseDef = false; + MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Check against PhysRefs even if the def is "dead". + if (PhysRefs.count(Reg)) + PhysUseDef = true; + // If the def is dead, it's ok. But the def may not marked "dead". That's + // common since this pass is run before livevariables. We can scan + // forward a few instructions and check if it is obviously dead. + if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end())) PhysDefs.push_back(Reg); } + // Finally, add all defs to PhysRefs as well. + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) + for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + return !PhysRefs.empty(); } @@ -459,16 +481,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool CrossMBBPhysDef = false; SmallSet<unsigned, 8> PhysRefs; SmallVector<unsigned, 2> PhysDefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { + bool PhysUseDef = false; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, + PhysDefs, PhysUseDef)) { FoundCSE = false; // ... 
Unless the CS is local or is in the sole predecessor block // and it also defines the physical register which is not clobbered // in between and the physical register uses were not clobbered. - unsigned CSVN = VNT.lookup(MI); - MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) - FoundCSE = true; + // This can never be the case if the instruction both uses and + // defines the same physical register, which was detected above. + if (!PhysUseDef) { + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) + FoundCSE = true; + } } if (!FoundCSE) { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 3dd9bf5613..0a885ce1c4 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -68,7 +68,54 @@ static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", cl::location(llvm::InterleaveSrcInPtx)); +namespace { +/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V +/// depends. +void DiscoverDependentGlobals(Value *V, + DenseSet<GlobalVariable*> &Globals) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + Globals.insert(GV); + else { + if (User *U = dyn_cast<User>(V)) { + for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { + DiscoverDependentGlobals(U->getOperand(i), Globals); + } + } + } +} +/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable +/// instances to be emitted, but only after any dependents have been added +/// first. +void VisitGlobalVariableForEmission(GlobalVariable *GV, + SmallVectorImpl<GlobalVariable*> &Order, + DenseSet<GlobalVariable*> &Visited, + DenseSet<GlobalVariable*> &Visiting) { + // Have we already visited this one? + if (Visited.count(GV)) return; + + // Do we have a circular dependency? 
+ if (Visiting.count(GV)) + report_fatal_error("Circular dependency found in global variable set"); + + // Start visiting this global + Visiting.insert(GV); + + // Make sure we visit all dependencies first + DenseSet<GlobalVariable*> Others; + for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) + DiscoverDependentGlobals(GV->getOperand(i), Others); + + for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), + E = Others.end(); I != E; ++I) + VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); + + // Now we can visit ourselves + Order.push_back(GV); + Visited.insert(GV); + Visiting.erase(GV); +} +} // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we // cannot just link to the existing version. @@ -893,10 +940,27 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitDeclarations(M, OS2); - // Print out module-level global variables here. + // As ptxas does not support forward references of globals, we need to first + // sort the list of module-level globals in def-use order. We visit each + // global variable in order, and ensure that we emit it *after* its dependent + // globals. We use a little extra memory maintaining both a set and a list to + // have fast searches while maintaining a strict ordering. 
+ SmallVector<GlobalVariable*,8> Globals; + DenseSet<GlobalVariable*> GVVisited; + DenseSet<GlobalVariable*> GVVisiting; + + // Visit each global variable, in order for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I, OS2); + I != E; ++I) + VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); + + assert(GVVisited.size() == M.getGlobalList().size() && + "Missed a global variable"); + assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); + + // Print out module-level global variables in proper order + for (unsigned i = 0, e = Globals.size(); i != e; ++i) + printModuleLevelGV(Globals[i], OS2); OS2 << '\n'; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index be771e3567..f1a99d77be 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PTX does not support load / store predicate registers - setOperationAction(ISD::LOAD, MVT::i1, Expand); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setOperationAction(ISD::STORE, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i32, MVT::i1, Expand); setTruncStoreAction(MVT::i16, MVT::i1, Expand); @@ -856,11 +857,64 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_SUBVECTOR: return Op; case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } + +// v = ld i1* addr +// => +// v1 = ld i8* addr +// v = trunc v1 to i1 +SDValue 
NVPTXTargetLowering:: +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(Node); + DebugLoc dl = Node->getDebugLoc(); + assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ; + assert(Node->getValueType(0) == MVT::i1 && + "Custom lowering for i1 load only"); + SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), + LD->getAlignment()); + SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); + // The legalizer (the caller) is expecting two values from the legalized + // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() + // in LegalizeDAG.cpp which also uses MergeValues. + SDValue Ops[] = {result, LD->getChain()}; + return DAG.getMergeValues(Ops, 2, dl); +} + +// st i1 v, addr +// => +// v1 = zxt v to i8 +// st i8, addr +SDValue NVPTXTargetLowering:: +LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3 = ST->getValue(); + assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, + MVT::i8, Tmp3); + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + return Result; +} + + SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, EVT v) const { diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 86246e6449..94a177ceb0 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -138,6 +138,9 @@ private: SDValue 
getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index c3a683a2c6..3cfd9718e5 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -28,7 +28,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { std::string TargetName; NVPTX::DrvInterface drvInterface; - bool dummy; // For the 'dummy' feature, see NVPTX.td bool Is64Bit; // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 1518a60db8..dc93f7124a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -74,10 +74,26 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_ break; case PPC::fixup_ppc_ha16: - Type = ELF::R_PPC_ADDR16_HA; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TPREL16_HA: + Type = ELF::R_PPC_TPREL16_HA; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16_HA; + break; + } break; case PPC::fixup_ppc_lo16: - Type = ELF::R_PPC_ADDR16_LO; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TPREL16_LO: + Type = ELF::R_PPC_TPREL16_LO; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16_LO; + break; + } break; case PPC::fixup_ppc_lo14: Type = ELF::R_PPC_ADDR14; diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4387730fcc..15d690bd89 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ 
b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -54,12 +54,13 @@ #include "llvm/Support/ELF.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/MapVector.h" using namespace llvm; namespace { class PPCAsmPrinter : public AsmPrinter { protected: - DenseMap<MCSymbol*, MCSymbol*> TOC; + MapVector<MCSymbol*, MCSymbol*> TOC; const PPCSubtarget &Subtarget; uint64_t TOCLabelID; public: @@ -465,8 +466,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { SectionKind::getReadOnly()); OutStreamer.SwitchSection(Section); - // FIXME: This is nondeterminstic! - for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), + for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), E = TOC.end(); I != E; ++I) { OutStreamer.EmitLabel(I->second); MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName()); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 6c2249a11b..9711452ec4 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -234,10 +234,10 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let Defs = [CTR8], Uses = [CTR8] in { - def BDZ8 : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst", BrB, []>; - def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst", BrB, []>; + def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } @@ -511,7 +511,7 @@ def RLWINM8 : MForm_2<21, "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, []>; -def ISEL8 : AForm_1<31, 15, +def ISEL8 : AForm_4<31, 15, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; @@ -556,7 +556,7 @@ def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), "lhaux $rD, $addr", LdStLHAU, []>, 
RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -606,7 +606,7 @@ def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -706,7 +706,7 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), let PPC970_Unit = 2 in { -def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, +def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index a41a0279d2..c3c171cd21 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -94,12 +94,6 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, let Inst{31} = lk; } -class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, list<dag> pattern> - : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> { - let LI{0-4} = bo; -} - // 1.7.2 B-Form class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> : I<opcode, OOL, IOL, asmstr, BrB> { @@ -118,6 +112,13 @@ class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> let Inst{31} = lk; } +class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, + string asmstr> + : BForm<opcode, aa, lk, OOL, IOL, 
asmstr> { + let BIBO{4-0} = bo; + let BIBO{6-5} = 0; + let CR = 0; +} // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, @@ -625,9 +626,9 @@ class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FXM; - bits<5> ST; + bits<5> rS; - let Inst{6-10} = ST; + let Inst{6-10} = rS; let Inst{11} = 0; let Inst{12-19} = FXM; let Inst{20} = 0; @@ -666,7 +667,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, string cstr, InstrItinClass itin, list<dag>pattern> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FM; - bits<5> RT; + bits<5> rT; bit RC = 0; // set by isDOT let Pattern = pattern; @@ -675,7 +676,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{6} = 0; let Inst{7-14} = FM; let Inst{15} = 0; - let Inst{16-20} = RT; + let Inst{16-20} = rT; let Inst{21-30} = xo; let Inst{31} = RC; } @@ -758,6 +759,26 @@ class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, let FRB = 0; } +class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12). 
+ bits<3> CR; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-23} = CR; + let Inst{24-25} = BIBO{6-5}; + let Inst{26-30} = xo; + let Inst{31} = 0; +} + // 1.7.13 M-Form class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 3ef3bab957..6ee045a2c7 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -446,10 +446,10 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; let Defs = [CTR], Uses = [CTR] in { - def BDZ : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst", BrB, []>; - def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst", BrB, []>; + def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } @@ -732,7 +732,7 @@ def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -1395,13 +1395,13 @@ let Uses = [RM] in { "fdivs $FRT, $FRA, $FRB", FPDivS, [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>; def FMUL : AForm_3<63, 25, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fmul $FRT, $FRA, $FRB", FPFused, - [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>; + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC), + "fmul $FRT, $FRA, $FRC", FPFused, + [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>; def FMULS : AForm_3<59, 25, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fmuls $FRT, $FRA, 
$FRB", FPGeneral, - [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>; + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC), + "fmuls $FRT, $FRA, $FRC", FPGeneral, + [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fsub $FRT, $FRA, $FRB", FPAddSub, @@ -1414,7 +1414,7 @@ let Uses = [RM] in { } let PPC970_Unit = 1 in { // FXU Operations. - def ISEL : AForm_1<31, 15, + def ISEL : AForm_4<31, 15, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1f729e3133..b35fb514bf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12729,8 +12729,8 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, case X86::ATOMSUB6432: { unsigned HiOpc; unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg).addReg(LoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg); break; } case X86::ATOMNAND6432: { diff --git a/test/CodeGen/NVPTX/global-ordering.ll b/test/CodeGen/NVPTX/global-ordering.ll new file mode 100644 index 0000000000..43394a79e9 --- /dev/null +++ b/test/CodeGen/NVPTX/global-ordering.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 + +; Make sure we emit these globals in def-use order + + +; PTX32: .visible .global .align 1 .u8 a = 2; +; PTX32-NEXT: .visible .global .align 4 .u32 a2 = a; +; PTX64: .visible .global .align 1 .u8 a = 2; +; PTX64-NEXT: .visible .global .align 8 .u64 a2 = a; +@a2 = addrspace(1) global i8 addrspace(1)* @a +@a = addrspace(1) global i8 2 
+ + +; PTX32: .visible .global .align 1 .u8 b = 1; +; PTX32-NEXT: .visible .global .align 4 .u32 b2[2] = {b, b}; +; PTX64: .visible .global .align 1 .u8 b = 1; +; PTX64-NEXT: .visible .global .align 8 .u64 b2[2] = {b, b}; +@b2 = addrspace(1) global [2 x i8 addrspace(1)*] [i8 addrspace(1)* @b, i8 addrspace(1)* @b] +@b = addrspace(1) global i8 1 diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll new file mode 100644 index 0000000000..779f7798d8 --- /dev/null +++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 + +define ptx_kernel void @t1(i1* %a) { +; PTX32: mov.u16 %rc{{[0-9]+}}, 0; +; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}; +; PTX64: mov.u16 %rc{{[0-9]+}}, 0; +; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}; + store i1 false, i1* %a + ret void +} + + +define ptx_kernel void @t2(i1* %a, i8* %b) { +; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1; +; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1; +; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1; +; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1; + + %t1 = load i1* %a + %t2 = select i1 %t1, i8 1, i8 2 + store i8 %t2, i8* %b + ret void +} diff --git a/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll b/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll new file mode 100644 index 0000000000..41533a8f32 --- /dev/null +++ b/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @test(i64 %n) nounwind { +entry: + %0 = alloca i8, i64 %n, align 1 + %1 = alloca i8, i64 %n, align 1 + call void @use(i8* %0, i8* %1) nounwind + ret void +} + 
+declare void @use(i8*, i8*) + +; Check we actually have two instances of dynamic stack allocation, +; identified by the stdux used to update the back-chain link. +; CHECK: stdux +; CHECK: stdux diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll index 7d1cda35a2..3d058bc289 100644 --- a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll +++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll @@ -10,10 +10,10 @@ entry: ; CHECK: movl ([[REG:%[a-z]+]]), %eax ; CHECK: movl 4([[REG]]), %edx ; CHECK: LBB0_1: -; CHECK: movl $1 -; CHECK: addl -; CHECK: movl $0 -; CHECK: adcl +; CHECK: movl %eax, %ebx +; CHECK: addl {{%[a-z]+}}, %ebx +; CHECK: movl %edx, %ecx +; CHECK: adcl {{%[a-z]+}}, %ecx ; CHECK: lock ; CHECK-NEXT: cmpxchg8b ([[REG]]) ; CHECK-NEXT: jne diff --git a/test/CodeGen/X86/pr14314.ll b/test/CodeGen/X86/pr14314.ll new file mode 100644 index 0000000000..5388a4b01b --- /dev/null +++ b/test/CodeGen/X86/pr14314.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 | FileCheck %s + +define i64 @atomicSub(i64* %a, i64 %b) nounwind { +entry: + %0 = atomicrmw sub i64* %a, i64 %b seq_cst + ret i64 %0 +; CHECK: atomicSub +; movl %eax, %ebx +; subl {{%[a-z]+}}, %ebx +; movl %edx, %ecx +; sbbl {{%[a-z]+}}, %ecx +; CHECK: ret +} diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg index af3d13f746..7bcb9ae82c 100644 --- a/test/ExecutionEngine/MCJIT/lit.local.cfg +++ b/test/ExecutionEngine/MCJIT/lit.local.cfg @@ -8,12 +8,13 @@ def getRoot(config): root = getRoot(config) targets = set(root.targets_to_build.split()) -if ('X86' in targets) | ('ARM' in targets) | ('Mips' in targets): +if ('X86' in targets) | ('ARM' in targets) | ('Mips' in targets) | \ + ('PowerPC' in targets): config.unsupported = False else: config.unsupported = True -if root.host_arch not in ['x86', 'x86_64', 'ARM', 'Mips']: +if root.host_arch not in ['x86', 'x86_64', 'ARM', 'Mips', 'PowerPC']: config.unsupported 
= True if root.host_os in ['Darwin']: diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg index 19eebc0ac7..f0343263db 100644 --- a/test/ExecutionEngine/lit.local.cfg +++ b/test/ExecutionEngine/lit.local.cfg @@ -1 +1,12 @@ config.suffixes = ['.ll', '.c', '.cpp'] + +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +if root.host_arch in ['PowerPC']: + config.unsupported = True + diff --git a/test/MC/PowerPC/ppc64-tls-relocs-01.ll b/test/MC/PowerPC/ppc64-tls-relocs-01.ll new file mode 100644 index 0000000000..5e37311075 --- /dev/null +++ b/test/MC/PowerPC/ppc64-tls-relocs-01.ll @@ -0,0 +1,28 @@ +;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj %s -o - | \ +;; RUN: elf-dump --dump-section-data | FileCheck %s + +;; FIXME: this file should be in .s form, change when asm parser is available. + +@t = thread_local global i32 0, align 4 + +define i32* @f() nounwind { +entry: + ret i32* @t +} + +;; Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs +;; against the thread-local symbol 't'. +;; CHECK: '.rela.text' +;; CHECK: Relocation 0 +;; CHECK-NEXT: 'r_offset', +;; CHECK-NEXT: 'r_sym', 0x00000008 +;; CHECK-NEXT: 'r_type', 0x00000048 +;; CHECK: Relocation 1 +;; CHECK-NEXT: 'r_offset', +;; CHECK-NEXT: 'r_sym', 0x00000008 +;; CHECK-NEXT: 'r_type', 0x00000046 + +;; Check that we got the correct symbol. +;; CHECK: Symbol 8 +;; CHECK-NEXT: 't' + |