diff options
Diffstat (limited to 'lib')
112 files changed, 3023 insertions, 831 deletions
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index c0009cb989..674ce3aea7 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -89,7 +89,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); TD = getAnalysisIfAvailable<DataLayout>(); DT = getAnalysisIfAvailable<DominatorTree>(); - if (PredCache == 0) + if (!PredCache) PredCache.reset(new PredIteratorCache()); return false; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index eb744d243b..7ad4f57f75 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -636,14 +636,13 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); MachineModuleInfo &MMI = MF->getMMI(); - const std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions(); bool FoundOne = false; (void)FoundOne; - for (std::vector<MachineMove>::const_iterator I = Moves.begin(), - E = Moves.end(); - I != E; ++I) { + for (std::vector<MCCFIInstruction>::iterator I = Instructions.begin(), + E = Instructions.end(); I != E; ++I) { if (I->getLabel() == Label) { - EmitCFIFrameMove(*I); + emitCFIInstruction(*I); FoundOne = true; } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 31e42d47cf..e6d67e8822 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -169,28 +169,21 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // Dwarf Lowering Routines //===----------------------------------------------------------------------===// -/// EmitCFIFrameMove - Emit a frame instruction. 
-void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - // Reg + Offset - OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true), - Src.getOffset()); - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); +void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { + switch (Inst.getOperation()) { + default: + llvm_unreachable("Unexpected instruction"); + case MCCFIInstruction::OpDefCfaOffset: + OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfa: + OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfaRegister: + OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister()); + break; + case MCCFIInstruction::OpOffset: + OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + break; } } diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 673867ada1..cc0cb56e8b 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -114,8 +114,8 @@ DIE::~DIE() { /// Climb up the parent chain to get the compile unit DIE to which this DIE /// belongs. 
-DIE *DIE::getCompileUnit() const { - DIE *p = getParent(); +DIE *DIE::getCompileUnit() { + DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit) return p; diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 3c06001686..550d873128 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -153,7 +153,7 @@ namespace llvm { DIE *getParent() const { return Parent; } /// Climb up the parent chain to get the compile unit DIE this DIE belongs /// to. - DIE *getCompileUnit() const; + DIE *getCompileUnit(); void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } @@ -325,7 +325,9 @@ namespace llvm { class DIEEntry : public DIEValue { DIE *const Entry; public: - explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { + assert(E && "Cannot construct a DIEEntry with a null DIE"); + } DIE *getEntry() const { return Entry; } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 74b1b13367..49a85d81b4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -23,7 +23,6 @@ namespace llvm { template <typename T> class SmallVectorImpl; struct LandingPadInfo; class MachineModuleInfo; -class MachineMove; class MachineInstr; class MachineFunction; class MCAsmInfo; diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 1a09837834..7ce5cc6f67 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -62,14 +62,8 @@ static bool getVerboseAsm() { llvm_unreachable("Invalid verbose asm state"); } -LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, - StringRef CPU, StringRef FS, - TargetOptions Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : TargetMachine(T, Triple, CPU, FS, Options) { 
- CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); - AsmInfo = T.createMCAsmInfo(Triple); +void LLVMTargetMachine::initAsmInfo() { + AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. @@ -79,6 +73,15 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } +LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, + StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, Triple, CPU, FS, Options) { + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); +} + void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 8af9d053b1..74cf9f50df 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -268,6 +268,39 @@ MachineModuleInfo::MachineModuleInfo() MachineModuleInfo::~MachineModuleInfo() { } +static MCCFIInstruction convertMoveToCFI(const MCRegisterInfo &MRI, + MCSymbol *Label, + const MachineLocation &Dst, + const MachineLocation &Src) { + // If advancing cfa. 
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() == MachineLocation::VirtualFP) + return MCCFIInstruction::createDefCfaOffset(Label, Src.getOffset()); + // Reg + Offset + return MCCFIInstruction::createDefCfa( + Label, MRI.getDwarfRegNum(Src.getReg(), true), -Src.getOffset()); + } + + if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + return MCCFIInstruction::createDefCfaRegister( + Label, MRI.getDwarfRegNum(Dst.getReg(), true)); + } + + assert(!Dst.isReg() && "Machine move not supported yet."); + return MCCFIInstruction::createOffset( + Label, MRI.getDwarfRegNum(Src.getReg(), true), Dst.getOffset()); +} + + +void MachineModuleInfo::addFrameMove(MCSymbol *Label, + const MachineLocation &Dst, + const MachineLocation &Src) { + MCCFIInstruction I = + convertMoveToCFI(Context.getRegisterInfo(), Label, Dst, Src); + FrameInstructions.push_back(I); +} + bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = 0; @@ -303,7 +336,7 @@ bool MachineModuleInfo::doFinalization(Module &M) { /// void MachineModuleInfo::EndFunction() { // Clean up frame info. - FrameMoves.clear(); + FrameInstructions.clear(); // Clean up exception info. LandingPads.clear(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 9eed1fc62a..49748289da 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -713,7 +713,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; BC.Exit = BI.LiveOut ? 
SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.ChangesValue = BI.FirstDef; + BC.ChangesValue = BI.FirstDef.isValid(); if (!Intf.hasInterference()) continue; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c54dffbb13..a8621a89a8 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9254,19 +9254,33 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { for (unsigned I = 0; I != NumConcats; ++I) { // Make sure we're dealing with a copy. unsigned Begin = I * NumElemsPerConcat; - if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) - return SDValue(); + bool AllUndef = true, NoUndef = true; + for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(J) >= 0) + AllUndef = false; + else + NoUndef = false; + } - for (unsigned J = 1; J != NumElemsPerConcat; ++J) { - if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + if (NoUndef) { + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) return SDValue(); - } - unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; - if (FirstElt < N0.getNumOperands()) - Ops.push_back(N0.getOperand(FirstElt)); - else - Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + for (unsigned J = 1; J != NumElemsPerConcat; ++J) + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + + } else if (AllUndef) { + Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); + } else { // Mixed with general masks and undefs, can't do optimization. 
+ return SDValue(); + } } return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 3903743878..23984e9986 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -222,7 +222,9 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { PersonalityFn = LPads[0]->getPersonalityFn(); Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); - Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); + Builder.CreateStore(Builder.CreateBitCast(PersonalityFn, + Builder.getInt8PtrTy()), + PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index a789a2596d..90b93aaa72 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/DebugInfo.h" #include "llvm/IR/Function.h" @@ -528,6 +529,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (!V) continue; + const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V); + if (PSV && PSV->isConstant(MFI)) + continue; + // Climb up and find the original alloca. 
V = GetUnderlyingObject(V); // If we did not find one, or if the one that we found is not in our diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index e43ba4f1dd..0191636307 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -47,7 +48,7 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)( ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MCJMM, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, @@ -455,10 +456,12 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) return 0; + assert(!(JMM && MCJMM)); + // If the user specified a memory manager but didn't specify which engine to // create, we assume they only want the JIT, and we fail if they only want // the interpreter. - if (JMM) { + if (JMM || MCJMM) { if (WhichEngine & EngineKind::JIT) WhichEngine = EngineKind::JIT; else { @@ -467,6 +470,14 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return 0; } } + + if (MCJMM && ! UseMCJIT) { + if (ErrorStr) + *ErrorStr = + "Cannot create a legacy JIT with a runtime dyld memory " + "manager."; + return 0; + } // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. @@ -480,7 +491,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (UseMCJIT && ExecutionEngine::MCJITCtor) { ExecutionEngine *EE = - ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, + ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? 
MCJMM : JMM, AllocateGVsWithCode, TheTM.take()); if (EE) return EE; } else if (ExecutionEngine::JITCtor) { diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 38aa5474a3..ced567205a 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -39,7 +39,7 @@ extern "C" void LLVMLinkInMCJIT() { ExecutionEngine *MCJIT::createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -47,14 +47,14 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - return new MCJIT(M, TM, JMM ? JMM : new SectionMemoryManager(), GVsWithCode); + return new MCJIT(M, TM, MemMgr ? MemMgr : new SectionMemoryManager(), + GVsWithCode); } MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), Ctx(0), - MemMgr(MM ? MM : new SectionMemoryManager()), Dyld(MemMgr), - IsLoaded(false), M(m), ObjCache(0) { + : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), + IsLoaded(false), M(m), ObjCache(0) { setDataLayout(TM->getDataLayout()); } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 8c4bf6e1db..7f247e2dee 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -98,7 +98,7 @@ public: static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM); diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 89a3c0578c..81d7efa774 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -112,21 +112,20 @@ bool Value::hasNUsesOrMore(unsigned N) const { /// isUsedInBasicBlock - Return true if this value is used in the specified /// basic block. 
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const { - // Start by scanning over the instructions looking for a use before we start - // the expensive use iteration. - unsigned MaxBlockSize = 3; - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (std::find(I->op_begin(), I->op_end(), this) != I->op_end()) + // This can be computed either by scanning the instructions in BB, or by + // scanning the use list of this Value. Both lists can be very long, but + // usually one is quite short. + // + // Scan both lists simultaneously until one is exhausted. This limits the + // search to the shorter list. + BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + const_use_iterator UI = use_begin(), UE = use_end(); + for (; BI != BE && UI != UE; ++BI, ++UI) { + // Scan basic block: Check if this Value is used by the instruction at BI. + if (std::find(BI->op_begin(), BI->op_end(), this) != BI->op_end()) return true; - if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator - break; - } - - if (MaxBlockSize != 0) // We scanned the entire block and found no use. - return false; - - for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { - const Instruction *User = dyn_cast<Instruction>(*I); + // Scan use list: Check if the use at UI is in BB. + const Instruction *User = dyn_cast<Instruction>(*UI); if (User && User->getParent() == BB) return true; } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 3d995484e7..4f66156f6d 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -759,9 +759,6 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - // FIXME: no tests cover this. Is adjustFixupOffset dead code? 
- TargetObjectWriter->adjustFixupOffset(Fixup, RelocOffset); - if (!hasRelocationAddend()) Addend = 0; diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 4766b37476..d3c019246c 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -42,8 +42,12 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); assert(TheTarget && "Unable to create target!"); + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); + if (!MRI) + return 0; + // Get the assembler info needed to setup the MCContext. - const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple); + const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple); if (!MAI) return 0; @@ -51,10 +55,6 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, if (!MII) return 0; - const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); - if (!MRI) - return 0; - // Package up features to be passed to target/subtarget std::string FeaturesStr; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 7640a63ee3..efe0c46db8 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -873,17 +873,6 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, streamer.EmitValue(v, size); } -static const MachineLocation TranslateMachineLocation( - const MCRegisterInfo &MRI, - const MachineLocation &Loc) { - unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? - MachineLocation::VirtualFP : - unsigned(MRI.getDwarfRegNum(Loc.getReg(), true)); - const MachineLocation &NewLoc = Loc.isReg() ? 
- MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); - return NewLoc; -} - namespace { class FrameEmitterImpl { int CFAOffset; @@ -1316,32 +1305,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Initial Instructions const MCAsmInfo &MAI = context.getAsmInfo(); - const std::vector<MachineMove> &Moves = MAI.getInitialFrameState(); - std::vector<MCCFIInstruction> Instructions; - - for (int i = 0, n = Moves.size(); i != n; ++i) { - MCSymbol *Label = Moves[i].getLabel(); - const MachineLocation &Dst = - TranslateMachineLocation(MRI, Moves[i].getDestination()); - const MachineLocation &Src = - TranslateMachineLocation(MRI, Moves[i].getSource()); - - if (Dst.isReg()) { - assert(Dst.getReg() == MachineLocation::VirtualFP); - assert(!Src.isReg()); - MCCFIInstruction Inst = - MCCFIInstruction::createDefCfa(Label, Src.getReg(), -Src.getOffset()); - Instructions.push_back(Inst); - } else { - assert(Src.isReg()); - unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset(); - MCCFIInstruction Inst = - MCCFIInstruction::createOffset(Label, Reg, Offset); - Instructions.push_back(Inst); - } - } - + const std::vector<MCCFIInstruction> &Instructions = + MAI.getInitialFrameState(); EmitCFIInstructions(streamer, Instructions, NULL); // Padding diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 4cac84d666..ec7397d748 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -39,13 +39,23 @@ const MCSymbol *MCELFObjectTargetWriter::undefinedExplicitRelSym(const MCValue & return &Symbol.AliasedSymbol(); } -void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup, - uint64_t &RelocOffset) { +// ELF doesn't require relocations to be in any order. We sort by the r_offset, +// just to match gnu as for easier comparison. The use type and index is an +// arbitrary way of making the sort deterministic. 
+static int cmpRel(const void *AP, const void *BP) { + const ELFRelocationEntry &A = *(const ELFRelocationEntry *)AP; + const ELFRelocationEntry &B = *(const ELFRelocationEntry *)BP; + if (A.r_offset != B.r_offset) + return B.r_offset - A.r_offset; + if (B.Type != A.Type) + return A.Type - B.Type; + if (B.Index != A.Index) + return B.Index - A.Index; + llvm_unreachable("ELFRelocs might be unstable!"); } void MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs) { - // Sort by the r_offset, just like gnu as does. - array_pod_sort(Relocs.begin(), Relocs.end()); + array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index edefdb4c36..f7c71e97e3 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -545,7 +545,7 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) { return false; } -/// Process the specified .incbin file by seaching for it in the include paths +/// Process the specified .incbin file by searching for it in the include paths /// then just emitting the byte contents of the file to the streamer. This /// returns true on failure. 
bool AsmParser::ProcessIncbinFile(const std::string &Filename) { diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index af14c72145..654af081f9 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -339,7 +339,7 @@ static void printRelocationTargetName(const MachOObjectFile *O, StringRef S; bool isExtern = O->getPlainRelocationExternal(RE); - uint64_t Val = O->getAnyRelocationAddress(RE); + uint64_t Val = O->getPlainRelocationSymbolNum(RE); if (isExtern) { symbol_iterator SI = O->begin_symbols(); @@ -347,7 +347,8 @@ static void printRelocationTargetName(const MachOObjectFile *O, SI->getName(S); } else { section_iterator SI = O->begin_sections(); - advanceTo(SI, Val); + // Adjust for the fact that sections are 1-indexed. + advanceTo(SI, Val - 1); SI->getName(S); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 6182e34150..57e60dac45 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -872,7 +872,21 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; exponent += rhs.exponent; + // Assume the operands involved in the multiplication are single-precision + // FP, and the two multiplicants are: + // *this = a23 . a22 ... a0 * 2^e1 + // rhs = b23 . b22 ... b0 * 2^e2 + // the result of multiplication is: + // *this = c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are two significant bits at the left-hand side of the + // radix point. Move the radix point toward left by one bit, and adjust + // exponent accordingly. + exponent += 1; + if (addend) { + // The intermediate result of the multiplication has "2 * precision" + // signicant bit; adjust the addend to be consistent with mul result. 
+ // Significand savedSignificand = significand; const fltSemantics *savedSemantics = semantics; fltSemantics extendedSemantics; @@ -880,8 +894,9 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) unsigned int extendedPrecision; /* Normalize our MSB. */ - extendedPrecision = precision + precision - 1; + extendedPrecision = 2 * precision; if (omsb != extendedPrecision) { + assert(extendedPrecision > omsb); APInt::tcShiftLeft(fullSignificand, newPartsCount, extendedPrecision - omsb); exponent -= extendedPrecision - omsb; @@ -912,8 +927,18 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; } - exponent -= (precision - 1); + // Convert the result having "2 * precision" significant-bits back to the one + // having "precision" significant-bits. First, move the radix point from + // poision "2*precision - 1" to "precision - 1". The exponent need to be + // adjusted by "2*precision - 1" - "precision - 1" = "precision". + exponent -= precision; + // In case MSB resides at the left-hand side of radix point, shift the + // mantissa right by some amount to make sure the MSB reside right before + // the radix point (i.e. "MSB . rest-significant-bits"). + // + // Note that the result is not normalized when "omsb < precision". So, the + // caller needs to call APFloat::normalize() if normalized value is expected. 
if (omsb > precision) { unsigned int bits, significantParts; lostFraction lf; diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index fac3cad5cc..4f650b42cc 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -65,7 +65,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf); } - if (NewBuf == 0) return ~0U; + if (!NewBuf) return ~0U; return AddNewSourceBuffer(NewBuf.take(), IncludeLoc); } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index c9729b5412..9e497a0f63 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -251,7 +251,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("hexagon", Triple::hexagon) .Case("s390x", Triple::systemz) .Case("sparc", Triple::sparc) - .Case("sparcv9", Triple::sparcv9) + .Cases("sparcv9", "sparc64", Triple::sparcv9) .Case("tce", Triple::tce) .Case("xcore", Triple::xcore) .Case("nvptx", Triple::nvptx) diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 72a8af621d..cdd475c17f 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -33,6 +33,7 @@ #endif extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +extern "C" void __clear_cache(void *, void*); namespace { diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h index 5c1da0d617..4cdac788a0 100644 --- a/lib/Support/Windows/Windows.h +++ b/lib/Support/Windows/Windows.h @@ -25,6 +25,7 @@ #define WIN32_LEAN_AND_MEAN #include "llvm/Config/config.h" // Get build system configuration settings +#include "llvm/Support/Compiler.h" #include <windows.h> #include <wincrypt.h> #include <shlobj.h> @@ -75,7 +76,7 @@ public: } // True if Handle is valid. - operator bool() const { + LLVM_EXPLICIT operator bool() const { return HandleTraits::IsValid(Handle) ? 
true : false; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index df599d599d..f1695e2ce2 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -38,6 +38,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 3435217bb2..eeec608820 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -57,13 +57,14 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createAArch64MCAsmInfo(StringRef TT) { +static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { Triple TheTriple(TT); MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(); - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(AArch64::XSP, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b0d34a76b0..4de5b4f41c 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -94,6 +94,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::SP); Reserved.set(ARM::PC); Reserved.set(ARM::FPSCR); + Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index fd77732364..432e3eefb1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ 
b/lib/Target/ARM/ARMISelLowering.cpp @@ -2147,7 +2147,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Chain = Copy->getOperand(0); + TCChain = Copy->getOperand(0); } else { return false; } @@ -5257,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } +static EVT getExtensionTo64Bits(const EVT &OrigVT) { + if (OrigVT.getSizeInBits() >= 64) + return OrigVT; + + assert(OrigVT.isSimple() && "Expecting a simple value type"); + + MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + return MVT::v2i32; + case MVT::v4i8: + return MVT::v4i16; + } +} + /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. @@ -5272,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. - MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; - EVT NewVT; - switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Orig Vector Type"); - case MVT::v2i8: - case MVT::v2i16: - NewVT = MVT::v2i32; - break; - case MVT::v4i8: - NewVT = MVT::v4i16; - break; - } + EVT NewVT = getExtensionTo64Bits(OrigTy); + return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); } @@ -5293,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, /// reach a total size of 64 bits. We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. 
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { - SDValue NonExtendingLoad = - DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); + + // The load already has the right type. + if (ExtendedTy == LD->getMemoryVT()) + return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); - unsigned ExtOp = 0; - switch (LD->getExtensionType()) { - default: llvm_unreachable("Unexpected LoadExtType"); - case ISD::EXTLOAD: - case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; - } - MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; - MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; - return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, - MemType, ExtType, ExtOp); + + // We need to create a zextload/sextload. We cannot just create a load + // followed by a zext/zext node because LowerMUL is also run during normal + // operation legalization where we can't create illegal types. 
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, + LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1bd174e341..89f92a589d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4636,11 +4636,11 @@ def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", - (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), @@ -4650,7 +4650,7 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops, list<dag> pattern> : ABXI<0b1110, oops, iops, NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> { - let Inst{31-28} = 0b1111; + let Inst{31-24} = 0b11111110; let Inst{20} = direction; let Inst{4} = 1; @@ -4679,11 +4679,11 @@ def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", - (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; def : 
ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b0f576bc2b..85743d8d5a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -157,12 +157,15 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. // We model fpscr with two registers: FPSCR models the control bits and will be -// reserved. FPSCR_NZCV models the flag bits and will be unreserved. -def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; -def SPSR : ARMReg<2, "spsr">; -def FPSCR : ARMReg<3, "fpscr">; -def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { +// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV +// models the APSR when it's accessed by some special instructions. In such cases +// it has the same encoding as PC. +def CPSR : ARMReg<0, "cpsr">; +def APSR : ARMReg<1, "apsr">; +def APSR_NZCV : ARMReg<15, "apsr_nzcv">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { let Aliases = [FPSCR]; } def ITSTATE : ARMReg<4, "itstate">; @@ -207,6 +210,16 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { }]; } +// GPRs without the PC but with APSR. Some instructions allow accessing the +// APSR, while actually encoding PC in the register field. This is usefull +// for assembly and disassembly only. +def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add GPR, APSR_NZCV)> { + let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + }]; +} + // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the // implied SP argument list. 
// FIXME: It would be better to not use this at all and refactor the diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 8653c462f0..9ff0d61481 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -162,10 +162,23 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (!isThumb() || hasThumb2()) PostRAScheduler = true; - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - if (!StrictAlign && hasV6Ops() && isTargetDarwin()) - AllowsUnalignedMem = true; + if (!StrictAlign) { + // Assume pre-ARMv6 doesn't support unaligned accesses. + // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. It is therefore safe to assume that ARMv7+ + // targets support unaligned accesses. + // + // The above behavior is consistent with GCC. + if (hasV7Ops() || (hasV6Ops() && isTargetDarwin())) + AllowsUnalignedMem = true; + } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. 
uint64_t Bits = getFeatureBits(); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 42c7d2c437..17c52c94a0 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -85,6 +85,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -117,6 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + initAsmInfo(); } namespace { diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ac937f3534..d2896377cc 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -156,6 +156,9 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -920,6 +923,21 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, return S; } +static DecodeStatus +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 15) + { + Inst.addOperand(MCOperand::CreateReg(ARM::APSR_NZCV)); + return MCDisassembler::Success; + } + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, 
Decoder)); + return S; +} + static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 57239f8011..b858fff546 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -159,7 +159,7 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createARMMCAsmInfo(StringRef TT) { +static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 99d7a3a963..57044b27d6 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_target(HexagonCodeGen HexagonTargetObjectFile.cpp HexagonVLIWPacketizer.cpp HexagonNewValueJump.cpp + HexagonCopyToCombine.cpp ) add_subdirectory(TargetInfo) diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 8e19c61f40..b88637ad57 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -44,6 +44,8 @@ namespace llvm { FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonNewValueJump(); diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp new file mode 100644 index 0000000000..dd63523291 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -0,0 +1,676 @@ +//===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass replaces transfer instructions by combine instructions. +// We walk along a basic block and look for two combinable instructions and try +// to move them together. If we can move them next to each other we do so and +// replace them with a combine instruction. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-copy-combine" + +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" + +using namespace llvm; + +static +cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines", + cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Disable merging into combines")); +static +cl::opt<unsigned> +MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store", + cl::Hidden, cl::init(4), + cl::desc("Maximum distance between a tfr feeding a store we " + "consider the store still to be newifiable")); + +namespace llvm { + void initializeHexagonCopyToCombinePass(PassRegistry&); +} + + +namespace { + +class HexagonCopyToCombine : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const TargetRegisterInfo *TRI; + bool 
ShouldCombineAggressively; + + DenseSet<MachineInstr *> PotentiallyNewifiableTFR; +public: + static char ID; + + HexagonCopyToCombine() : MachineFunctionPass(ID) { + initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "Hexagon Copy-To-Combine Pass"; + } + + virtual bool runOnMachineFunction(MachineFunction &Fn); + +private: + MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1); + + void findPotentialNewifiableTFRs(MachineBasicBlock &); + + void combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, bool DoInsertAtI1); + + bool isSafeToMoveTogether(MachineInstr *I1, MachineInstr *I2, + unsigned I1DestReg, unsigned I2DestReg, + bool &DoInsertAtI1); + + void emitCombineRR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineRI(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineIR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineII(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); +}; + +} // End anonymous namespace. + +char HexagonCopyToCombine::ID = 0; + +INITIALIZE_PASS(HexagonCopyToCombine, "hexagon-copy-combine", + "Hexagon Copy-To-Combine Pass", false, false) + +static bool isCombinableInstType(MachineInstr *MI, + const HexagonInstrInfo *TII, + bool ShouldCombineAggressively) { + switch(MI->getOpcode()) { + case Hexagon::TFR: { + // A COPY instruction can be combined if its arguments are IntRegs (32bit). 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg()); + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + return Hexagon::IntRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg); + } + + case Hexagon::TFRI: { + // A transfer-immediate can be combined if its argument is a signed 8bit + // value. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + unsigned DestReg = MI->getOperand(0).getReg(); + + // Only combine constant extended TFRI if we are in aggressive mode. + return Hexagon::IntRegsRegClass.contains(DestReg) && + (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm())); + } + + case Hexagon::TFRI_V4: { + if (!ShouldCombineAggressively) + return false; + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isGlobal()); + + // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a + // workaround for an ABI bug that prevents GOT relocations on combine + // instructions + if (MI->getOperand(1).getTargetFlags() != HexagonII::MO_NO_FLAG) + return false; + + unsigned DestReg = MI->getOperand(0).getReg(); + return Hexagon::IntRegsRegClass.contains(DestReg); + } + + default: + break; + } + + return false; +} + +static bool isGreaterThan8BitTFRI(MachineInstr *I) { + return I->getOpcode() == Hexagon::TFRI && + !isInt<8>(I->getOperand(1).getImm()); +} +static bool isGreaterThan6BitTFRI(MachineInstr *I) { + return I->getOpcode() == Hexagon::TFRI && + !isUInt<6>(I->getOperand(1).getImm()); +} + +/// areCombinableOperations - Returns true if the two instruction can be merge +/// into a combine (ignoring register constraints). 
+static bool areCombinableOperations(const TargetRegisterInfo *TRI, + MachineInstr *HighRegInst, + MachineInstr *LowRegInst) { + assert((HighRegInst->getOpcode() == Hexagon::TFR || + HighRegInst->getOpcode() == Hexagon::TFRI || + HighRegInst->getOpcode() == Hexagon::TFRI_V4) && + (LowRegInst->getOpcode() == Hexagon::TFR || + LowRegInst->getOpcode() == Hexagon::TFRI || + LowRegInst->getOpcode() == Hexagon::TFRI_V4) && + "Assume individual instructions are of a combinable type"); + + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>(TRI); + + // V4 added some combine variations (mixed immediate and register source + // operands), if we are on < V4 we can only combine 2 register-to-register + // moves and 2 immediate-to-register moves. We also don't have + // constant-extenders. + if (!QRI->Subtarget.hasV4TOps()) + return HighRegInst->getOpcode() == LowRegInst->getOpcode() && + !isGreaterThan8BitTFRI(HighRegInst) && + !isGreaterThan6BitTFRI(LowRegInst); + + // There is no combine of two constant extended values. + if ((HighRegInst->getOpcode() == Hexagon::TFRI_V4 || + isGreaterThan8BitTFRI(HighRegInst)) && + (LowRegInst->getOpcode() == Hexagon::TFRI_V4 || + isGreaterThan6BitTFRI(LowRegInst))) + return false; + + return true; +} + +static bool isEvenReg(unsigned Reg) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + Hexagon::IntRegsRegClass.contains(Reg)); + return (Reg - Hexagon::R0) % 2 == 0; +} + +static void removeKillInfo(MachineInstr *MI, unsigned RegNotKilled) { + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill()) + continue; + Op.setIsKill(false); + } +} + +/// isUnsafeToMoveAccross - Returns true if it is unsafe to move a copy +/// instruction from \p UseReg to \p DestReg over the instruction \p I. 
+bool isUnsafeToMoveAccross(MachineInstr *I, unsigned UseReg, unsigned DestReg, + const TargetRegisterInfo *TRI) { + return (UseReg && (I->modifiesRegister(UseReg, TRI))) || + I->modifiesRegister(DestReg, TRI) || + I->readsRegister(DestReg, TRI) || + I->hasUnmodeledSideEffects() || + I->isInlineAsm() || I->isDebugValue(); +} + +/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such +/// that the two instructions can be paired in a combine. +bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, + MachineInstr *I2, + unsigned I1DestReg, + unsigned I2DestReg, + bool &DoInsertAtI1) { + + bool IsImmUseReg = I2->getOperand(1).isImm() || I2->getOperand(1).isGlobal(); + unsigned I2UseReg = IsImmUseReg ? 0 : I2->getOperand(1).getReg(); + + // It is not safe to move I1 and I2 into one combine if I2 has a true + // dependence on I1. + if (I2UseReg && I1->modifiesRegister(I2UseReg, TRI)) + return false; + + bool isSafe = true; + + // First try to move I2 towards I1. + { + // A reverse_iterator instantiated like below starts before I2, and I1 + // respectively. + // Look at instructions I in between I2 and (excluding) I1. + MachineBasicBlock::reverse_iterator I(I2), + End = --(MachineBasicBlock::reverse_iterator(I1)); + // At 03 we got better results (dhrystone!) by being more conservative. + if (!ShouldCombineAggressively) + End = MachineBasicBlock::reverse_iterator(I1); + // If I2 kills its operand and we move I2 over an instruction that also + // uses I2's use reg we need to modify that (first) instruction to now kill + // this reg. + unsigned KilledOperand = 0; + if (I2->killsRegister(I2UseReg)) + KilledOperand = I2UseReg; + MachineInstr *KillingInstr = 0; + + for (; I != End; ++I) { + // If the intervening instruction I: + // * modifies I2's use reg + // * modifies I2's def reg + // * reads I2's def reg + // * or has unmodelled side effects + // we can't move I2 across it. 
+ if (isUnsafeToMoveAccross(&*I, I2UseReg, I2DestReg, TRI)) { + isSafe = false; + break; + } + + // Update first use of the killed operand. + if (!KillingInstr && KilledOperand && + I->readsRegister(KilledOperand, TRI)) + KillingInstr = &*I; + } + if (isSafe) { + // Update the intermediate instruction to with the kill flag. + if (KillingInstr) { + bool Added = KillingInstr->addRegisterKilled(KilledOperand, TRI, true); + (void)Added; // supress compiler warning + assert(Added && "Must successfully update kill flag"); + removeKillInfo(I2, KilledOperand); + } + DoInsertAtI1 = true; + return true; + } + } + + // Try to move I1 towards I2. + { + // Look at instructions I in between I1 and (excluding) I2. + MachineBasicBlock::iterator I(I1), End(I2); + // At O3 we got better results (dhrystone) by being more conservative here. + if (!ShouldCombineAggressively) + End = llvm::next(MachineBasicBlock::iterator(I2)); + IsImmUseReg = I1->getOperand(1).isImm() || I1->getOperand(1).isGlobal(); + unsigned I1UseReg = IsImmUseReg ? 0 : I1->getOperand(1).getReg(); + // Track killed operands. If we move accross an instruction that kills our + // operand, we need to update the kill information on the moved I1. It kills + // the operand now. + MachineInstr *KillingInstr = 0; + unsigned KilledOperand = 0; + + while(++I != End) { + // If the intervening instruction I: + // * modifies I1's use reg + // * modifies I1's def reg + // * reads I1's def reg + // * or has unmodelled side effects + // We introduce this special case because llvm has no api to remove a + // kill flag for a register (a removeRegisterKilled() analogous to + // addRegisterKilled) that handles aliased register correctly. + // * or has a killed aliased register use of I1's use reg + // %D4<def> = TFRI64 16 + // %R6<def> = TFR %R9 + // %R8<def> = KILL %R8, %D4<imp-use,kill> + // If we want to move R6 = across the KILL instruction we would have + // to remove the %D4<imp-use,kill> operand. 
For now, we are + // conservative and disallow the move. + // we can't move I1 across it. + if (isUnsafeToMoveAccross(I, I1UseReg, I1DestReg, TRI) || + // Check for an aliased register kill. Bail out if we see one. + (!I->killsRegister(I1UseReg) && I->killsRegister(I1UseReg, TRI))) + return false; + + // Check for an exact kill (registers match). + if (I1UseReg && I->killsRegister(I1UseReg)) { + assert(KillingInstr == 0 && "Should only see one killing instruction"); + KilledOperand = I1UseReg; + KillingInstr = &*I; + } + } + if (KillingInstr) { + removeKillInfo(KillingInstr, KilledOperand); + // Update I1 to set the kill flag. This flag will later be picked up by + // the new COMBINE instruction. + bool Added = I1->addRegisterKilled(KilledOperand, TRI); + (void)Added; // supress compiler warning + assert(Added && "Must successfully update kill flag"); + } + DoInsertAtI1 = false; + } + + return true; +} + +/// findPotentialNewifiableTFRs - Finds tranfers that feed stores that could be +/// newified. (A use of a 64 bit register define can not be newified) +void +HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { + DenseMap<unsigned, MachineInstr *> LastDef; + for (MachineBasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + MachineInstr *MI = I; + // Mark TFRs that feed a potential new value store as such. + if(TII->mayBeNewStore(MI)) { + // Look for uses of TFR instructions. + for (unsigned OpdIdx = 0, OpdE = MI->getNumOperands(); OpdIdx != OpdE; + ++OpdIdx) { + MachineOperand &Op = MI->getOperand(OpdIdx); + + // Skip over anything except register uses. + if (!Op.isReg() || !Op.isUse() || !Op.getReg()) + continue; + + // Look for the defining instruction. + unsigned Reg = Op.getReg(); + MachineInstr *DefInst = LastDef[Reg]; + if (!DefInst) + continue; + if (!isCombinableInstType(DefInst, TII, ShouldCombineAggressively)) + continue; + + // Only close newifiable stores should influence the decision. 
+ MachineBasicBlock::iterator It(DefInst); + unsigned NumInstsToDef = 0; + while (&*It++ != MI) + ++NumInstsToDef; + + if (NumInstsToDef > MaxNumOfInstsBetweenNewValueStoreAndTFR) + continue; + + PotentiallyNewifiableTFR.insert(DefInst); + } + // Skip to next instruction. + continue; + } + + // Put instructions that last defined integer or double registers into the + // map. + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || !Op.isDef() || !Op.getReg()) + continue; + unsigned Reg = Op.getReg(); + if (Hexagon::DoubleRegsRegClass.contains(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + LastDef[*SubRegs] = MI; + } + } else if (Hexagon::IntRegsRegClass.contains(Reg)) + LastDef[Reg] = MI; + } + } +} + +bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { + + if (IsCombinesDisabled) return false; + + bool HasChanged = false; + + // Get target info. + TRI = MF.getTarget().getRegisterInfo(); + TII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo()); + + // Combine aggressively (for code size) + ShouldCombineAggressively = + MF.getTarget().getOptLevel() <= CodeGenOpt::Default; + + // Traverse basic blocks. + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; + ++BI) { + PotentiallyNewifiableTFR.clear(); + findPotentialNewifiableTFRs(*BI); + + // Traverse instructions in basic block. + for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end(); + MI != End;) { + MachineInstr *I1 = MI++; + // Don't combine a TFR whose user could be newified (instructions that + // define double registers can not be newified - Programmer's Ref Manual + // 5.4.2 New-value stores). + if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I1)) + continue; + + // Ignore instructions that are not combinable. 
+ if (!isCombinableInstType(I1, TII, ShouldCombineAggressively)) + continue; + + // Find a second instruction that can be merged into a combine + // instruction. + bool DoInsertAtI1 = false; + MachineInstr *I2 = findPairable(I1, DoInsertAtI1); + if (I2) { + HasChanged = true; + combine(I1, I2, MI, DoInsertAtI1); + } + } + } + + return HasChanged; +} + +/// findPairable - Returns an instruction that can be merged with \p I1 into a +/// COMBINE instruction or 0 if no such instruction can be found. Returns true +/// in \p DoInsertAtI1 if the combine must be inserted at instruction \p I1 +/// false if the combine must be inserted at the returned instruction. +MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1, + bool &DoInsertAtI1) { + MachineBasicBlock::iterator I2 = llvm::next(MachineBasicBlock::iterator(I1)); + unsigned I1DestReg = I1->getOperand(0).getReg(); + + for (MachineBasicBlock::iterator End = I1->getParent()->end(); I2 != End; + ++I2) { + // Bail out early if we see a second definition of I1DestReg. + if (I2->modifiesRegister(I1DestReg, TRI)) + break; + + // Ignore non-combinable instructions. + if (!isCombinableInstType(I2, TII, ShouldCombineAggressively)) + continue; + + // Don't combine a TFR whose user could be newified. + if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I2)) + continue; + + unsigned I2DestReg = I2->getOperand(0).getReg(); + + // Check that registers are adjacent and that the first destination register + // is even. + bool IsI1LowReg = (I2DestReg - I1DestReg) == 1; + bool IsI2LowReg = (I1DestReg - I2DestReg) == 1; + unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg; + if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex)) + continue; + + // Check that the two instructions are combinable. V4 allows more + // instructions to be merged into a combine. + // The order matters because in a TFRI we might can encode a int8 as the + // hi reg operand but only a uint6 as the low reg operand. 
+ if ((IsI2LowReg && !areCombinableOperations(TRI, I1, I2)) || + (IsI1LowReg && !areCombinableOperations(TRI, I2, I1))) + break; + + if (isSafeToMoveTogether(I1, I2, I1DestReg, I2DestReg, + DoInsertAtI1)) + return I2; + + // Not safe. Stop searching. + break; + } + return 0; +} + +void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, + bool DoInsertAtI1) { + // We are going to delete I2. If MI points to I2 advance it to the next + // instruction. + if ((MachineInstr *)MI == I2) ++MI; + + // Figure out whether I1 or I2 goes into the lowreg part. + unsigned I1DestReg = I1->getOperand(0).getReg(); + unsigned I2DestReg = I2->getOperand(0).getReg(); + bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; + unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg; + + // Get the double word register. + unsigned DoubleRegDest = + TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg, + &Hexagon::DoubleRegsRegClass); + assert(DoubleRegDest != 0 && "Expect a valid register"); + + + // Setup source operands. + MachineOperand &LoOperand = IsI1Loreg ? I1->getOperand(1) : + I2->getOperand(1); + MachineOperand &HiOperand = IsI1Loreg ? I2->getOperand(1) : + I1->getOperand(1); + + // Figure out which source is a register and which a constant. + bool IsHiReg = HiOperand.isReg(); + bool IsLoReg = LoOperand.isReg(); + + MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ? I1 : I2); + // Emit combine. 
+ if (IsHiReg && IsLoReg) + emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsHiReg) + emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsLoReg) + emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else + emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand); + + I1->eraseFromParent(); + I2->eraseFromParent(); +} + +void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle globals. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle constant extended immediates. + if (!isInt<8>(HiOperand.getImm())) { + assert(isInt<8>(LoOperand.getImm())); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + if (!isUInt<6>(LoOperand.getImm())) { + assert(isInt<8>(HiOperand.getImm())); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine #HiImm, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoReg = LoOperand.getReg(); + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Insert new combine instruction. + // DoubleRegDest = combine #HiImm, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addReg(LoReg, LoRegKillFlag); +} + +void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine HiReg, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned LoReg = LoOperand.getReg(); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Insert new combine instruction. + // DoubleRegDest = combine HiReg, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rr), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addReg(LoReg, LoRegKillFlag); +} + +FunctionPass *llvm::createHexagonCopyToCombine() { + return new HexagonCopyToCombine(); +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index d1e32c65ad..c96aaca8f8 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -384,6 +384,12 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), // ALU32/PERM + //===----------------------------------------------------------------------===// +let neverHasSideEffects = 1 in +def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst), + (ins s8Imm:$src1, s8Imm:$src2), + "$dst = combine(#$src1, #$src2)", + []>; + // Mux. 
def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, DoubleRegs:$src2, diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 022a7f6136..fee83fb811 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3188,6 +3188,93 @@ def STriw_offset_ext_V4 : STInst<(outs), (add IntRegs:$src1, u6_2ImmPred:$src2))]>, Requires<[HasV4T]>; +def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))), + (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>, + Requires<[HasV4T]>; + +def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))), + (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>, + Requires<[HasV4T]>; + + +// i8 -> i64 loads +// We need a complexity of 120 here to overide preceeding handling of +// zextloadi8. +let Predicates = [HasV4T], AddedComplexity = 120 in { +def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDrib_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; + +def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 FoldGlobalAddr:$addr)))>; + +def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; +} +// i16 -> i64 loads +// We need a complexity of 120 here to overide preceeding handling of +// zextloadi16. 
+let AddedComplexity = 120 in { +def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDrih_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; +} +// i32->i64 loads +// We need a complexity of 120 here to overide preceeding handling of +// zextloadi32. +let AddedComplexity = 120 in { +def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; +} // Indexed store double word - global address. 
// memw(Rs+#u6:2)=#S8 diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 05e696865f..f7c4513213 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -631,6 +631,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { .addMBB(jmpTarget); assert(NewMI && "New Value Jump Instruction Not created!"); + (void)NewMI; if (cmpInstr->getOperand(0).isReg() && cmpInstr->getOperand(0).isKill()) cmpInstr->getOperand(0).setIsKill(false); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index dc44b34cff..676dff2a4a 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -79,6 +79,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), InstrItins(&Subtarget.getInstrItineraryData()) { setMCUseCFI(false); + initAsmInfo(); } // addPassesForOptimizations - Allow the backend (target) to add Target @@ -161,6 +162,7 @@ bool HexagonPassConfig::addPreSched2() { HexagonTargetObjectFile &TLOF = (HexagonTargetObjectFile&)(getTargetLowering()->getObjFileLowering()); + addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); if (!TLOF.IsSmallDataEnabled()) { @@ -168,9 +170,6 @@ bool HexagonPassConfig::addPreSched2() { printAndVerify("After hexagon split const32/64 pass"); } return true; - if (getOptLevel() != CodeGenOpt::None) - addPass(&IfConverterID); - return false; } bool HexagonPassConfig::addPreEmitPass() { diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c508d124b3..59b4fabe01 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -837,16 +837,38 @@ bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI, } +/// Gets the 
predicate register of a predicated instruction. +unsigned getPredicatedRegister(MachineInstr *MI, const HexagonInstrInfo *QII) { + /// We use the following rule: The first predicate register that is a use is + /// the predicate register of a predicated instruction. + + assert(QII->isPredicated(MI) && "Must be predicated instruction"); + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + MachineOperand &Op = *OI; + if (Op.isReg() && Op.getReg() && Op.isUse() && + Hexagon::PredRegsRegClass.contains(Op.getReg())) + return Op.getReg(); + } + + llvm_unreachable("Unknown instruction operand layout"); + + return 0; +} + // Given two predicated instructions, this function detects whether // the predicates are complements bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - // Currently can only reason about conditional transfers - if (!QII->isConditionalTransfer(MI1) || !QII->isConditionalTransfer(MI2)) { + + // If we don't know the predicate sense of the instructions bail out early, we + // need it later. + if (getPredicateSense(MI1, QII) == PK_Unknown || + getPredicateSense(MI2, QII) == PK_Unknown) return false; - } // Scheduling unit for candidate SUnit* SU = MIToSUnit[MI1]; @@ -885,9 +907,9 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, // there already exist anti dep on the same pred in // the packet. if (PacketSU->Succs[i].getSUnit() == SU && + PacketSU->Succs[i].getKind() == SDep::Data && Hexagon::PredRegsRegClass.contains( PacketSU->Succs[i].getReg()) && - PacketSU->Succs[i].getKind() == SDep::Data && // Here I know that *VIN is predicate setting instruction // with true data dep to candidate on the register // we care about - c) in the above example. 
@@ -908,7 +930,11 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, // that the predicate sense is different // We also need to differentiate .old vs. .new: // !p0 is not complimentary to p0.new - return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) && + unsigned PReg1 = getPredicatedRegister(MI1, QII); + unsigned PReg2 = getPredicatedRegister(MI2, QII); + return ((PReg1 == PReg2) && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) && (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 273bc22b8e..2f93a5299c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -54,13 +54,14 @@ static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT, return X; } -static MCAsmInfo *createHexagonMCAsmInfo(StringRef TT) { +static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new HexagonMCAsmInfo(TT); // VirtualFP = (R30 + #0). 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(Hexagon::R30, 0); - MAI->addInitialFrameState(0, Dst, Src); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( + 0, Hexagon::R30, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index bcdd32fed9..c75895575d 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -43,6 +43,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { + initAsmInfo(); } namespace { diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index ec76dba491..5bc0668f35 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -53,7 +53,7 @@ static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createMCAsmInfo(StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { default: diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 164e351df9..6710a09707 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -36,7 +36,9 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, // FIXME: Check DataLayout string. DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { } + FrameLowering(Subtarget) { + initAsmInfo(); +} namespace { /// MSP430 Code Generator Pass Configuration Options. 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 26694ffdac..837fabee76 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -93,12 +93,12 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createMipsMCAsmInfo(StringRef TT) { +static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI = new MipsMCAsmInfo(TT); - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(Mips::SP, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned SP = MRI.getDwarfRegNum(Mips::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, SP, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 4d1e61bb99..cc7324f26e 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -18,6 +18,36 @@ #include "llvm/Support/raw_ostream.h" #include <string> +static void inlineAsmOut + (LLVMContext &C, StringRef AsmString, BasicBlock *BB ) { + std::vector<llvm::Type *> AsmArgTypes; + std::vector<llvm::Value*> AsmArgs; + llvm::FunctionType *AsmFTy = + llvm::FunctionType::get(Type::getVoidTy(C), + AsmArgTypes, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(AsmFTy, AsmString, "", true, + /* IsAlignStack */ false, + llvm::InlineAsm::AD_ATT); + CallInst::Create(IA, AsmArgs, "", BB); +} + +namespace { + +class InlineAsmHelper { + LLVMContext &C; + BasicBlock *BB; +public: + InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) : + C(C_), BB(BB_) { + } + + void Out(StringRef AsmString) { + inlineAsmOut(C, AsmString, BB); + } + +}; +} // // Return types that matter for hard float are: // float, double, complex float, and complex double @@ -52,6 +82,243 @@ static FPReturnVariant whichFPReturnVariant(Type *T) { } // 
+// Parameter type that matter are float, (float, float), (float, double), +// double, (double, double), (double, float) +// +enum FPParamVariant { + FSig, FFSig, FDSig, + DSig, DDSig, DFSig, NoSig +}; + +// which floating point parameter signature variant we are dealing with +// +typedef Type::TypeID TypeID; +const Type::TypeID FloatTyID = Type::FloatTyID; +const Type::TypeID DoubleTyID = Type::DoubleTyID; + +static FPParamVariant whichFPParamVariantNeeded(Function &F) { + switch (F.arg_size()) { + case 0: + return NoSig; + case 1:{ + TypeID ArgTypeID = F.getFunctionType()->getParamType(0)->getTypeID(); + switch (ArgTypeID) { + case FloatTyID: + return FSig; + case DoubleTyID: + return DSig; + default: + return NoSig; + } + } + default: { + TypeID ArgTypeID0 = F.getFunctionType()->getParamType(0)->getTypeID(); + TypeID ArgTypeID1 = F.getFunctionType()->getParamType(1)->getTypeID(); + switch(ArgTypeID0) { + case FloatTyID: { + switch (ArgTypeID1) { + case FloatTyID: + return FFSig; + case DoubleTyID: + return FDSig; + default: + return FSig; + } + } + case DoubleTyID: { + switch (ArgTypeID1) { + case FloatTyID: + return DFSig; + case DoubleTyID: + return DDSig; + default: + return DSig; + } + } + default: + return NoSig; + } + } + } + llvm_unreachable("can't get here"); +} + +// Figure out if we need float point based on the function parameters. 
+// We need to move variables in and/or out of floating point +// registers because of the ABI +// +static bool needsFPStubFromParams(Function &F) { + if (F.arg_size() >=1) { + Type *ArgType = F.getFunctionType()->getParamType(0); + switch (ArgType->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + break; + } + } + return false; +} + +static bool needsFPReturnHelper(Function &F) { + Type* RetType = F.getReturnType(); + return whichFPReturnVariant(RetType) != NoFPRet; +} + +static bool needsFPHelperFromSig(Function &F) { + return needsFPStubFromParams(F) || needsFPReturnHelper(F); +} + +// +// We swap between FP and Integer registers to allow Mips16 and Mips32 to +// interoperate +// + +static void swapFPIntParams + (FPParamVariant PV, Module *M, InlineAsmHelper &IAH, + bool LE, bool ToFP) { + //LLVMContext &Context = M->getContext(); + std::string MI = ToFP? "mtc1 ": "mfc1 "; + switch (PV) { + case FSig: + IAH.Out(MI + "$$4,$$f12"); + break; + case FFSig: + IAH.Out(MI +"$$4,$$f12"); + IAH.Out(MI + "$$5,$$f14"); + break; + case FDSig: + IAH.Out(MI + "$$4,$$f12"); + if (LE) { + IAH.Out(MI + "$$6,$$f14"); + IAH.Out(MI + "$$7,$$f15"); + } else { + IAH.Out(MI + "$$7,$$f14"); + IAH.Out(MI + "$$6,$$f15"); + } + break; + case DSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + } + break; + case DDSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + IAH.Out(MI + "$$6,$$f14"); + IAH.Out(MI + "$$7,$$f15"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + IAH.Out(MI + "$$7,$$f14"); + IAH.Out(MI + "$$6,$$f15"); + } + break; + case DFSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + } + IAH.Out(MI + "$$6,$$f14"); + break; + case NoSig: + return; + } +} +// +// Make sure that we know we already need a stub 
for this function. +// Having called needsFPHelperFromSig +// +static void assureFPCallStub(Function &F, Module *M, + const MipsSubtarget &Subtarget){ + // for now we only need them for static relocation + if (Subtarget.getRelocationModel() == Reloc::PIC_) + return; + LLVMContext &Context = M->getContext(); + bool LE = Subtarget.isLittle(); + std::string Name = F.getName(); + std::string SectionName = ".mips16.call.fp." + Name; + std::string StubName = "__call_stub_" + Name; + // + // see if we already have the stub + // + Function *FStub = M->getFunction(StubName); + if (FStub && !FStub->isDeclaration()) return; + FStub = Function::Create(F.getFunctionType(), + Function::InternalLinkage, StubName, M); + FStub->addFnAttr("mips16_fp_stub"); + FStub->addFnAttr(llvm::Attribute::Naked); + FStub->addFnAttr(llvm::Attribute::NoUnwind); + FStub->addFnAttr("nomips16"); + FStub->setSection(SectionName); + BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub); + InlineAsmHelper IAH(Context, BB); + FPReturnVariant RV = whichFPReturnVariant(FStub->getReturnType()); + FPParamVariant PV = whichFPParamVariantNeeded(F); + swapFPIntParams(PV, M, IAH, LE, true); + if (RV != NoFPRet) { + IAH.Out("move $$18, $$31"); + IAH.Out("jal " + Name); + } else { + IAH.Out("lui $$25,%hi(" + Name + ")"); + IAH.Out("addiu $$25,$$25,%lo(" + Name + ")" ); + } + switch (RV) { + case FRet: + IAH.Out("mfc1 $$2,$$f0"); + break; + case DRet: + if (LE) { + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f1"); + } else { + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 $$2,$$f1"); + } + break; + case CFRet: + if (LE) { + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f2"); + } else { + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 $$3,$$f2"); + } + break; + case CDRet: + if (LE) { + IAH.Out("mfc1 $$4,$$f2"); + IAH.Out("mfc1 $$5,$$f3"); + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f1"); + + } else { + IAH.Out("mfc1 $$5,$$f2"); + IAH.Out("mfc1 $$4,$$f3"); + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 
$$2,$$f1"); + } + break; + case NoFPRet: + break; + } + if (RV != NoFPRet) + IAH.Out("jr $$18"); + else + IAH.Out("jr $$25"); + new UnreachableInst(Context, BB); +} + +// // Returns of float, double and complex need to be handled with a helper // function. The "AndCal" part is coming in a later patch. // @@ -96,6 +363,16 @@ static bool fixupFPReturnAndCall Attribute::ReadNone); Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, NULL)); CallInst::Create(F, Params, "", &Inst ); + } else if (const CallInst *CI = dyn_cast<CallInst>(I)) { + // pic mode calls are handled by already defined + // helper functions + if (Subtarget.getRelocationModel() != Reloc::PIC_ ) { + Function *F_ = CI->getCalledFunction(); + if (F_ && needsFPHelperFromSig(*F_)) { + assureFPCallStub(*F_, M, Subtarget); + Modified=true; + } + } } } return Modified; diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 17dd2c0796..ab9e62703b 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -145,7 +145,7 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { /// GetOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. 
-unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { +unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16; @@ -380,7 +380,7 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg, return Reg; } -unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { +unsigned Mips16InstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 || Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 || Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 || diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index a77a9043bb..a3bd31e94f 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -64,7 +64,7 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const; // Adjust SP by FrameSize bytes. 
Save RA, S0, S1 void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, @@ -102,7 +102,7 @@ public: (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const; void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 6b23057c9c..5fa79cb159 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -154,6 +154,7 @@ class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; + let mayLoad = 1; } class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, @@ -161,6 +162,7 @@ class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; + let mayStore = 1; } class MADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, @@ -314,8 +316,12 @@ let Predicates = [NotN64, HasMips64, HasStdEnc], } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem, load>, LW_FM<0x35>; - def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem, store>, LW_FM<0x3d>; + let isPseudo = 1, isCodeGenOnly = 1 in { + def PseudoLDC1 : LW_FT<"", AFGR64, IILoad, mem, load>; + def PseudoSDC1 : SW_FT<"", AFGR64, IIStore, mem, store>; + } + def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem>, LW_FM<0x35>; + def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem>, LW_FM<0x3d>; } // Indexed loads and stores. 
@@ -523,7 +529,7 @@ let AddedComplexity = 40 in { } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def : LoadRegImmPat<LDC1, f64, load>; - def : StoreRegImmPat<SDC1, f64>; + def : LoadRegImmPat<PseudoLDC1, f64, load>; + def : StoreRegImmPat<PseudoSDC1, f64>; } } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index ad92d41209..3144daebd7 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -77,7 +77,7 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB, SmallVectorImpl<MachineOperand> &Cond) const { - assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); + assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); // for both int and fp branches, the last explicit operand is the @@ -167,7 +167,7 @@ RemoveBranch(MachineBasicBlock &MBB) const // Up to 2 branches are removed. // Note that indirect branches are not removed. for(removed = 0; I != REnd && removed < 2; ++I, ++removed) - if (!GetAnalyzableBrOpc(I->getOpcode())) + if (!getAnalyzableBrOpc(I->getOpcode())) break; MBB.erase(I.base(), FirstBr.base()); @@ -182,7 +182,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm())); + Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm())); return false; } @@ -210,7 +210,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, BranchInstrs.push_back(LastInst); // Not an analyzable branch (e.g., indirect jump). - if (!GetAnalyzableBrOpc(LastOpc)) + if (!getAnalyzableBrOpc(LastOpc)) return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; // Get the second to last instruction in the block. 
@@ -219,7 +219,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, if (++I != REnd) { SecondLastInst = &*I; - SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode()); + SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode()); // Not an analyzable branch (must be an indirect jump). if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) @@ -282,3 +282,16 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } } } + +MachineInstrBuilder +MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, + MachineBasicBlock::iterator I) const { + MachineInstrBuilder MIB; + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); + + for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) + MIB.addOperand(I->getOperand(J)); + + MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end()); + return MIB; +} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 8c05d97bea..0f075ec6d0 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -17,6 +17,7 @@ #include "Mips.h" #include "MipsAnalyzeImmediate.h" #include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -81,7 +82,7 @@ public: /// virtual const MipsRegisterInfo &getRegisterInfo() const = 0; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const = 0; /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; @@ -116,6 +117,11 @@ public: const TargetRegisterInfo *TRI, int64_t Offset) const = 0; + /// Create an instruction which has the same operands and memory operands + /// as MI but has a new opcode. 
+ MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, + MachineBasicBlock::iterator I) const; + protected: bool isZeroImm(const MachineOperand &op) const; @@ -123,7 +129,7 @@ protected: unsigned Flag) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const = 0; void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3d319373fe..5ada1df267 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1095,7 +1095,8 @@ def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>; - +def : InstAlias<"addiu $rs, $imm", + (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rs, simm16:$imm), 0>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index bf5ad37031..daabf3d25a 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -217,7 +217,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { // MachineBasicBlock operand MBBOpnd. 
void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL, MachineBasicBlock *MBBOpnd) { - unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode()); + unsigned NewOpc = TII->getOppositeBranchOpc(Br->getOpcode()); const MCInstrDesc &NewDesc = TII->get(NewOpc); MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index a0768e51c0..12ed1bc186 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -18,11 +18,17 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; +static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), + cl::desc("Expand double precision loads and " + "stores to their single precision " + "counterparts.")); + MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, tm.getRelocationModel() == Reloc::PIC_ ? 
Mips::B : Mips::J), @@ -245,17 +251,23 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { default: return false; case Mips::RetRA: - ExpandRetRA(MBB, MI, Mips::RET); + expandRetRA(MBB, MI, Mips::RET); break; case Mips::BuildPairF64: - ExpandBuildPairF64(MBB, MI); + expandBuildPairF64(MBB, MI); break; case Mips::ExtractElementF64: - ExpandExtractElementF64(MBB, MI); + expandExtractElementF64(MBB, MI); + break; + case Mips::PseudoLDC1: + expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1); + break; + case Mips::PseudoSDC1: + expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1); break; case Mips::MIPSeh_return32: case Mips::MIPSeh_return64: - ExpandEhReturn(MBB, MI); + expandEhReturn(MBB, MI); break; } @@ -263,9 +275,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return true; } -/// GetOppositeBranchOpc - Return the inverse of the specified +/// getOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. 
-unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const { +unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); case Mips::BEQ: return Mips::BNE; @@ -346,7 +358,7 @@ MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, return Reg; } -unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { +unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || @@ -356,13 +368,13 @@ unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { Opc : 0; } -void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const { BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); } -void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); @@ -377,7 +389,7 @@ void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); } -void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); @@ -393,7 +405,57 @@ void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, .addReg(HiReg); } -void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB, +/// Add 4 to the displacement of operand MO. 
+static void fixDisp(MachineOperand &MO) { + switch (MO.getType()) { + default: + llvm_unreachable("Unhandled operand type."); + case MachineOperand::MO_Immediate: + MO.setImm(MO.getImm() + 4); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_ExternalSymbol: + MO.setOffset(MO.getOffset() + 4); + break; + } +} + +void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned OpcD, unsigned OpcS) const { + // If NoDPLoadStore is false, just change the opcode. + if (!NoDPLoadStore) { + genInstrWithNewOpc(OpcD, I); + return; + } + + // Expand a double precision FP load or store to two single precision + // instructions. + + const TargetRegisterInfo &TRI = getRegisterInfo(); + const MachineOperand &ValReg = I->getOperand(0); + unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpeven); + unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpodd); + + if (!TM.getSubtarget<MipsSubtarget>().isLittle()) + std::swap(LoReg, HiReg); + + // Create an instruction which loads from or stores to the lower memory + // address. + MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I); + MIB->getOperand(0).setReg(LoReg); + + // Create an instruction which loads from or stores to the higher memory + // address. + MIB = genInstrWithNewOpc(OpcS, I); + MIB->getOperand(0).setReg(HiReg); + fixDisp(MIB->getOperand(2)); +} + +void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { // This pseudo instruction is generated as part of the lowering of // ISD::EH_RETURN. 
We convert it to a stack increment by OffsetReg, and diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 0bf7876f0f..416fff8a60 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -65,7 +65,7 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const; /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, @@ -79,15 +79,18 @@ public: unsigned *NewImm) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const; - void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; - void ExpandExtractElementF64(MachineBasicBlock &MBB, + void expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void ExpandBuildPairF64(MachineBasicBlock &MBB, + void expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void ExpandEhReturn(MachineBasicBlock &MBB, + void expandDPLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned OpcD, + unsigned OpcS) const; + void expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; }; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index a876f1c7f0..89407351a0 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -72,6 +72,7 @@ MipsTargetMachine(const Target &T, StringRef TT, FrameLowering(MipsFrameLowering::create(*this, Subtarget)), TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { + initAsmInfo(); } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 
67ca6b58e5..5f35edf219 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -72,7 +72,9 @@ NVPTXTargetMachine::NVPTXTargetMachine( Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering( - *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {} + *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { + initAsmInfo(); +} void NVPTXTargetMachine32::anchor() {} diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 71803cdac9..e5c5204708 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCTargetObjectFile.cpp PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp ) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index ec2657403e..b1ac4a6f27 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -22,7 +22,7 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: llvm_unreachable("Unknown fixup kind!"); @@ -50,6 +50,29 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { } } +static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_1: + return 1; + case FK_Data_2: + case PPC::fixup_ppc_ha16: + case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_lo16_ds: + return 2; + case FK_Data_4: + case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_br24: + return 4; + case FK_Data_8: + return 8; + case PPC::fixup_ppc_tlsreg: + case PPC::fixup_ppc_nofixup: + return 0; + } 
+} + namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { public: @@ -77,9 +100,9 @@ public: // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_lo16", 16, 16, 0 }, - { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo16_ds", 16, 14, 0 }, + { "fixup_ppc_lo16", 0, 16, 0 }, + { "fixup_ppc_ha16", 0, 16, 0 }, + { "fixup_ppc_lo16_ds", 0, 14, 0 }, { "fixup_ppc_tlsreg", 0, 0, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; @@ -98,12 +121,13 @@ public: if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); // For each byte of the fragment that the fixup touches, mask in the bits // from the fixup value. The Value has been "split up" into the appropriate // bitfields above. - for (unsigned i = 0; i != 4; ++i) - Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff); } bool mayNeedRelaxation(const MCInst &Inst) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 7a84723ed5..2508cc2f37 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -33,26 +33,9 @@ namespace { virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; - virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); - - virtual void sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs); - }; - - class PPCELFRelocationEntry : public ELFRelocationEntry { - public: - PPCELFRelocationEntry(const ELFRelocationEntry &RE); - bool operator<(const PPCELFRelocationEntry &RE) const { - return (RE.r_offset < r_offset || - 
(RE.r_offset == r_offset && RE.Type > Type)); - } }; } -PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE) - : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol, - RE.r_addend, *RE.Fixup) {} - PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC, @@ -240,47 +223,6 @@ const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Targe return NULL; } -void PPCELFObjectWriter:: -adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { - switch ((unsigned)Fixup.getKind()) { - case PPC::fixup_ppc_ha16: - case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_lo16_ds: - RelocOffset += 2; - break; - default: - break; - } -} - -// The standard sorter only sorts on the r_offset field, but PowerPC can -// have multiple relocations at the same offset. Sort secondarily on the -// relocation type to avoid nondeterminism. -void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs) { - - // Copy to a temporary vector of relocation entries having a different - // sort function. - std::vector<PPCELFRelocationEntry> TmpRelocs; - - for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin(); - R != Relocs.end(); ++R) { - TmpRelocs.push_back(PPCELFRelocationEntry(*R)); - } - - // Sort in place by ascending r_offset and descending r_type. - array_pod_sort(TmpRelocs.begin(), TmpRelocs.end()); - - // Copy back to the original vector. 
- unsigned I = 0; - for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin(); - R != TmpRelocs.end(); ++R, ++I) { - Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type, - R->Symbol, R->r_addend, *R->Fixup); - } -} - - MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 2223cd623c..3f04a4ec0a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -142,7 +142,7 @@ unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo, if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_ha16)); return 0; } @@ -153,7 +153,7 @@ unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo, if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16)); return 0; } @@ -170,7 +170,7 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -188,7 +188,7 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; // Add a fixup for the displacement field. 
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16_ds)); return RegBits; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index a01fa44a9a..2da30f9038 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -58,7 +58,7 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createPPCMCAsmInfo(StringRef TT) { +static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; @@ -69,9 +69,10 @@ static MCAsmInfo *createPPCMCAsmInfo(StringRef TT) { MAI = new PPCLinuxMCAsmInfo(isPPC64); // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1; + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index cd70aeed87..1f0c3c4b5d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1168,6 +1168,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, FuncInfo->addMustSaveCR(Reg); } else { CRSpilled = true; + FuncInfo->setSpillsCR(); // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3819bc8f15..eee2bb87de 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16,6 +16,7 @@ #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" +#include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -64,6 +65,9 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); + if (TM.getSubtargetImpl()->isSVR4ABI()) + return new PPC64LinuxTargetObjectFile(); + return new TargetLoweringObjectFileELF(); } @@ -662,6 +666,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; + case PPCISD::SC: return "PPCISD::SC"; } } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b219de38d5..2a1cc121da 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -175,61 +175,61 @@ namespace llvm { /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym@got@tprel@ha. + /// base to sym\@got\@tprel\@ha. ADDIS_GOT_TPREL_HA, /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym@got@tprel@l. This completes the addition that + /// and offset sym\@got\@tprel\@l. This completes the addition that /// finds the offset of "sym" relative to the thread pointer. 
LD_GOT_TPREL_L, /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS /// model, produces an ADD instruction that adds the contents of /// G8RReg to the thread pointer. Symbol contains a relocation - /// sym@tls which is to be replaced by the thread pointer and + /// sym\@tls which is to be replaced by the thread pointer and /// identifies to the linker that the instruction is part of a /// TLS sequence. ADD_TLS, /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym@got@tlsgd@ha. + /// register to sym\@got\@tlsgd\@ha. ADDIS_TLSGD_HA, /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@tlsgd@l. + /// sym\@got\@tlsgd\@l. ADDI_TLSGD_L, /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym@tlsgd). + /// model, produces a call to __tls_get_addr(sym\@tlsgd). GET_TLS_ADDR, /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym@got@tlsld@ha. + /// register to sym\@got\@tlsld\@ha. ADDIS_TLSLD_HA, /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@tlsld@l. + /// sym\@got\@tlsld\@l. ADDI_TLSLD_L, /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym@tlsld). + /// model, produces a call to __tls_get_addr(sym\@tlsld). GET_TLSLD_ADDR, /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the /// local-dynamic TLS model, produces an ADDIS8 instruction - /// that adds X3 to sym@dtprel@ha. The Chain operand is needed + /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed /// to tie this in place following a copy to %X3 from the result /// of a GET_TLSLD_ADDR. 
ADDIS_DTPREL_HA, /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@dtprel@l. + /// sym\@got\@dtprel\@l. ADDI_DTPREL_L, /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded @@ -238,6 +238,10 @@ namespace llvm { /// optimizations due to constant folding. VADD_SPLAT, + /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned + /// operand identifies the operating system entry point. + SC, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or @@ -266,16 +270,16 @@ namespace llvm { /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, /// produces an ADDIS8 instruction that adds the TOC base register to - /// sym@toc@ha. + /// sym\@toc\@ha. ADDIS_TOC_HA, /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model, /// produces a LD instruction with base register G8RReg and offset - /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. LD_TOC_L, /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces - /// an ADDI8 instruction that adds G8RReg to sym@toc@l. + /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l. /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. ADDI_TOC_L }; @@ -450,7 +454,7 @@ namespace llvm { /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. 
virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index b6f4e85215..a24405851c 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -145,6 +145,19 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, let Inst{31} = lk; } +// 1.7.3 SC-Form +class SCForm<bits<6> opcode, bits<1> xo, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<7> LEV; + + let Pattern = pattern; + + let Inst{20-26} = LEV; + let Inst{30} = xo; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 4763069f25..9c39b34ab0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -162,6 +162,10 @@ def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; +def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -987,6 +991,12 @@ let isBranch = 1, isTerminator = 1 in { "#EH_SjLj_Setup\t$dst", []>; } +// System call. +let PPC970_Unit = 7 in { + def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), + "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>; +} + // DCB* instructions. 
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 14dc794195..0b099edff4 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -48,6 +48,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) setMCUseCFI(false); + initAsmInfo(); } void PPC32TargetMachine::anchor() { } diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp new file mode 100644 index 0000000000..90e4f15452 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -0,0 +1,57 @@ +//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPCTargetObjectFile.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +void +PPC64LinuxTargetObjectFile:: +Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + +const MCSection * PPC64LinuxTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + const MCSection *DefaultSection = + TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); + + if (DefaultSection != ReadOnlySection) + return DefaultSection; + + // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI + // when we have a constant that contains global relocations. 
This is + // necessary because of this ABI's handling of pointers to functions in + // a shared library. The address of a function is actually the address + // of a function descriptor, which resides in the .opd section. Generated + // code uses the descriptor directly rather than going via the GOT as some + // other ABIs do, which means that initialized function pointers must + // reference the descriptor. The linker must convert copy relocs of + // pointers to functions in shared libraries into dynamic relocations, + // because of an ordering problem with initialization of copy relocs and + // PLT entries. The dynamic relocation will be initialized by the dynamic + // linker, so we must use DataRelROSection instead of ReadOnlySection. + // For more information, see the description of ELIMINATE_COPY_RELOCS in + // GNU ld. + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + + if (GVar && GVar->isConstant() && + (GVar->getInitializer()->getRelocationInfo() == + Constant::GlobalRelocations)) + return DataRelROSection; + + return DefaultSection; +} diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h new file mode 100644 index 0000000000..9203e23574 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -0,0 +1,32 @@ +//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H +#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + + /// PPC64LinuxTargetObjectFile - This implementation is used for + /// 64-bit PowerPC Linux. 
+ class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 31fbf32d0c..7175ec941a 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -70,6 +70,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, InstrInfo = new SIInstrInfo(*this); TLInfo = new SITargetLowering(*this); } + initAsmInfo(); } AMDGPUTargetMachine::~AMDGPUTargetMachine() { diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp index 178795936a..126514b976 100644 --- a/lib/Target/R600/AMDILDeviceInfo.cpp +++ b/lib/Target/R600/AMDILDeviceInfo.cpp @@ -81,7 +81,8 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName, return new AMDGPUNIDevice(ptr); } else if (deviceName == "SI" || deviceName == "tahiti" || deviceName == "pitcairn" || - deviceName == "verde" || deviceName == "oland") { + deviceName == "verde" || deviceName == "oland" || + deviceName == "hainan") { return new AMDGPUSIDevice(ptr); } else { #if DEBUG diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 45d009c2a0..6f66aa898a 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -78,7 +78,7 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { return createSIMCCodeEmitter(MCII, MRI, STI, Ctx); } else { - return createR600MCCodeEmitter(MCII, MRI, STI, Ctx); + return createR600MCCodeEmitter(MCII, MRI); } } diff --git 
a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index 09d0d5b61c..95c572c21b 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -32,9 +32,7 @@ class raw_ostream; extern Target TheAMDGPUTarget; MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); + const MCRegisterInfo &MRI); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 271a974734..3404844435 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -35,14 +35,11 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; - const MCSubtargetInfo &STI; - MCContext &Ctx; public: - R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, - const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri) + : MCII(mcii), MRI(mri) { } /// \brief Encode the instruction and write it to the OS. 
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -98,10 +95,8 @@ enum TextureTypes { }; MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new R600MCCodeEmitter(MCII, MRI, STI, Ctx); + const MCRegisterInfo &MRI) { + return new R600MCCodeEmitter(MCII, MRI); } void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index 5ee1c0d8ae..0cbe919d81 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -45,3 +45,4 @@ def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"oland", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"hainan", SI_Itin, [Feature64BitPtr, FeatureFP64]>; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 60bceb708f..3e7a24aecf 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -37,6 +37,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index c7725a1459..7f2159f79e 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -250,46 +251,6 @@ public: bool isS32Imm() const { 
return isImm(-(1LL << 31), (1LL << 31) - 1); } }; -// Maps of asm register numbers to LLVM register numbers, with 0 indicating -// an invalid register. We don't use register class directly because that -// specifies the allocation order. -static const unsigned GR32Regs[] = { - SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, - SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, - SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, - SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W -}; -static const unsigned GR64Regs[] = { - SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, - SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, - SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, - SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D -}; -static const unsigned GR128Regs[] = { - SystemZ::R0Q, 0, SystemZ::R2Q, 0, - SystemZ::R4Q, 0, SystemZ::R6Q, 0, - SystemZ::R8Q, 0, SystemZ::R10Q, 0, - SystemZ::R12Q, 0, SystemZ::R14Q, 0 -}; -static const unsigned FP32Regs[] = { - SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, - SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, - SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, - SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S -}; -static const unsigned FP64Regs[] = { - SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, - SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, - SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, - SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D -}; -static const unsigned FP128Regs[] = { - SystemZ::F0Q, SystemZ::F1Q, 0, 0, - SystemZ::F4Q, SystemZ::F5Q, 0, 0, - SystemZ::F8Q, SystemZ::F9Q, 0, 0, - SystemZ::F12Q, SystemZ::F13Q, 0, 0 -}; - class SystemZAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "SystemZGenAsmMatcher.inc" @@ -349,25 +310,28 @@ public: // Used by the TableGen code to parse particular operand types. 
OperandMatchResultTy parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg); + return parseRegister(Operands, 'r', SystemZMC::GR32Regs, + SystemZOperand::GR32Reg); } OperandMatchResultTy parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg); + return parseRegister(Operands, 'r', SystemZMC::GR64Regs, + SystemZOperand::GR64Reg); } OperandMatchResultTy parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg); + return parseRegister(Operands, 'r', SystemZMC::GR128Regs, + SystemZOperand::GR128Reg); } OperandMatchResultTy parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg, - true); + return parseRegister(Operands, 'r', SystemZMC::GR32Regs, + SystemZOperand::ADDR32Reg, true); } OperandMatchResultTy parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg, - true); + return parseRegister(Operands, 'r', SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, true); } OperandMatchResultTy parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -375,30 +339,47 @@ public: } OperandMatchResultTy parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg); + return parseRegister(Operands, 'f', SystemZMC::FP32Regs, + SystemZOperand::FP32Reg); } OperandMatchResultTy parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg); + return parseRegister(Operands, 'f', SystemZMC::FP64Regs, + SystemZOperand::FP64Reg); } OperandMatchResultTy parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP128Regs, 
SystemZOperand::FP128Reg); + return parseRegister(Operands, 'f', SystemZMC::FP128Regs, + SystemZOperand::FP128Reg); } OperandMatchResultTy parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false); + return parseAddress(Operands, SystemZMC::GR32Regs, + SystemZOperand::ADDR32Reg, false); } OperandMatchResultTy parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false); + return parseAddress(Operands, SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, false); } OperandMatchResultTy parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true); + return parseAddress(Operands, SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, true); } OperandMatchResultTy parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + OperandMatchResultTy + parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + int64_t MinVal, int64_t MaxVal); + OperandMatchResultTy + parsePCRel16(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1); + } + OperandMatchResultTy + parsePCRel32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1); + } }; } @@ -502,7 +483,8 @@ SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, // Parse the first register. 
Register Reg; - OperandMatchResultTy Result = parseRegister(Reg, 'r', GR64Regs, true); + OperandMatchResultTy Result = parseRegister(Reg, 'r', SystemZMC::GR64Regs, + true); if (Result != MatchOperand_Success) return Result; @@ -517,7 +499,7 @@ SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, } Index = Reg.Number; - Result = parseRegister(Reg, 'r', GR64Regs, true); + Result = parseRegister(Reg, 'r', SystemZMC::GR64Regs, true); if (Result != MatchOperand_Success) return Result; } @@ -546,9 +528,9 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, if (parseRegister(Reg)) return Error(Reg.StartLoc, "register expected"); if (Reg.Prefix == 'r' && Reg.Number < 16) - RegNo = GR64Regs[Reg.Number]; + RegNo = SystemZMC::GR64Regs[Reg.Number]; else if (Reg.Prefix == 'f' && Reg.Number < 16) - RegNo = FP64Regs[Reg.Number]; + RegNo = SystemZMC::FP64Regs[Reg.Number]; else return Error(Reg.StartLoc, "invalid register"); StartLoc = Reg.StartLoc; @@ -683,6 +665,37 @@ parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } +SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: +parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + int64_t MinVal, int64_t MaxVal) { + MCContext &Ctx = getContext(); + MCStreamer &Out = getStreamer(); + const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return MatchOperand_NoMatch; + + // For consistency with the GNU assembler, treat immediates as offsets + // from ".". + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + if ((Value & 1) || Value < MinVal || Value > MaxVal) { + Error(StartLoc, "offset out of range"); + return MatchOperand_ParseFail; + } + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + Ctx); + Expr = Value == 0 ? 
Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + return MatchOperand_Success; +} + // Force static initialization. extern "C" void LLVMInitializeSystemZAsmParser() { RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget); diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 67b17fcc59..757d5a8898 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -4,6 +4,7 @@ tablegen(LLVM SystemZGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM SystemZGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM SystemZGenCallingConv.inc -gen-callingconv) tablegen(LLVM SystemZGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) @@ -27,6 +28,7 @@ add_llvm_target(SystemZCodeGen add_dependencies(LLVMSystemZCodeGen intrinsics_gen) add_subdirectory(AsmParser) +add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/SystemZ/Disassembler/CMakeLists.txt b/lib/Target/SystemZ/Disassembler/CMakeLists.txt new file mode 100644 index 0000000000..5bc1859816 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMSystemZDisassembler + SystemZDisassembler.cpp + ) + +add_dependencies(LLVMSystemZDisassembler SystemZCommonTableGen) diff --git a/lib/Target/SystemZ/Disassembler/LLVMBuild.txt b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000000..c3081f5447 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===-- ./lib/Target/SystemZ/Disassembler/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SystemZDisassembler +parent = SystemZ +required_libraries = MC Support SystemZDesc SystemZInfo +add_to_library_groups = SystemZ diff --git a/lib/Target/SystemZ/Disassembler/Makefile b/lib/Target/SystemZ/Disassembler/Makefile new file mode 100644 index 0000000000..efc4cc8e9c --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===-- lib/Target/SystemZ/Disassembler/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMSystemZDisassembler + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp new file mode 100644 index 0000000000..9a9de78224 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -0,0 +1,301 @@ +//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class SystemZDisassembler : public MCDisassembler { +public: + SystemZDisassembler(const MCSubtargetInfo &STI) + : MCDisassembler(STI) {} + virtual ~SystemZDisassembler() {} + + // Override MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const LLVM_OVERRIDE; +}; +} // end anonymous namespace + +static MCDisassembler *createSystemZDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new SystemZDisassembler(STI); +} + +extern "C" void LLVMInitializeSystemZDisassembler() { + // Register the disassembler. 
+ TargetRegistry::RegisterMCDisassembler(TheSystemZTarget, + createSystemZDisassembler); +} + +static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned *Regs, + bool isAddress = false) { + assert(RegNo < 16 && "Invalid register"); + if (!isAddress || RegNo) { + RegNo = Regs[RegNo]; + if (RegNo == 0) + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::CreateReg(RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs); +} + +static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); +} + +static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs); +} + +static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, true); +} + +static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs); +} + +static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs); +} + +static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs); +} + +template<unsigned N> +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(Imm)); + 
return MCDisassembler::Success; +} + +template<unsigned N> +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<6>(Inst, Imm); +} + +static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<32>(Inst, Imm); +} + +static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<32>(Inst, Imm); +} + +template<unsigned N> +static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address) { + assert(isUInt<N>(Imm) && "Invalid PC-relative offset"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm) * 2 + Address)); + return MCDisassembler::Success; +} 
+ +static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<16>(Inst, Imm, Address); +} + +static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<32>(Inst, Imm, Address); +} + +static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 12; + uint64_t Disp = Field & 0xfff; + assert(Base < 16 && "Invalid BDAddr12"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 20; + uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff); + assert(Base < 16 && "Invalid BDAddr20"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Index < 16 && "Invalid BDXAddr12"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 24; + uint64_t Base = (Field >> 20) & 0xf; + uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12); + assert(Index < 16 && "Invalid BDXAddr20"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 
0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +#include "SystemZGenDisassemblerTables.inc" + +DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + // Get the first two bytes of the instruction. + uint8_t Bytes[6]; + Size = 0; + if (Region.readBytes(Address, 2, Bytes, 0) == -1) + return MCDisassembler::Fail; + + // The top 2 bits of the first byte specify the size. 
+ const uint8_t *Table; + if (Bytes[0] < 0x40) { + Size = 2; + Table = DecoderTable16; + } else if (Bytes[0] < 0xc0) { + Size = 4; + Table = DecoderTable32; + } else { + Size = 6; + Table = DecoderTable48; + } + + // Read any remaining bytes. + if (Size > 2 && Region.readBytes(Address + 2, Size - 2, Bytes + 2, 0) == -1) + return MCDisassembler::Fail; + + // Construct the instruction. + uint64_t Inst = 0; + for (uint64_t I = 0; I < Size; ++I) + Inst = (Inst << 8) | Bytes[I]; + + return decodeInstruction(Table, MI, Inst, Address, this, STI); +} diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index d73cf49808..369802b2b8 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -114,10 +114,26 @@ void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, O << "%a" << (unsigned int)Value; } +void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "0x"; + O.write_hex(MO.getImm()); + } else + O << *MO.getExpr(); +} + void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - printOperand(MI, OpNum, O); - O << "@PLT"; + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "0x"; + O.write_hex(MO.getImm()); + } else { + O << *MO.getExpr(); + O << "@PLT"; + } } void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index b82e79d93c..f77282efcb 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -56,6 +56,7 @@ private: void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); 
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt index aba0de27ac..95e657f7bd 100644 --- a/lib/Target/SystemZ/LLVMBuild.txt +++ b/lib/Target/SystemZ/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -24,6 +24,7 @@ name = SystemZ parent = Target has_asmparser = 1 has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index ea2250f546..7721b1ffab 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -45,30 +45,43 @@ private: // Called by the TableGen code to get the binary encoding of operand // MO in MI. Fixups is the list of fixups against MI. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const; + // Called by the TableGen code to get the binary encoding of an address. + // The index, if any, is encoded first, followed by the base, + // followed by the displacement. In a 20-bit displacement, + // the low 12 bits are encoded before the high 8 bits. 
+ uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at // Offset bytes from the start of MI. Add the fixup to Fixups // and return the in-place addend, which since we're a RELA target // is always 0. - unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, unsigned Kind, int64_t Offset) const; - unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); } - unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); } - unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPLT16DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2); } - unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPLT32DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2); } @@ -95,34 +108,73 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, } } -unsigned SystemZMCCodeEmitter:: +uint64_t SystemZMCCodeEmitter:: 
getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); + return static_cast<uint64_t>(MO.getImm()); llvm_unreachable("Unexpected operand type!"); } -unsigned -SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum, +uint64_t SystemZMCCodeEmitter:: +getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + assert(isUInt<4>(Base) && isUInt<12>(Disp)); + return (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + assert(isUInt<4>(Base) && isInt<20>(Disp)); + return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12); +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups); + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index)); + return (Index << 16) | (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), 
Fixups); + assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index)); + return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8) + | ((Disp & 0xff000) >> 12); +} + +uint64_t +SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, unsigned Kind, int64_t Offset) const { const MCOperand &MO = MI.getOperand(OpNum); - // For compatibility with the GNU assembler, treat constant operands as - // unadjusted PC-relative offsets. + const MCExpr *Expr; if (MO.isImm()) - return MO.getImm() / 2; - - const MCExpr *Expr = MO.getExpr(); - if (Offset) { - // The operand value is relative to the start of MI, but the fixup - // is relative to the operand field itself, which is Offset bytes - // into MI. Add Offset to the relocation value to cancel out - // this difference. - const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + Expr = MCConstantExpr::Create(MO.getImm() + Offset, Ctx); + else { + Expr = MO.getExpr(); + if (Offset) { + // The operand value is relative to the start of MI, but the fixup + // is relative to the operand field itself, which is Offset bytes + // into MI. Add Offset to the relocation value to cancel out + // this difference. 
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } } Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); return 0; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 6844f92ec9..3653192d85 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -27,11 +27,55 @@ using namespace llvm; -static MCAsmInfo *createSystemZMCAsmInfo(StringRef TT) { +const unsigned SystemZMC::GR32Regs[16] = { + SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, + SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, + SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, + SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W +}; + +const unsigned SystemZMC::GR64Regs[16] = { + SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, + SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, + SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, + SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D +}; + +const unsigned SystemZMC::GR128Regs[16] = { + SystemZ::R0Q, 0, SystemZ::R2Q, 0, + SystemZ::R4Q, 0, SystemZ::R6Q, 0, + SystemZ::R8Q, 0, SystemZ::R10Q, 0, + SystemZ::R12Q, 0, SystemZ::R14Q, 0 +}; + +const unsigned SystemZMC::FP32Regs[16] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S +}; + +const unsigned SystemZMC::FP64Regs[16] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D +}; + +const unsigned SystemZMC::FP128Regs[16] = { + SystemZ::F0Q, 
SystemZ::F1Q, 0, 0, + SystemZ::F4Q, SystemZ::F5Q, 0, 0, + SystemZ::F8Q, SystemZ::F9Q, 0, 0, + SystemZ::F12Q, SystemZ::F13Q, 0, 0 +}; + +static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); - MachineLocation FPDst(MachineLocation::VirtualFP); - MachineLocation FPSrc(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP); - MAI->addInitialFrameState(0, FPDst, FPSrc); + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(SystemZ::R15D, true), + SystemZMC::CFAOffsetFromInitialSP); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 229912f161..1f70047db6 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -34,6 +34,16 @@ namespace SystemZMC { // The offset of the DWARF CFA from the incoming stack pointer. const int64_t CFAOffsetFromInitialSP = CallFrameSize; + + // Maps of asm register numbers to LLVM register numbers, with 0 indicating + // an invalid register. We don't use the register classes directly because + // they specify the allocation order. 
+ extern const unsigned GR32Regs[16]; + extern const unsigned GR64Regs[16]; + extern const unsigned GR128Regs[16]; + extern const unsigned FP32Regs[16]; + extern const unsigned FP64Regs[16]; + extern const unsigned FP128Regs[16]; } MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index c992584af9..445725bd1e 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -16,13 +16,14 @@ BUILT_SOURCES = SystemZGenRegisterInfo.inc \ SystemZGenAsmWriter.inc \ SystemZGenAsmMatcher.inc \ SystemZGenCodeEmitter.inc \ + SystemZGenDisassemblerTables.inc \ SystemZGenInstrInfo.inc \ SystemZGenDAGISel.inc \ SystemZGenSubtargetInfo.inc \ SystemZGenCallingConv.inc \ SystemZGenMCCodeEmitter.inc -DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index d1f56a4916..8f5a5476b4 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -29,17 +29,44 @@ to load 103. This seems to be a general target-independent problem. -- -The tuning of the choice between Load Address (LA) and addition in +The tuning of the choice between LOAD ADDRESS (LA) and addition in SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on performance measurements. -- +We don't support tail calls at present. + +-- + +We don't support prefetching yet. + +-- + There is no scheduling support. -- -We don't use the Branch on Count or Branch on Index families of instruction. +We don't use the BRANCH ON COUNT or BRANCH ON INDEX families of instruction. + +-- + +We might want to use BRANCH ON CONDITION for conditional indirect calls +and conditional returns. + +-- + +We don't use the combined COMPARE AND BRANCH instructions. Using them +would require a change to the way we handle out-of-range branches. 
+At the moment, we start with 32-bit forms like BRCL and shorten them +to forms like BRC where possible, but COMPARE AND BRANCH does not have +a 32-bit form. + +-- + +We should probably model just CC, not the PSW as a whole. Strictly +speaking, every instruction changes the PSW since the PSW contains the +current instruction address. -- @@ -54,7 +81,30 @@ equality after an integer comparison, etc. -- -We don't optimize string and block memory operations. +We don't use the LOAD AND TEST or TEST DATA CLASS instructions. + +-- + +We could use the generic floating-point forms of LOAD COMPLEMENT, +LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the +condition codes. For example, we could use LCDFR instead of LCDBR. + +-- + +We don't optimize block memory operations. + +It's definitely worth using things like MVC, CLC, NC, XC and OC with +constant lengths. MVCIN may be worthwhile too. + +We should probably implement things like memcpy using MVC with EXECUTE. +Likewise memcmp and CLC. MVCLE and CLCLE could be useful too. + +-- + +We don't optimize string operations. + +MVST, CLST, SRST and CUSE could be useful here. Some of the TRANSLATE +family might be too, although they are probably more difficult to exploit. -- @@ -63,9 +113,33 @@ conventions require f128s to be returned by invisible reference. -- +ADD LOGICAL WITH SIGNED IMMEDIATE could be useful when we need to +produce a carry. SUBTRACT LOGICAL IMMEDIATE could be useful when we +need to produce a borrow. (Note that there are no memory forms of +ADD LOGICAL WITH CARRY and SUBTRACT LOGICAL WITH BORROW, so the high +part of 128-bit memory operations would probably need to be done +via a register.) + +-- + +We don't use the halfword forms of LOAD REVERSED and STORE REVERSED +(LRVH and STRVH). + +-- + +We could take advantage of the various ... UNDER MASK instructions, +such as ICM and STCM. + +-- + +We could make more use of the ROTATE AND ... SELECTED BITS instructions. 
+At the moment we only use RISBG, and only then for subword atomic operations. + +-- + DAGCombiner can detect integer absolute, but there's not yet an associated -ISD opcode. We could add one and implement it using Load Positive. -Negated absolutes could use Load Negative. +ISD opcode. We could add one and implement it using LOAD POSITIVE. +Negated absolutes could use LOAD NEGATIVE. -- @@ -142,5 +216,15 @@ See CodeGen/SystemZ/alloca-01.ll for an example. -- Atomic loads and stores use the default compare-and-swap based implementation. -This is probably much too conservative in practice, and the overhead is -especially bad for 8- and 16-bit accesses. +This is much too conservative in practice, since the architecture guarantees +that 1-, 2-, 4- and 8-byte loads and stores to aligned addresses are +inherently atomic. + +-- + +If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG. + +-- + +We might want to model all access registers and use them to spill +32-bit values. diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 7c9f0e668b..104af6e99d 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -40,24 +40,22 @@ def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; // fcopysign with an FP32 result. let isCodeGenOnly = 1 in { - def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; - def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; + def CPSDRss : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; } -// The sign of an FP128 is in the high register. Give the CPSDRsd -// operands in R1, R2, R3 order. +// The sign of an FP128 is in the high register. def : Pat<(fcopysign FP32:$src1, FP128:$src2), - (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>; + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>; // fcopysign with an FP64 result. 
let isCodeGenOnly = 1 in - def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; -def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; + def CPSDRds : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; -// The sign of an FP128 is in the high register. Give the CPSDRdd -// operands in R1, R2, R3 order. +// The sign of an FP128 is in the high register. def : Pat<(fcopysign FP64:$src1, FP128:$src2), - (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>; + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>; // fcopysign with an FP128 result. Use "upper" as the high half and leave // the low half as-is. @@ -65,13 +63,12 @@ class CopySign128<RegisterOperand cls, dag upper> : Pat<(fcopysign FP128:$src1, cls:$src2), (INSERT_SUBREG FP128:$src1, upper, subreg_high)>; -// Give the CPSDR* operands in R1, R2, R3 order. -def : CopySign128<FP32, (CPSDRds FP32:$src2, - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; -def : CopySign128<FP64, (CPSDRdd FP64:$src2, - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; -def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; +def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_high), + FP32:$src2)>; +def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), + FP64:$src2)>; +def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), + (EXTRACT_SUBREG FP128:$src2, subreg_high))>; //===----------------------------------------------------------------------===// // Load instructions @@ -155,13 +152,13 @@ let Defs = [PSW] in { } // fp_to_sint always rounds towards zero, which is modifier value 5. 
-def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR FP32:$src, 5)>; -def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR FP64:$src, 5)>; -def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>; +def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; +def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; +def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; -def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR FP32:$src, 5)>; -def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR FP64:$src, 5)>; -def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>; +def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; +def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; +def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; //===----------------------------------------------------------------------===// // Unary arithmetic @@ -210,9 +207,9 @@ let Defs = [PSW] in { // frint rounds according to the current mode (modifier 0) and detects // inexact conditions. 
-def : Pat<(frint FP32:$src), (FIEBR FP32:$src, 0)>; -def : Pat<(frint FP64:$src), (FIDBR FP64:$src, 0)>; -def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>; +def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; //===----------------------------------------------------------------------===// // Binary arithmetic diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index b32b7eb0fc..bf5aa8dbeb 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -82,25 +82,24 @@ def getDisp20Opcode : InstrMapping { // // Formats are specified using operand field declarations of the form: // -// bits<4> Rn : register input or output for operand n -// bits<m> In : immediate value of width m for operand n -// bits<4> Bn : base register for address operand n -// bits<m> Dn : displacement value of width m for address operand n -// bits<4> Xn : index register for address operand n -// bits<4> Mn : mode value for operand n +// bits<4> Rn : register input or output for operand n +// bits<m> In : immediate value of width m for operand n +// bits<4> BDn : address operand n, which has a base and a displacement +// bits<m> XBDn : address operand n, which has an index, a base and a +// displacement +// bits<4> Xn : index register for address operand n +// bits<4> Mn : mode value for operand n // -// The operand numbers ("n" in the list above) follow the architecture manual, -// but the fields are always declared in assembly order, so there are some -// cases where operand "2" comes after operand "3". For address operands, -// the base register field is declared first, followed by the displacement, -// followed by the index (if any). This matches the bdaddr* and bdxaddr* -// orders. +// The operand numbers ("n" in the list above) follow the architecture manual. 
+// Assembly operands sometimes have a different order; in particular, R3 +// is often written between operands 1 and 2. // //===----------------------------------------------------------------------===// class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<16> I2; @@ -114,6 +113,7 @@ class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -133,6 +133,7 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<32> I2; @@ -146,6 +147,7 @@ class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<2, outs, ins, asmstr, pattern> { field bits<16> Inst; + field bits<16> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -158,6 +160,7 @@ class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R3; @@ -173,6 +176,7 @@ class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -186,6 +190,7 @@ class InstRRE<bits<16> op, dag outs, dag ins, string
asmstr, list<dag> pattern> class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -201,17 +206,14 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{31-24} = op; let Inst{23-20} = R1; - let Inst{19-16} = X2; - let Inst{15-12} = B2; - let Inst{11-0} = D2; + let Inst{19-0} = XBD2; let HasIndex = 1; } @@ -219,17 +221,14 @@ class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2; + let Inst{35-16} = XBD2; let Inst{15-8} = 0; let Inst{7-0} = op{7-0}; @@ -239,18 +238,15 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R3; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2; + let Inst{35-16} = XBD2; let Inst{15-12} = R1; let Inst{11-8} = 0; let Inst{7-0} = op{7-0}; @@ -261,18 +257,14 @@ class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXY<bits<16> op, dag 
outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<20> D2; - bits<4> X2; + bits<28> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2{11-0}; - let Inst{15-8} = D2{19-12}; + let Inst{35-8} = XBD2; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -282,34 +274,31 @@ class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<12> D2; + bits<16> BD2; let Inst{31-24} = op; let Inst{23-20} = R1; let Inst{19-16} = R3; - let Inst{15-12} = B2; - let Inst{11-0} = D2; + let Inst{15-0} = BD2; } class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<20> D2; + bits<24> BD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; let Inst{35-32} = R3; - let Inst{31-28} = B2; - let Inst{27-16} = D2{11-0}; - let Inst{15-8} = D2{19-12}; + let Inst{31-8} = BD2; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -318,44 +307,40 @@ class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; - bits<4> B1; - bits<12> D1; + bits<16> BD1; bits<8> I2; let Inst{31-24} = op; let Inst{23-16} = I2; - let Inst{15-12} = B1; - let Inst{11-0} = D1; + let Inst{15-0} = BD1; } class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { 
field bits<48> Inst; + field bits<48> SoftFail = 0; - bits<4> B1; - bits<12> D1; + bits<16> BD1; bits<16> I2; let Inst{47-32} = op; - let Inst{31-28} = B1; - let Inst{27-16} = D1; + let Inst{31-16} = BD1; let Inst{15-0} = I2; } class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; - bits<4> B1; - bits<20> D1; + bits<24> BD1; bits<8> I2; let Inst{47-40} = op{15-8}; let Inst{39-32} = I2; - let Inst{31-28} = B1; - let Inst{27-16} = D1{11-0}; - let Inst{15-8} = D1{19-12}; + let Inst{31-8} = BD1; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -432,23 +417,23 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, dag src> - : InstRRE<opcode, (outs cls:$dst), (ins), - mnemonic#"\t$dst", - [(set cls:$dst, src)]> { + : InstRRE<opcode, (outs cls:$R1), (ins), + mnemonic#"\t$R1", + [(set cls:$R1, src)]> { let R2 = 0; } class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr), - mnemonic#"\t$dst1, $dst2, $addr", []> { + : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { let mayLoad = 1; } class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, pcrel32:$addr)]> { + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, pcrel32:$I2)]> { let mayStore = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
// However, BDXs have two extra operands and are therefore 6 units more @@ -458,17 +443,17 @@ class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs), (ins cls:$src, mode:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, mode:$addr)]> { + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { let mayStore = 1; } class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, mode:$addr)]> { + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { let mayStore = 1; } @@ -483,32 +468,32 @@ multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, } class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr), - mnemonic#"\t$from, $to, $addr", []> { + : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { let mayStore = 1; } class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, mode:$addr)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mode:$BD1)]> { let mayStore = 1; } class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator 
imm:$src, mode:$addr)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mode:$BD1)]> { let mayStore = 1; } class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, Immediate imm> - : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, bdaddr12only:$addr)]> { + : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, bdaddr12only:$BD1)]> { let mayStore = 1; } @@ -524,38 +509,38 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src), - mnemonic#"\t$dst, $src", - [(set cls1:$dst, (operator cls2:$src))]>; + : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]>; class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src), - mnemonic#"\t$dst, $src", - [(set cls1:$dst, (operator cls2:$src))]>; + : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]>; class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode), - mnemonic#"\t$dst, $mode, $src", []>; + : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2), + mnemonic#"\t$R1, $R3, $R2", []>; class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs cls:$dst), (ins imm:$src), - mnemonic#"\t$dst, $src", - [(set cls:$dst, (operator imm:$src))]>; + : InstRI<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set 
cls:$R1, (operator imm:$I2))]>; class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs cls:$dst), (ins imm:$src), - mnemonic#"\t$dst, $src", - [(set cls:$dst, (operator imm:$src))]>; + : InstRIL<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator pcrel32:$addr))]> { + : InstRIL<opcode, (outs cls:$R1), (ins pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator pcrel32:$I2))]> { let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more @@ -565,25 +550,25 @@ class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs cls:$dst), (ins mode:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator mode:$addr))]> { + : InstRX<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { let mayLoad = 1; } class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator bdxaddr12only:$addr))]> { + : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> { let mayLoad = 1; } class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs cls:$dst), (ins 
mode:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator mode:$addr))]> { + : InstRXY<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { let mayLoad = 1; } @@ -599,83 +584,76 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$dst, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$dst, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } -// Here the assembly and dag operands are in natural order, -// but the first input operand maps to R3 and the second to R2. -// This is used for "CPSDR R1, R3, R2", which is equivalent to -// R1 = copysign (R3, R2). -// -// Direct uses of the instruction must pass operands in encoding order -- -// R1, R2, R3 -- so they must pass the source operands in reverse order. 
-class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1), - mnemonic#"\t$dst, $src1, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>; +class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2), + mnemonic#"\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]>; class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRIL<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { - let Constraints = "$src1 = $dst"; - let 
DisableEncoding = "$src1"; + : InstRX<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, - (load bdxaddr12only:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, + (load bdxaddr12only:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } @@ -693,18 +671,18 @@ multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store 
(operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } @@ -722,49 +700,49 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode> - : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, mode:$src2))]> { + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2), + mnemonic#"\t$R1, $BD2", + [(set cls:$R1, (operator cls:$R1src, mode:$BD2))]> { let R3 = 0; - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode> - : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src1, $src2", - [(set cls:$dst, (operator cls:$src1, mode:$src2))]>; + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator cls:$R3, mode:$BD2))]>; class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls1:$src1, cls2:$src2)]>; + : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]>; class CompareRRE<string 
mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls1:$src1, cls2:$src2)]>; + : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]>; class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, imm:$src2)]>; + : InstRI<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]>; class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, imm:$src2)]>; + : InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]>; class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load pcrel32:$src2))]> { + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, (load pcrel32:$I2))]> { let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
// However, BDXs have two extra operands and are therefore 6 units more @@ -775,26 +753,26 @@ class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load mode:$src2))]> { + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { let mayLoad = 1; } class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load bdxaddr12only:$src2))]> { + : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load bdxaddr12only:$XBD2))]> { let mayLoad = 1; } class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load mode:$src2))]> { + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { let mayLoad = 1; } @@ -814,26 +792,26 @@ multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load mode:$addr), imm:$src)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { let 
mayLoad = 1; } class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm> - : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load bdaddr12only:$addr), imm:$src)]> { + : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load bdaddr12only:$BD1), imm:$I2)]> { let mayLoad = 1; } class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load mode:$addr), imm:$src)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { let mayLoad = 1; } @@ -851,43 +829,43 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3), - mnemonic#"\t$dst, $src2, $src3", - [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2), + mnemonic#"\t$R1, $R3, $R2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXF<opcode, (outs cls:$dst), - (ins cls:$src1, cls:$src2, bdxaddr12only:$src3), - mnemonic#"\t$dst, $src2, $src3", - [(set cls:$dst, (operator cls:$src1, cls:$src2, - (load bdxaddr12only:$src3)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXF<opcode, (outs cls:$R1), + (ins 
cls:$R1src, cls:$R3, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $R3, $XBD2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, + (load bdxaddr12only:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr12only> - : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr), - mnemonic#"\t$dst, $new, $ptr", - [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> { - let Constraints = "$old = $dst"; - let DisableEncoding = "$old"; + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; let mayStore = 1; } class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr20only> - : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr), - mnemonic#"\t$dst, $new, $ptr", - [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> { - let Constraints = "$old = $dst"; - let DisableEncoding = "$old"; + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; let mayStore = 1; } @@ -904,12 +882,12 @@ multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRIEf<opcode, (outs cls1:$dst), - (ins cls1:$src1, cls2:$src2, - uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3), - mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : 
InstRIEf<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, + uimm8zx6:$I3, uimm8zx6:$I4, uimm8zx6:$I5), + mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 7ffa382d36..903fb740a4 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -42,20 +42,19 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, // Unconditional branches. R1 is the condition-code mask (all 1s). let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { let isIndirectBranch = 1 in - def BR : InstRR<0x07, (outs), (ins ADDR64:$dst), - "br\t$dst", [(brind ADDR64:$dst)]>; + def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), + "br\t$R2", [(brind ADDR64:$R2)]>; // An assembler extended mnemonic for BRC. Use a separate instruction for // the asm parser, so that we don't relax Js to external symbols into JGs. let isCodeGenOnly = 1 in - def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; - let isAsmParserOnly = 1 in - def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>; + def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>; // An assembler extended mnemonic for BRCL. (The extension is "G" // rather than "L" because "JL" is "Jump if Less".) - def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst), - "jg\t$dst", [(br bb:$dst)]>; + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + "jg\t$I2", [(br bb:$I2)]>; } // Conditional branches. It's easier for LLVM to handle these branches @@ -64,42 +63,39 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { // JE and JLH when writing out the assembly though. 
multiclass CondBranches<Operand imm, string short, string long> { let isBranch = 1, isTerminator = 1, Uses = [PSW] in { - def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>; - def L : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>; + def "" : InstRI<0xA74, (outs), (ins imm:$R1, brtarget16:$I2), short, []>; + def L : InstRIL<0xC04, (outs), (ins imm:$R1, brtarget32:$I2), long, []>; } } let isCodeGenOnly = 1 in - defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">; -let isAsmParserOnly = 1 in - defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">; + defm BRC : CondBranches<cond4, "j$R1\t$I2", "jg$R1\t$I2">; +defm AsmBRC : CondBranches<uimm8zx4, "brc\t$R1, $I2", "brcl\t$R1, $I2">; def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>; // Define AsmParser mnemonics for each condition code. multiclass CondExtendedMnemonic<bits<4> Cond, string name> { let R1 = Cond in { - def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst), - "j"##name##"\t$dst", []>; - def L : InstRIL<0xC04, (outs), (ins brtarget32:$dst), - "jg"##name##"\t$dst", []>; + def "" : InstRI<0xA74, (outs), (ins brtarget16:$I2), + "j"##name##"\t$I2", []>; + def L : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + "jg"##name##"\t$I2", []>; } } -let isAsmParserOnly = 1 in { - defm AsmJO : CondExtendedMnemonic<1, "o">; - defm AsmJH : CondExtendedMnemonic<2, "h">; - defm AsmJNLE : CondExtendedMnemonic<3, "nle">; - defm AsmJL : CondExtendedMnemonic<4, "l">; - defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; - defm AsmJLH : CondExtendedMnemonic<6, "lh">; - defm AsmJNE : CondExtendedMnemonic<7, "ne">; - defm AsmJE : CondExtendedMnemonic<8, "e">; - defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; - defm AsmJHE : CondExtendedMnemonic<10, "he">; - defm AsmJNL : CondExtendedMnemonic<11, "nl">; - defm AsmJLE : CondExtendedMnemonic<12, "le">; - defm AsmJNH : CondExtendedMnemonic<13, "nh">; - defm AsmJNO : 
CondExtendedMnemonic<14, "no">; -} +defm AsmJO : CondExtendedMnemonic<1, "o">; +defm AsmJH : CondExtendedMnemonic<2, "h">; +defm AsmJNLE : CondExtendedMnemonic<3, "nle">; +defm AsmJL : CondExtendedMnemonic<4, "l">; +defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; +defm AsmJLH : CondExtendedMnemonic<6, "lh">; +defm AsmJNE : CondExtendedMnemonic<7, "ne">; +defm AsmJE : CondExtendedMnemonic<8, "e">; +defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; +defm AsmJHE : CondExtendedMnemonic<10, "he">; +defm AsmJNL : CondExtendedMnemonic<11, "nl">; +defm AsmJLE : CondExtendedMnemonic<12, "le">; +defm AsmJNH : CondExtendedMnemonic<13, "nh">; +defm AsmJNO : CondExtendedMnemonic<14, "no">; def Select32 : SelectWrapper<GR32>; def Select64 : SelectWrapper<GR64>; @@ -112,24 +108,22 @@ def Select64 : SelectWrapper<GR64>; let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D], R1 = 14, isCodeGenOnly = 1 in { - def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops), - "bras\t%r14, $dst", []>; - def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops), - "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>; - def BASR : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops), - "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>; + def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops), + "bras\t%r14, $I2", []>; + def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$I2, variable_ops), + "brasl\t%r14, $I2", [(z_call pcrel32call:$I2)]>; + def BASR : InstRR<0x0D, (outs), (ins ADDR64:$R2, variable_ops), + "basr\t%r14, $R2", [(z_call ADDR64:$R2)]>; } // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. 
-let isAsmParserOnly = 1 in { - def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst), - "bras\t$save, $dst", []>; - def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst), - "brasl\t$save, $dst", []>; - def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst), - "basr\t$save, $dst", []>; -} +def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), + "bras\t$R1, $I2", []>; +def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), + "brasl\t$R1, $I2", []>; +def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), + "basr\t$R1, $R2", []>; //===----------------------------------------------------------------------===// // Move instructions @@ -337,21 +331,21 @@ def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>; // Load BDX-style addresses. let neverHasSideEffects = 1, Function = "la" in { let PairType = "12" in - def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src), - "la\t$dst, $src", - [(set GR64:$dst, laaddr12pair:$src)]>; + def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2), + "la\t$R1, $XBD2", + [(set GR64:$R1, laaddr12pair:$XBD2)]>; let PairType = "20" in - def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src), - "lay\t$dst, $src", - [(set GR64:$dst, laaddr20pair:$src)]>; + def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2), + "lay\t$R1, $XBD2", + [(set GR64:$R1, laaddr20pair:$XBD2)]>; } // Load a PC-relative address. There's no version of this instruction // with a 16-bit offset, so there's no relaxation. 
let neverHasSideEffects = 1 in { - def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src), - "larl\t$dst, $src", - [(set GR64:$dst, pcrel32:$src)]>; + def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2), + "larl\t$R1, $I2", + [(set GR64:$R1, pcrel32:$I2)]>; } //===----------------------------------------------------------------------===// @@ -484,6 +478,7 @@ let Defs = [PSW] in { def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>; // Subtraction of memory. + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16>; defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>; def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>; def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>; @@ -903,9 +898,9 @@ let Defs = [PSW] in { // Read a 32-bit access register into a GR32. As with all GR32 operations, // the upper 32 bits of the enclosing GR64 remain unchanged, which is useful // when a 64-bit address is stored in a pair of access registers. -def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src), - "ear\t$dst, $src", - [(set GR32:$dst, (z_extract_access access_reg:$src))]>; +def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2), + "ear\t$R1, $R2", + [(set GR32:$R1, (z_extract_access access_reg:$R2))]>; // Find leftmost one, AKA count leading zeros. 
The instruction actually // returns a pair of GR64s, the first giving the number of leading zeros diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 0abc3f7517..66d9c5fceb 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -24,14 +24,30 @@ class ImmediateAsmOperand<string name> class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> : PatLeaf<(vt imm), pred, xform>, Operand<vt> { let PrintMethod = "print"##asmop##"Operand"; + let DecoderMethod = "decode"##asmop##"Operand"; let ParserMatchClass = !cast<AsmOperandClass>(asmop); } +// Constructs an asm operand for a PC-relative address. SIZE says how +// many bits there are. +class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> { + let PredicateMethod = "isImm"; + let ParserMethod = "parsePCRel"##size; +} + +// Constructs an operand for a PC-relative address with address type VT. +// ASMOP is the associated asm operand. +class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelOperand"; + let ParserMatchClass = asmop; +} + // Constructs both a DAG pattern and instruction operand for a PC-relative -// address with address size VT. SELF is the name of the operand. -class PCRelAddress<ValueType vt, string self> +// address with address size VT. SELF is the name of the operand and +// ASMOP is the associated asm operand. +class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop> : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>, - Operand<vt> { + PCRelOperand<vt, asmop> { let MIOperandInfo = (ops !cast<Operand>(self)); } @@ -45,11 +61,14 @@ class AddressAsmOperand<string format, string bitsize, string dispsize> } // Constructs both a DAG pattern and instruction operand for an addressing mode. -// The mode is selected by custom code in selectTYPE...SUFFIX(). 
The address -// registers have BITSIZE bits and displacements have DISPSIZE bits. NUMOPS is -// the number of operands that make up an address and OPERANDS lists the types -// of those operands using (ops ...). FORMAT is the type of addressing mode, -// which needs to match the names used in AddressAsmOperand. +// The mode is selected by custom code in select<TYPE><DISPSIZE><SUFFIX>(), +// encoded by custom code in get<FORMAT><DISPSIZE>Encoding() and decoded +// by custom code in decode<TYPE><BITSIZE>Disp<DISPSIZE>Operand(). +// The address registers have BITSIZE bits and displacements have +// DISPSIZE bits. NUMOPS is the number of operands that make up an +// address and OPERANDS lists the types of those operands using (ops ...). +// FORMAT is the type of addressing mode, which needs to match the names +// used in AddressAsmOperand. class AddressingMode<string type, string bitsize, string dispsize, string suffix, int numops, string format, dag operands> : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, @@ -57,6 +76,8 @@ class AddressingMode<string type, string bitsize, string dispsize, [add, sub, or, frameindex, z_adjdynalloc]>, Operand<!cast<ValueType>("i"##bitsize)> { let PrintMethod = "print"##format##"Operand"; + let EncoderMethod = "get"##format##dispsize##"Encoding"; + let DecoderMethod = "decode"##format##bitsize##"Disp"##dispsize##"Operand"; let MIOperandInfo = operands; let ParserMatchClass = !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize); @@ -334,30 +355,39 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; // Symbolic address operands //===----------------------------------------------------------------------===// +// PC-relative asm operands. +def PCRel16 : PCRelAsmOperand<"16">; +def PCRel32 : PCRelAsmOperand<"32">; + // PC-relative offsets of a basic block. The offset is sign-extended // and multiplied by 2. 
-def brtarget16 : Operand<OtherVT> { +def brtarget16 : PCRelOperand<OtherVT, PCRel16> { let EncoderMethod = "getPC16DBLEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; } -def brtarget32 : Operand<OtherVT> { +def brtarget32 : PCRelOperand<OtherVT, PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } // A PC-relative offset of a global value. The offset is sign-extended // and multiplied by 2. -def pcrel32 : PCRelAddress<i64, "pcrel32"> { +def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } // A PC-relative offset of a global value when the value is used as a // call target. The offset is sign-extended and multiplied by 2. -def pcrel16call : PCRelAddress<i64, "pcrel16call"> { +def pcrel16call : PCRelAddress<i64, "pcrel16call", PCRel16> { let PrintMethod = "printCallOperand"; let EncoderMethod = "getPLT16DBLEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; } -def pcrel32call : PCRelAddress<i64, "pcrel32call"> { +def pcrel32call : PCRelAddress<i64, "pcrel32call", PCRel32> { let PrintMethod = "printCallOperand"; let EncoderMethod = "getPLT32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 8c4c456ef5..17450ee53e 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -33,6 +33,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, "-f32:32-f64:64-f128:64-a0:8:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this, Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 019a670083..263eb5ed9c 100644 --- 
a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1196,6 +1196,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, } } assert (Found && "Unable to rewrite ImmDisp."); + (void)Found; } else { // We have a symbolic and an immediate displacement, but no displacement // before the bracketed expression. Put the immediate displacement diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 226ebca8cb..d5aab8e0a2 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -263,7 +263,7 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { return X; } -static MCAsmInfo *createX86MCAsmInfo(StringRef TT) { +static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); bool is64Bit = TheTriple.getArch() == Triple::x86_64; @@ -290,14 +290,16 @@ static MCAsmInfo *createX86MCAsmInfo(StringRef TT) { int stackGrowth = is64Bit ? -8 : -4; // Initial state of the frame pointer is esp+stackGrowth. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(is64Bit ? X86::RSP : X86::ESP, stackGrowth); - MAI->addInitialFrameState(0, Dst, Src); + unsigned StackPtr = is64Bit ? X86::RSP : X86::ESP; + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( + 0, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth); + MAI->addInitialFrameState(Inst); // Add return address to move list - MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth); - MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP); - MAI->addInitialFrameState(0, CSDst, CSSrc); + unsigned InstPtr = is64Bit ? 
X86::RIP : X86::EIP; + MCCFIInstruction Inst2 = MCCFIInstruction::createOffset( + 0, MRI.getDwarfRegNum(InstPtr, true), stackGrowth); + MAI->addInitialFrameState(Inst2); return MAI; } diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 49721df7c1..07314a092c 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -357,21 +357,21 @@ defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw, defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b, MMX_INTALU_ITINS>; defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q, - MMX_INTALUQ_ITINS, 1>; + MMX_INTALUQ_ITINS>; defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w, MMX_PHADDSUBW>; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 00fa47f80b..0422a61fb8 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -49,6 +49,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), JITInfo(*this) { + initAsmInfo(); } void X86_64TargetMachine::anchor() { } @@ -69,6 +70,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, TLInfo(*this), 
TSInfo(*this), JITInfo(*this) { + initAsmInfo(); } /// X86TargetMachine ctor - Create an X86 target. diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index e38da34a81..10bb6dfa92 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -51,13 +51,13 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createXCoreMCAsmInfo(StringRef TT) { +static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new XCoreMCAsmInfo(TT); // Initial state of the frame pointer is SP. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(XCore::SP, 0); - MAI->addInitialFrameState(0, Dst, Src); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, XCore::SP, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 07e5fff141..3ef1520c71 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -33,6 +33,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this) { + initAsmInfo(); } namespace { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 0ef900e2b9..4a9cb27b03 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -3323,8 +3323,6 @@ bool GlobalOpt::runOnModule(Module &M) { // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); - Function *CXAAtExitFn = FindCXAAtExit(M, TLI); - bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -3342,7 +3340,9 @@ bool GlobalOpt::runOnModule(Module &M) { // Resolve aliases, when possible. 
LocalChange |= OptimizeGlobalAliases(M); - // Try to remove trivial global destructors. + // Try to remove trivial global destructors if they are not removed + // already. + Function *CXAAtExitFn = FindCXAAtExit(M, TLI); if (CXAAtExitFn) LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 87d56214a3..51ca29bc07 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -846,7 +846,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { /// FP value and: /// 1) 1/C is exact, or /// 2) reciprocal is allowed. -/// If the convertion was successful, the simplified expression "X * 1/C" is +/// If the conversion was successful, the simplified expression "X * 1/C" is /// returned; otherwise, NULL is returned. /// static Instruction *CvtFDivConstToReciprocal(Value *Dividend, diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 43e2e20035..4bf25facc6 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -107,6 +107,12 @@ namespace { return std::make_pair(Vector.begin() + Pair.first->second, false); } + iterator find(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + const_iterator find(const KeyT &Key) const { typename MapTy::const_iterator It = Map.find(Key); if (It == Map.end()) return Vector.end(); @@ -253,6 +259,40 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { return false; } +/// This is a wrapper around getUnderlyingObjCPtr along the lines of +/// GetUnderlyingObjects except that it returns early when it sees the first +/// alloca. 
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) { + SmallPtrSet<const Value *, 4> Visited; + SmallVector<const Value *, 4> Worklist; + Worklist.push_back(V); + do { + const Value *P = Worklist.pop_back_val(); + P = GetUnderlyingObjCPtr(P); + + if (isa<AllocaInst>(P)) + return true; + + if (!Visited.insert(P)) + continue; + + if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + if (const PHINode *PN = dyn_cast<const PHINode>(P)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + } while (!Worklist.empty()); + + return false; +} + + /// @} /// /// \defgroup ARCOpt ARC Optimization. @@ -300,18 +340,18 @@ STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); STATISTIC(NumRets, "Number of return value forwarding " - "retain+autoreleaes eliminated"); + "retain+autoreleases eliminated"); STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +#ifndef NDEBUG STATISTIC(NumRetainsBeforeOpt, - "Number of retains before optimization."); + "Number of retains before optimization"); STATISTIC(NumReleasesBeforeOpt, - "Number of releases before optimization."); -#ifndef NDEBUG + "Number of releases before optimization"); STATISTIC(NumRetainsAfterOpt, - "Number of retains after optimization."); + "Number of retains after optimization"); STATISTIC(NumReleasesAfterOpt, - "Number of releases after optimization."); + "Number of releases after optimization"); #endif namespace { @@ -414,8 +454,18 @@ namespace { /// sequence. SmallPtrSet<Instruction *, 2> ReverseInsertPts; + /// Does this pointer have multiple owners? 
+ /// + /// In the presence of multiple owners with the same provenance caused by + /// allocas, we can not assume that the frontend will emit balanced code + /// since it could put the release on the pointer loaded from the + /// alloca. This confuses the optimizer so we must be more conservative in + /// that case. + bool MultipleOwners; + RRInfo() : - KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0), + MultipleOwners(false) {} void clear(); @@ -428,6 +478,7 @@ namespace { void RRInfo::clear() { KnownSafe = false; IsTailCallRelease = false; + MultipleOwners = false; ReleaseMetadata = 0; Calls.clear(); ReverseInsertPts.clear(); @@ -457,10 +508,12 @@ namespace { Seq(S_None) {} void SetKnownPositiveRefCount() { + DEBUG(dbgs() << "Setting Known Positive.\n"); KnownPositiveRefCount = true; } void ClearKnownPositiveRefCount() { + DEBUG(dbgs() << "Clearing Known Positive.\n"); KnownPositiveRefCount = false; } @@ -516,6 +569,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); + RRI.MultipleOwners |= Other.RRI.MultipleOwners; // Merge the insert point sets. If there are any differences, // that makes this a partial merge. @@ -587,14 +641,26 @@ namespace { /// definition. void SetAsExit() { BottomUpPathCount = 1; } + /// Attempt to find the PtrState object describing the top down state for + /// pointer Arg. Return a new initialized PtrState describing the top down + /// state for Arg if we do not find one. PtrState &getPtrTopDownState(const Value *Arg) { return PerPtrTopDown[Arg]; } + /// Attempt to find the PtrState object describing the bottom up state for + /// pointer Arg. Return a new initialized PtrState describing the bottom up + /// state for Arg if we do not find one. 
PtrState &getPtrBottomUpState(const Value *Arg) { return PerPtrBottomUp[Arg]; } + /// Attempt to find the PtrState object describing the bottom up state for + /// pointer Arg. + ptr_iterator findPtrBottomUpState(const Value *Arg) { + return PerPtrBottomUp.find(Arg); + } + void clearBottomUpPointers() { PerPtrBottomUp.clear(); } @@ -1440,11 +1506,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_RetainBlock: // If we strength reduce an objc_retainBlock to an objc_retain, continue // onto the objc_retain peephole optimizations. Otherwise break. - if (!OptimizeRetainBlockCall(F, Inst, Class)) - break; - // FALLTHROUGH - case IC_Retain: - ++NumRetainsBeforeOpt; + OptimizeRetainBlockCall(F, Inst, Class); break; case IC_RetainRV: if (OptimizeRetainRVCall(F, Inst)) @@ -1453,9 +1515,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_AutoreleaseRV: OptimizeAutoreleaseRVCall(F, Inst, Class); break; - case IC_Release: - ++NumReleasesBeforeOpt; - break; } // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. @@ -1866,6 +1925,28 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case IC_None: // These are irrelevant. return NestingDetected; + case IC_User: + // If we have a store into an alloca of a pointer we are tracking, the + // pointer has multiple owners implying that we must be more conservative. + // + // This comes up in the context of a pointer being ``KnownSafe''. In the + // presense of a block being initialized, the frontend will emit the + // objc_retain on the original pointer and the release on the pointer loaded + // from the alloca. The optimizer will through the provenance analysis + // realize that the two are related, but since we only require KnownSafe in + // one direction, will match the inner retain on the original pointer with + // the guard release on the original pointer. 
This is fixed by ensuring that + // in the presense of allocas we only unconditionally remove pointers if + // both our retain and our release are KnownSafe. + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) { + BBState::ptr_iterator I = MyStates.findPtrBottomUpState( + StripPointerCastsAndObjCCalls(SI->getValueOperand())); + if (I != MyStates.bottom_up_ptr_end()) + I->second.RRI.MultipleOwners = true; + } + } + break; default: break; } @@ -2412,8 +2493,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> bool KnownSafe, bool &AnyPairsCompletelyEliminated) { // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. + // is already incremented, we can similarly ignore possible decrements unless + // we are dealing with a retainable object with multiple provenance sources. bool KnownSafeTD = true, KnownSafeBU = true; + bool MultipleOwners = false; // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. @@ -2432,6 +2515,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> assert(It != Retains.end()); const RRInfo &NewRetainRRI = It->second; KnownSafeTD &= NewRetainRRI.KnownSafe; + MultipleOwners |= NewRetainRRI.MultipleOwners; for (SmallPtrSet<Instruction *, 2>::const_iterator LI = NewRetainRRI.Calls.begin(), LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { @@ -2525,9 +2609,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (NewRetains.empty()) break; } - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { + // If the pointer is known incremented in 1 direction and we do not have + // MultipleOwners, we can safely remove the retain/releases. 
Otherwise we need + // to be known safe in both directions. + bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) || + ((KnownSafeTD || KnownSafeBU) && !MultipleOwners); + if (UnconditionallySafe) { RetainsToMove.ReverseInsertPts.clear(); ReleasesToMove.ReverseInsertPts.clear(); NewCount = 0; @@ -3050,6 +3137,12 @@ bool ObjCARCOpt::runOnFunction(Function &F) { PA.setAA(&getAnalysis<AliasAnalysis>()); +#ifndef NDEBUG + if (AreStatisticsEnabled()) { + GatherStatistics(F, false); + } +#endif + // This pass performs several distinct transformations. As a compile-time aid // when compiling code that isn't ObjC, skip these if the relevant ObjC // library functions aren't declared. diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0dd6abb1ae..58a1a74655 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -318,6 +318,93 @@ private: ValueMap WidenMap; }; +/// \brief Check if conditionally executed loads are hoistable. +/// +/// This class has two functions: isHoistableLoad and canHoistAllLoads. +/// isHoistableLoad should be called on all load instructions that are executed +/// conditionally. After all conditional loads are processed, the client should +/// call canHoistAllLoads to determine if all of the conditional executed loads +/// have an unconditional memory access to the same memory address in the loop. +class LoadHoisting { + typedef SmallPtrSet<Value *, 8> MemorySet; + + Loop *TheLoop; + DominatorTree *DT; + MemorySet CondLoadAddrSet; + +public: + LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {} + + /// \brief Check if the instruction is a load with a identifiable address. + bool isHoistableLoad(Instruction *L); + + /// \brief Check if all of the conditional loads are hoistable because there + /// exists an unconditional memory access to the same address in the loop. 
+ bool canHoistAllLoads(); +}; + +bool LoadHoisting::isHoistableLoad(Instruction *L) { + LoadInst *LI = dyn_cast<LoadInst>(L); + if (!LI) + return false; + + CondLoadAddrSet.insert(LI->getPointerOperand()); + return true; +} + +static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + Value *Addr = 0; + + // Try a load. + LoadInst *LI = dyn_cast<LoadInst>(I); + if (LI) { + Addr = LI->getPointerOperand(); + Set.insert(Addr); + continue; + } + + // Try a store. + StoreInst *SI = dyn_cast<StoreInst>(I); + if (!SI) + continue; + + Addr = SI->getPointerOperand(); + Set.insert(Addr); + } +} + +bool LoadHoisting::canHoistAllLoads() { + // No conditional loads. + if (CondLoadAddrSet.empty()) + return true; + + MemorySet UncondMemAccesses; + std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector(); + BasicBlock *LoopLatch = TheLoop->getLoopLatch(); + + // Iterate over the unconditional blocks and collect memory access addresses. + for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { + BasicBlock *BB = LoopBlocks[i]; + + // Ignore conditional blocks. + if (BB != LoopLatch && !DT->dominates(BB, LoopLatch)) + continue; + + addMemAccesses(BB, UncondMemAccesses); + } + + // And make sure there is a matching unconditional access for every + // conditional load. + for (MemorySet::iterator MI = CondLoadAddrSet.begin(), + ME = CondLoadAddrSet.end(); MI != ME; ++MI) + if (!UncondMemAccesses.count(*MI)) + return false; + + return true; +} + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. 
/// This class does not look at the profitability of vectorization, only the @@ -337,7 +424,8 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {} + Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false), + LoadSpeculation(L, DT) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -598,6 +686,9 @@ private: RuntimePointerCheck PtrRtCheck; /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; + + /// Utility to determine whether loads can be speculated. + LoadHoisting LoadSpeculation; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1389,9 +1480,10 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { - // Handle the integer induction counter: + // Handle the integer induction counter. assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); - assert(OrigPhi == OldInduction && "Unknown integer PHI"); + + // We have the canonical induction variable. if (OrigPhi == OldInduction) { // Create a truncated version of the resume value for the scalar loop, // we might have promoted the type to a larger width. @@ -1402,11 +1494,20 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); TruncResumeVal->addIncoming(EndValue, VecBody); + + // We know what the end value is. + EndValue = IdxEndRoundDown; + // We also know which PHI node holds it. + ResumeIndex = ResumeVal; + break; } - // We know what the end value is. - EndValue = IdxEndRoundDown; - // We also know which PHI node holds it. 
- ResumeIndex = ResumeVal; + + // Not the canonical induction variable - add the vector loop count to the + // start value. + Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, + II.StartValue->getType(), + "cast.crd"); + EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end"); break; } case LoopVectorizationLegality::IK_ReverseIntInduction: { @@ -2056,12 +2157,25 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { - assert(P == OldInduction && "Unexpected PHI"); - // We might have had to extend the type. - Value *Trunc = Builder.CreateTrunc(Induction, P->getType()); - Value *Broadcasted = getBroadcastInstrs(Trunc); - // After broadcasting the induction variable we need to make the - // vector consecutive by adding 0, 1, 2 ... + assert(P->getType() == II.StartValue->getType() && "Types must match"); + Type *PhiTy = P->getType(); + Value *Broadcasted; + if (P == OldInduction) { + // Handle the canonical induction variable. We might have had to + // extend the type. + Broadcasted = Builder.CreateTrunc(Induction, PhiTy); + } else { + // Handle other induction variables that are now based on the + // canonical one. + Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, + "normalized.idx"); + NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); + Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, + "offset.idx"); + } + Broadcasted = getBroadcastInstrs(Broadcasted); + // After broadcasting the induction variable we need to make the vector + // consecutive by adding 0, 1, 2, etc. for (unsigned part = 0; part < UF; ++part) Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); continue; @@ -2466,11 +2580,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Int inductions are special because we only allow one IV. 
if (IK == IK_IntInduction) { - if (Induction) { - DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); - return false; - } - Induction = Phi; + // Use the phi node with the widest type as induction. Use the last + // one if there are multiple (no good reason for doing this other + // than it is expedient). + if (!Induction || PhiTy == WidestIndTy) + Induction = Phi; } DEBUG(dbgs() << "LV: Found an induction variable.\n"); @@ -3236,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // We don't predicate loads/stores at the moment. - if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow()) + // We might be able to hoist the load. + if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it)) + return false; + + // We don't predicate stores at the moment. + if (it->mayWriteToMemory() || it->mayThrow()) return false; // The instructions below can trap. @@ -3251,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { } } + // Check that we can actually speculate the hoistable loads. 
+ if (!LoadSpeculation.canHoistAllLoads()) + return false; + return true; } diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 55adf8a816..50d2af0f65 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -282,6 +282,7 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { DEBUG(dbgs()<<"SLP: Adding to MustExtract " "because of a safe out of tree usage.\n"); MustExtract.insert(*it); + continue; } if (Lane == -1) Lane = LaneMap[*I]; if (Lane != LaneMap[*I]) { @@ -610,6 +611,9 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { GatherInstructions.push_back(Vec); } + for (unsigned i = 0; i < Ty->getNumElements(); ++i) + VectorizedValues[VL[i]] = Vec; + return Vec; } @@ -617,6 +621,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { Value *V = vectorizeTree_rec(VL, VF); Instruction *LastInstr = GetLastInstr(VL, VL.size()); + int LastInstrIdx = InstrIdx[LastInstr]; IRBuilder<> Builder(LastInstr); for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end(); it != e; ++it) { @@ -625,7 +630,16 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { assert(LaneMap.count(I) && "Unable to find the lane for the external use"); Value *Idx = Builder.getInt32(LaneMap[I]); Value *Extract = Builder.CreateExtractElement(Vec, Idx); - I->replaceAllUsesWith(Extract); + bool Replaced = false; + for (Value::use_iterator U = I->use_begin(), UE = U->use_end(); U != UE; + ++U) { + Instruction *UI = cast<Instruction>(*U); + if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx) + UI->replaceUsesOfWith(I ,Extract); + Replaced = true; + } + assert(Replaced && "Must replace at least one outside user"); + (void)Replaced; } // We moved some instructions around. 
We have to number them again @@ -633,6 +647,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { numberInstructions(); MustScalarize.clear(); MustExtract.clear(); + VectorizedValues.clear(); return V; } @@ -690,7 +705,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { IRBuilder<> Builder(GetLastInstr(VL, VF)); CastInst *CI = dyn_cast<CastInst>(VL0); Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); - VectorizedValues[VL0] = V; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = V; + return V; } case Instruction::Add: @@ -713,16 +731,19 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { case Instruction::Xor: { ValueList LHSVL, RHSVL; for (int i = 0; i < VF; ++i) { - RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); - LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); + LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); } - Value *RHS = vectorizeTree_rec(RHSVL, VF); Value *LHS = vectorizeTree_rec(LHSVL, VF); + Value *RHS = vectorizeTree_rec(RHSVL, VF); IRBuilder<> Builder(GetLastInstr(VL, VF)); BinaryOperator *BinOp = cast<BinaryOperator>(VL0); - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); - VectorizedValues[VL0] = V; + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS); + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = V; + return V; } case Instruction::Load: { @@ -739,7 +760,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { VecTy->getPointerTo()); LI = Builder.CreateLoad(VecPtr); LI->setAlignment(Alignment); - VectorizedValues[VL0] = LI; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = LI; + return LI; } case Instruction::Store: { @@ -762,9 +786,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { return 0; } default: - Value *S = Scalarize(VL, VecTy); - VectorizedValues[VL0] = S; - return S; + return Scalarize(VL, VecTy); 
} } |