diff options
author | Abdoulaye Walsimou Gaye <awg@embtoolkit.org> | 2013-05-15 21:43:59 +0200 |
---|---|---|
committer | Abdoulaye Walsimou Gaye <awg@embtoolkit.org> | 2013-05-15 21:43:59 +0200 |
commit | b2e3c7da703d4c3dda41d4f1e18cd976e6c4381a (patch) | |
tree | 7462cd2988228ae24c40b37a9382d35874bbf099 | |
parent | 23d8d191eff180ba312a4d1b4fec8597e5a988d5 (diff) | |
parent | 4ef61f2ad4ff509ee05c7051d359009511f81226 (diff) | |
download | llvm-embtk-support-master.tar.gz llvm-embtk-support-master.tar.bz2 llvm-embtk-support-master.tar.xz |
Merge branch 'master' into embtk-support-masterembtk-support-master
791 files changed, 22366 insertions, 10720 deletions
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst index 10ca307b78..d54df0f6f4 100644 --- a/docs/CodeGenerator.rst +++ b/docs/CodeGenerator.rst @@ -1838,7 +1838,7 @@ Here is the table: :raw-html:`<td class="no"></td> <!-- Mips -->` :raw-html:`<td class="na"></td> <!-- NVPTX -->` :raw-html:`<td class="no"></td> <!-- PowerPC -->` -:raw-html:`<td class="no"></td> <!-- SystemZ -->` +:raw-html:`<td class="yes"></td> <!-- SystemZ -->` :raw-html:`<td class="no"></td> <!-- Sparc -->` :raw-html:`<td class="yes"></td> <!-- X86 -->` :raw-html:`<td class="yes"></td> <!-- XCore -->` diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst index fce63ba688..0d9834918a 100644 --- a/docs/CommandGuide/FileCheck.rst +++ b/docs/CommandGuide/FileCheck.rst @@ -194,6 +194,55 @@ can be used: ; CHECK: ret i8 } +The "CHECK-DAG:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If it's necessary to match strings that don't occur in a strictly sequential +order, "``CHECK-DAG:``" could be used to verify them between two matches (or +before the first match, or after the last match). For example, clang emits +vtable globals in reverse order. Using ``CHECK-DAG:``, we can keep the checks +in the natural order: + +.. code-block:: c++ + + // RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s + + struct Foo { virtual void method(); }; + Foo f; // emit vtable + // CHECK-DAG: @_ZTV3Foo = + + struct Bar { virtual void method(); }; + Bar b; + // CHECK-DAG: @_ZTV3Bar = + + +With captured variables, ``CHECK-DAG:`` is able to match valid topological +orderings of a DAG with edges from the definition of a variable to its use. +It's useful, e.g., when your test cases need to match different output +sequences from the instruction scheduler. For example, + +.. code-block:: llvm + + ; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2 + ; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4 + ; CHECK: mul r5, [[REG1]], [[REG2]] + +In this case, any order of that two ``add`` instructions will be allowed. 
+ +``CHECK-NOT:`` directives could be mixed with ``CHECK-DAG:`` directives to +exclude strings between the surrounding ``CHECK-DAG:`` directives. As a result, +the surrounding ``CHECK-DAG:`` directives cannot be reordered, i.e. all +occurrences matching ``CHECK-DAG:`` before ``CHECK-NOT:`` must not fall behind +occurrences matching ``CHECK-DAG:`` after ``CHECK-NOT:``. For example, + +.. code-block:: llvm + + ; CHECK-DAG: BEFORE + ; CHECK-NOT: NOT + ; CHECK-DAG: AFTER + +This case will reject input strings where ``BEFORE`` occurs after ``AFTER``. + FileCheck Pattern Matching Syntax ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index cb60df73d9..96461e51b9 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -191,6 +191,20 @@ LLVM-based code generators "on the fly" for the designed TTA processors and loads them in to the compiler backend as runtime libraries to avoid per-target recompilation of larger parts of the compiler chain. +Just-in-time Adaptive Decoder Engine (Jade) +------------------------------------------- + +`Jade <https://github.com/orcc/jade>`_ (Just-in-time Adaptive Decoder Engine) +is a generic video decoder engine using LLVM for just-in-time compilation of +video decoder configurations. Those configurations are designed by MPEG +Reconfigurable Video Coding (RVC) committee. MPEG RVC standard is built on a +stream-based dataflow representation of decoders. It is composed of a standard +library of coding tools written in RVC-CAL language and a dataflow +configuration --- block diagram --- of a decoder. + +Jade project is hosted as part of the Open RVC-CAL Compiler +(`Orcc <http://orcc.sf.net>`_) and requires it to translate the RVC-CAL standard +library of video coding tools into an LLVM assembly code. 
Additional Information ====================== diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index cf39ea7792..7d8e5729c2 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -1959,14 +1959,14 @@ int main(int argc, char *argv[]) { // Make the module, which holds all the code. llvm::Module *module = new llvm::Module("my cool jit", context); - llvm::JITMemoryManager *MemMgr = new llvm::SectionMemoryManager(); + llvm::RTDyldMemoryManager *MemMgr = new llvm::SectionMemoryManager(); // Build engine with JIT llvm::EngineBuilder factory(module); factory.setEngineKind(llvm::EngineKind::JIT); factory.setAllocateGVsWithCode(false); factory.setTargetOptions(Opts); - factory.setJITMemoryManager(MemMgr); + factory.setMCJITMemoryManager(MemMgr); factory.setUseMCJIT(true); llvm::ExecutionEngine *executionEngine = factory.create(); diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h index c4083eed6a..44a61fff85 100644 --- a/include/llvm/ADT/IntervalMap.h +++ b/include/llvm/ADT/IntervalMap.h @@ -496,7 +496,7 @@ public: NodeRef() {} /// operator bool - Detect a null ref. - operator bool() const { return pip.getOpaqueValue(); } + LLVM_EXPLICIT operator bool() const { return pip.getOpaqueValue(); } /// NodeRef - Create a reference to the node p with n elements. 
template <typename NodeT> diff --git a/include/llvm/ADT/OwningPtr.h b/include/llvm/ADT/OwningPtr.h index 86f9feee2c..6b9e42eaec 100644 --- a/include/llvm/ADT/OwningPtr.h +++ b/include/llvm/ADT/OwningPtr.h @@ -70,8 +70,9 @@ public: T *operator->() const { return Ptr; } T *get() const { return Ptr; } - operator bool() const { return Ptr != 0; } + LLVM_EXPLICIT operator bool() const { return Ptr != 0; } bool operator!() const { return Ptr == 0; } + bool isValid() const { return Ptr != 0; } void swap(OwningPtr &RHS) { T *Tmp = RHS.Ptr; @@ -132,7 +133,7 @@ public: } T *get() const { return Ptr; } - operator bool() const { return Ptr != 0; } + LLVM_EXPLICIT operator bool() const { return Ptr != 0; } bool operator!() const { return Ptr == 0; } void swap(OwningArrayPtr &RHS) { diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index f42515ac77..c1a6d74412 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -15,6 +15,7 @@ #ifndef LLVM_ADT_POINTERUNION_H #define LLVM_ADT_POINTERUNION_H +#include "llvm/Support/Compiler.h" #include "llvm/ADT/PointerIntPair.h" namespace llvm { @@ -109,7 +110,7 @@ namespace llvm { // we recursively strip off low bits if we have a nested PointerUnion. return !PointerLikeTypeTraits<PT1>::getFromVoidPointer(Val.getPointer()); } - operator bool() const { return !isNull(); } + LLVM_EXPLICIT operator bool() const { return !isNull(); } /// is<T>() return true if the Union currently holds the type matching T. template<typename T> @@ -174,6 +175,11 @@ namespace llvm { return V; } }; + + template<typename PT1, typename PT2> + bool operator==(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) { + return lhs.getOpaqueValue() == rhs.getOpaqueValue(); + } // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has // # low bits available = min(PT1bits,PT2bits)-1. 
@@ -251,7 +257,7 @@ namespace llvm { /// isNull - Return true if the pointer held in the union is null, /// regardless of which type it is. bool isNull() const { return Val.isNull(); } - operator bool() const { return !isNull(); } + LLVM_EXPLICIT operator bool() const { return !isNull(); } /// is<T>() return true if the Union currently holds the type matching T. template<typename T> @@ -359,7 +365,7 @@ namespace llvm { /// isNull - Return true if the pointer held in the union is null, /// regardless of which type it is. bool isNull() const { return Val.isNull(); } - operator bool() const { return !isNull(); } + LLVM_EXPLICIT operator bool() const { return !isNull(); } /// is<T>() return true if the Union currently holds the type matching T. template<typename T> diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index bc7924e10f..28baa9eb94 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -77,7 +77,7 @@ public: } /// \brief Test whether the inline cost is low enough for inlining. - operator bool() const { + LLVM_EXPLICIT operator bool() const { return Cost < Threshold; } diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index c2fd6ce367..5973255047 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -38,8 +38,8 @@ namespace llvm { class MachineConstantPoolValue; class MachineJumpTableInfo; class MachineModuleInfo; - class MachineMove; class MCAsmInfo; + class MCCFIInstruction; class MCContext; class MCSection; class MCStreamer; @@ -417,9 +417,8 @@ namespace llvm { // Dwarf Lowering Routines //===------------------------------------------------------------------===// - /// EmitCFIFrameMove - Emit frame instruction to describe the layout of the - /// frame. - void EmitCFIFrameMove(const MachineMove &Move) const; + /// \brief Emit frame instruction to describe the layout of the frame. 
+ void emitCFIInstruction(const MCCFIInstruction &Inst) const; //===------------------------------------------------------------------===// // Inline Asm Support diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index b719757822..95eb1e40dc 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -106,9 +106,9 @@ class MachineModuleInfo : public ImmutablePass { /// want. MachineModuleInfoImpl *ObjFileMMI; - /// FrameMoves - List of moves done by a function's prolog. Used to construct - /// frame maps by debug and exception handling consumers. - std::vector<MachineMove> FrameMoves; + /// List of moves done by a function's prolog. Used to construct frame maps + /// by debug and exception handling consumers. + std::vector<MCCFIInstruction> FrameInstructions; /// CompactUnwindEncoding - If the target supports it, this is the compact /// unwind encoding. It replaces a function's CIE and FDE. @@ -231,15 +231,15 @@ public: UsesVAFloatArgument = b; } - /// getFrameMoves - Returns a reference to a list of moves done in the current + /// \brief Returns a reference to a list of cfi instructions in the current /// function's prologue. Used to construct frame maps for debug and exception /// handling comsumers. - const std::vector<MachineMove> &getFrameMoves() { return FrameMoves; } + const std::vector<MCCFIInstruction> &getFrameInstructions() { + return FrameInstructions; + } void addFrameMove(MCSymbol *Label, const MachineLocation &Dst, - const MachineLocation &Src) { - FrameMoves.push_back(MachineMove(Label, Dst, Src)); - } + const MachineLocation &Src); /// getCompactUnwindEncoding - Returns the compact unwind encoding for a /// function if the target supports the encoding. 
This encoding replaces a diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 26d0433f3e..676cdaf7fb 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -162,7 +162,7 @@ namespace llvm { } /// Return true for a valid index. - operator bool() const { return isValid(); } + LLVM_EXPLICIT operator bool() const { return isValid(); } /// Print this index to the given raw_ostream. void print(raw_ostream &os) const; diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index bbaebc6f90..83a672de2b 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -34,6 +34,7 @@ namespace llvm { struct GenericValue; class Constant; +class DataLayout; class ExecutionEngine; class Function; class GlobalVariable; @@ -44,7 +45,7 @@ class MachineCodeInfo; class Module; class MutexGuard; class ObjectCache; -class DataLayout; +class RTDyldMemoryManager; class Triple; class Type; @@ -142,7 +143,7 @@ protected: static ExecutionEngine *(*MCJITCtor)( Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MCJMM, bool GVsWithCode, TargetMachine *TM); static ExecutionEngine *(*InterpCtor)(Module *M, std::string *ErrorStr); @@ -496,6 +497,7 @@ private: EngineKind::Kind WhichEngine; std::string *ErrorStr; CodeGenOpt::Level OptLevel; + RTDyldMemoryManager *MCJMM; JITMemoryManager *JMM; bool AllocateGVsWithCode; TargetOptions Options; @@ -511,6 +513,7 @@ private: WhichEngine = EngineKind::Either; ErrorStr = NULL; OptLevel = CodeGenOpt::Default; + MCJMM = NULL; JMM = NULL; Options = TargetOptions(); AllocateGVsWithCode = false; @@ -532,12 +535,29 @@ public: WhichEngine = w; return *this; } + + /// setMCJITMemoryManager - Sets the MCJIT memory manager to use. This allows + /// clients to customize their memory allocation policies for the MCJIT. 
This + /// is only appropriate for the MCJIT; setting this and configuring the builder + /// to create anything other than MCJIT will cause a runtime error. If create() + /// is called and is successful, the created engine takes ownership of the + /// memory manager. This option defaults to NULL. Using this option nullifies + /// the setJITMemoryManager() option. + EngineBuilder &setMCJITMemoryManager(RTDyldMemoryManager *mcjmm) { + MCJMM = mcjmm; + JMM = NULL; + return *this; + } - /// setJITMemoryManager - Sets the memory manager to use. This allows - /// clients to customize their memory allocation policies. If create() is - /// called and is successful, the created engine takes ownership of the - /// memory manager. This option defaults to NULL. + /// setJITMemoryManager - Sets the JIT memory manager to use. This allows + /// clients to customize their memory allocation policies. This is only + /// appropriate for either JIT or MCJIT; setting this and configuring the + /// builder to create an interpreter will cause a runtime error. If create() + /// is called and is successful, the created engine takes ownership of the + /// memory manager. This option defaults to NULL. This option overrides + /// setMCJITMemoryManager() as well. EngineBuilder &setJITMemoryManager(JITMemoryManager *jmm) { + MCJMM = NULL; JMM = jmm; return *this; } diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h index 07e6832324..305a96619a 100644 --- a/include/llvm/ExecutionEngine/SectionMemoryManager.h +++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h @@ -16,7 +16,7 @@ #define LLVM_EXECUTIONENGINE_SECTIONMEMORYMANAGER_H #include "llvm/ADT/SmallVector.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Memory.h" @@ -35,7 +35,7 @@ namespace llvm { /// in the JITed object. 
Permissions can be applied either by calling /// MCJIT::finalizeObject or by calling SectionMemoryManager::applyPermissions /// directly. Clients of MCJIT should call MCJIT::finalizeObject. -class SectionMemoryManager : public JITMemoryManager { +class SectionMemoryManager : public RTDyldMemoryManager { SectionMemoryManager(const SectionMemoryManager&) LLVM_DELETED_FUNCTION; void operator=(const SectionMemoryManager&) LLVM_DELETED_FUNCTION; @@ -108,54 +108,6 @@ private: MemoryGroup CodeMem; MemoryGroup RWDataMem; MemoryGroup RODataMem; - -public: - /// - /// Functions below are not used by MCJIT or RuntimeDyld, but must be - /// implemented because they are declared as pure virtuals in the base class. - /// - - virtual void setMemoryWritable() { - llvm_unreachable("Unexpected call!"); - } - virtual void setMemoryExecutable() { - llvm_unreachable("Unexpected call!"); - } - virtual void setPoisonMemory(bool poison) { - llvm_unreachable("Unexpected call!"); - } - virtual void AllocateGOT() { - llvm_unreachable("Unexpected call!"); - } - virtual uint8_t *getGOTBase() const { - llvm_unreachable("Unexpected call!"); - return 0; - } - virtual uint8_t *startFunctionBody(const Function *F, - uintptr_t &ActualSize){ - llvm_unreachable("Unexpected call!"); - return 0; - } - virtual uint8_t *allocateStub(const GlobalValue *F, unsigned StubSize, - unsigned Alignment) { - llvm_unreachable("Unexpected call!"); - return 0; - } - virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart, - uint8_t *FunctionEnd) { - llvm_unreachable("Unexpected call!"); - } - virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) { - llvm_unreachable("Unexpected call!"); - return 0; - } - virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) { - llvm_unreachable("Unexpected call!"); - return 0; - } - virtual void deallocateFunctionBody(void *Body) { - llvm_unreachable("Unexpected call!"); - } }; } diff --git a/include/llvm/MC/MCAsmInfo.h 
b/include/llvm/MC/MCAsmInfo.h index d020de3004..72d325f79c 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -17,6 +17,7 @@ #define LLVM_MC_MCASMINFO_H #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MachineLocation.h" #include <cassert> #include <vector> @@ -332,7 +333,7 @@ namespace llvm { //===--- Prologue State ----------------------------------------------===// - std::vector<MachineMove> InitialFrameState; + std::vector<MCCFIInstruction> InitialFrameState; public: explicit MCAsmInfo(); @@ -567,11 +568,11 @@ namespace llvm { return DwarfRegNumForCFI; } - void addInitialFrameState(MCSymbol *label, const MachineLocation &D, - const MachineLocation &S) { - InitialFrameState.push_back(MachineMove(label, D, S)); + void addInitialFrameState(const MCCFIInstruction &Inst) { + InitialFrameState.push_back(Inst); } - const std::vector<MachineMove> &getInitialFrameState() const { + + const std::vector<MCCFIInstruction> &getInitialFrameState() const { return InitialFrameState; } }; diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index a59776d5cd..92ad1b1a46 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -42,11 +42,6 @@ struct ELFRelocationEntry { const MCSymbol *Sym, uint64_t Addend, const MCFixup &Fixup) : r_offset(RelocOffset), Index(Idx), Type(RelType), Symbol(Sym), r_addend(Addend), Fixup(&Fixup) {} - - // Support lexicographic sorting. 
- bool operator<(const ELFRelocationEntry &RE) const { - return RE.r_offset < r_offset; - } }; class MCELFObjectTargetWriter { @@ -87,8 +82,6 @@ public: virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; - virtual void adjustFixupOffset(const MCFixup &Fixup, - uint64_t &RelocOffset); virtual void sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs); diff --git a/include/llvm/MC/MachineLocation.h b/include/llvm/MC/MachineLocation.h index 83c8b72ee4..c4a96606b1 100644 --- a/include/llvm/MC/MachineLocation.h +++ b/include/llvm/MC/MachineLocation.h @@ -10,11 +10,6 @@ // frame. Locations will be one of two forms; a register or an address formed // from a base address plus an offset. Register indirection can be specified by // explicitly passing an offset to the constructor. -// -// The MachineMove class is used to represent abstract move operations in the -// prolog/epilog of a compiled function. A collection of these objects can be -// used by a debug consumer to track the location of values when unwinding stack -// frames. //===----------------------------------------------------------------------===// @@ -74,30 +69,6 @@ public: void dump(); #endif }; - -/// MachineMove - This class represents the save or restore of a callee saved -/// register that exception or debug info needs to know about. -class MachineMove { -private: - /// Label - Symbol for post-instruction address when result of move takes - /// effect. - MCSymbol *Label; - - // Move to & from location. 
- MachineLocation Destination, Source; -public: - MachineMove() : Label(0) {} - - MachineMove(MCSymbol *label, const MachineLocation &D, - const MachineLocation &S) - : Label(label), Destination(D), Source(S) {} - - // Accessors - MCSymbol *getLabel() const { return Label; } - const MachineLocation &getDestination() const { return Destination; } - const MachineLocation &getSource() const { return Source; } -}; - } // End llvm namespace #endif diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h index 92107ac025..d80d9d8ad1 100644 --- a/include/llvm/Support/CallSite.h +++ b/include/llvm/Support/CallSite.h @@ -78,7 +78,7 @@ public: InstrTy *getInstruction() const { return I.getPointer(); } InstrTy *operator->() const { return I.getPointer(); } - operator bool() const { return I.getPointer(); } + LLVM_EXPLICIT operator bool() const { return I.getPointer(); } /// getCalledValue - Return the pointer to function that is being called. /// diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index 5bfb8ad41d..e1f6706618 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -70,7 +70,8 @@ namespace llvm { typedef unsigned (*TripleMatchQualityFnTy)(const std::string &TT); - typedef MCAsmInfo *(*MCAsmInfoCtorFnTy)(StringRef TT); + typedef MCAsmInfo *(*MCAsmInfoCtorFnTy)(const MCRegisterInfo &MRI, + StringRef TT); typedef MCCodeGenInfo *(*MCCodeGenInfoCtorFnTy)(StringRef TT, Reloc::Model RM, CodeModel::Model CM, @@ -265,10 +266,11 @@ namespace llvm { /// feature set; it should always be provided. Generally this should be /// either the target triple from the module, or the target triple of the /// host if that does not exist. 
- MCAsmInfo *createMCAsmInfo(StringRef Triple) const { + MCAsmInfo *createMCAsmInfo(const MCRegisterInfo &MRI, + StringRef Triple) const { if (!MCAsmInfoCtorFn) return 0; - return MCAsmInfoCtorFn(Triple); + return MCAsmInfoCtorFn(MRI, Triple); } /// createMCCodeGenInfo - Create a MCCodeGenInfo implementation. @@ -803,7 +805,7 @@ namespace llvm { TargetRegistry::RegisterMCAsmInfo(T, &Allocator); } private: - static MCAsmInfo *Allocator(StringRef TT) { + static MCAsmInfo *Allocator(const MCRegisterInfo &MRI, StringRef TT) { return new MCAsmInfoImpl(TT); } diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h index 6e4f57f6ab..338bb4b6f2 100644 --- a/include/llvm/Support/YAMLParser.h +++ b/include/llvm/Support/YAMLParser.h @@ -516,7 +516,7 @@ public: if (isAtEnd() || Other.isAtEnd()) return isAtEnd() && Other.isAtEnd(); - return *Doc == *Other.Doc; + return Doc == Other.Doc; } bool operator !=(const document_iterator &Other) { return !(*this == Other); @@ -543,7 +543,7 @@ public: private: bool isAtEnd() const { - return Doc == 0 || *Doc == 0; + return !Doc || !*Doc; } OwningPtr<Document> *Doc; diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 37a79fe852..c5e4b278d9 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -292,6 +292,7 @@ protected: // Can only create subclasses. Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); + void initAsmInfo(); public: /// \brief Register analysis passes for this target with a pass manager. 
/// diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index c0009cb989..674ce3aea7 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -89,7 +89,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); TD = getAnalysisIfAvailable<DataLayout>(); DT = getAnalysisIfAvailable<DominatorTree>(); - if (PredCache == 0) + if (!PredCache) PredCache.reset(new PredIteratorCache()); return false; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index eb744d243b..7ad4f57f75 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -636,14 +636,13 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); MachineModuleInfo &MMI = MF->getMMI(); - const std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions(); bool FoundOne = false; (void)FoundOne; - for (std::vector<MachineMove>::const_iterator I = Moves.begin(), - E = Moves.end(); - I != E; ++I) { + for (std::vector<MCCFIInstruction>::iterator I = Instructions.begin(), + E = Instructions.end(); I != E; ++I) { if (I->getLabel() == Label) { - EmitCFIFrameMove(*I); + emitCFIInstruction(*I); FoundOne = true; } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 31e42d47cf..e6d67e8822 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -169,28 +169,21 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // Dwarf Lowering Routines //===----------------------------------------------------------------------===// -/// EmitCFIFrameMove - Emit a frame instruction. 
-void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - // Reg + Offset - OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true), - Src.getOffset()); - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); +void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { + switch (Inst.getOperation()) { + default: + llvm_unreachable("Unexpected instruction"); + case MCCFIInstruction::OpDefCfaOffset: + OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfa: + OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfaRegister: + OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister()); + break; + case MCCFIInstruction::OpOffset: + OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + break; } } diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 673867ada1..cc0cb56e8b 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -114,8 +114,8 @@ DIE::~DIE() { /// Climb up the parent chain to get the compile unit DIE to which this DIE /// belongs. 
-DIE *DIE::getCompileUnit() const { - DIE *p = getParent(); +DIE *DIE::getCompileUnit() { + DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit) return p; diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 3c06001686..550d873128 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -153,7 +153,7 @@ namespace llvm { DIE *getParent() const { return Parent; } /// Climb up the parent chain to get the compile unit DIE this DIE belongs /// to. - DIE *getCompileUnit() const; + DIE *getCompileUnit(); void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } @@ -325,7 +325,9 @@ namespace llvm { class DIEEntry : public DIEValue { DIE *const Entry; public: - explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { + assert(E && "Cannot construct a DIEEntry with a null DIE"); + } DIE *getEntry() const { return Entry; } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 74b1b13367..49a85d81b4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -23,7 +23,6 @@ namespace llvm { template <typename T> class SmallVectorImpl; struct LandingPadInfo; class MachineModuleInfo; -class MachineMove; class MachineInstr; class MachineFunction; class MCAsmInfo; diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 1a09837834..7ce5cc6f67 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -62,14 +62,8 @@ static bool getVerboseAsm() { llvm_unreachable("Invalid verbose asm state"); } -LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, - StringRef CPU, StringRef FS, - TargetOptions Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : TargetMachine(T, Triple, CPU, FS, Options) { 
- CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); - AsmInfo = T.createMCAsmInfo(Triple); +void LLVMTargetMachine::initAsmInfo() { + AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. @@ -79,6 +73,15 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } +LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, + StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, Triple, CPU, FS, Options) { + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); +} + void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 8af9d053b1..74cf9f50df 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -268,6 +268,39 @@ MachineModuleInfo::MachineModuleInfo() MachineModuleInfo::~MachineModuleInfo() { } +static MCCFIInstruction convertMoveToCFI(const MCRegisterInfo &MRI, + MCSymbol *Label, + const MachineLocation &Dst, + const MachineLocation &Src) { + // If advancing cfa. 
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() == MachineLocation::VirtualFP) + return MCCFIInstruction::createDefCfaOffset(Label, Src.getOffset()); + // Reg + Offset + return MCCFIInstruction::createDefCfa( + Label, MRI.getDwarfRegNum(Src.getReg(), true), -Src.getOffset()); + } + + if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + return MCCFIInstruction::createDefCfaRegister( + Label, MRI.getDwarfRegNum(Dst.getReg(), true)); + } + + assert(!Dst.isReg() && "Machine move not supported yet."); + return MCCFIInstruction::createOffset( + Label, MRI.getDwarfRegNum(Src.getReg(), true), Dst.getOffset()); +} + + +void MachineModuleInfo::addFrameMove(MCSymbol *Label, + const MachineLocation &Dst, + const MachineLocation &Src) { + MCCFIInstruction I = + convertMoveToCFI(Context.getRegisterInfo(), Label, Dst, Src); + FrameInstructions.push_back(I); +} + bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = 0; @@ -303,7 +336,7 @@ bool MachineModuleInfo::doFinalization(Module &M) { /// void MachineModuleInfo::EndFunction() { // Clean up frame info. - FrameMoves.clear(); + FrameInstructions.clear(); // Clean up exception info. LandingPads.clear(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 9eed1fc62a..49748289da 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -713,7 +713,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; BC.Exit = BI.LiveOut ? 
SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.ChangesValue = BI.FirstDef; + BC.ChangesValue = BI.FirstDef.isValid(); if (!Intf.hasInterference()) continue; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c54dffbb13..a8621a89a8 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9254,19 +9254,33 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { for (unsigned I = 0; I != NumConcats; ++I) { // Make sure we're dealing with a copy. unsigned Begin = I * NumElemsPerConcat; - if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) - return SDValue(); + bool AllUndef = true, NoUndef = true; + for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(J) >= 0) + AllUndef = false; + else + NoUndef = false; + } - for (unsigned J = 1; J != NumElemsPerConcat; ++J) { - if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + if (NoUndef) { + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) return SDValue(); - } - unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; - if (FirstElt < N0.getNumOperands()) - Ops.push_back(N0.getOperand(FirstElt)); - else - Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + for (unsigned J = 1; J != NumElemsPerConcat; ++J) + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + + } else if (AllUndef) { + Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); + } else { // Mixed with general masks and undefs, can't do optimization. 
+ return SDValue(); + } } return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 3903743878..23984e9986 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -222,7 +222,9 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { PersonalityFn = LPads[0]->getPersonalityFn(); Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); - Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); + Builder.CreateStore(Builder.CreateBitCast(PersonalityFn, + Builder.getInt8PtrTy()), + PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index a789a2596d..90b93aaa72 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/DebugInfo.h" #include "llvm/IR/Function.h" @@ -528,6 +529,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (!V) continue; + const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V); + if (PSV && PSV->isConstant(MFI)) + continue; + // Climb up and find the original alloca. 
V = GetUnderlyingObject(V); // If we did not find one, or if the one that we found is not in our diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index e43ba4f1dd..0191636307 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -47,7 +48,7 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)( ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MCJMM, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, @@ -455,10 +456,12 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) return 0; + assert(!(JMM && MCJMM)); + // If the user specified a memory manager but didn't specify which engine to // create, we assume they only want the JIT, and we fail if they only want // the interpreter. - if (JMM) { + if (JMM || MCJMM) { if (WhichEngine & EngineKind::JIT) WhichEngine = EngineKind::JIT; else { @@ -467,6 +470,14 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return 0; } } + + if (MCJMM && ! UseMCJIT) { + if (ErrorStr) + *ErrorStr = + "Cannot create a legacy JIT with a runtime dyld memory " + "manager."; + return 0; + } // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. @@ -480,7 +491,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (UseMCJIT && ExecutionEngine::MCJITCtor) { ExecutionEngine *EE = - ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, + ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? 
MCJMM : JMM, AllocateGVsWithCode, TheTM.take()); if (EE) return EE; } else if (ExecutionEngine::JITCtor) { diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 38aa5474a3..ced567205a 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -39,7 +39,7 @@ extern "C" void LLVMLinkInMCJIT() { ExecutionEngine *MCJIT::createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -47,14 +47,14 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - return new MCJIT(M, TM, JMM ? JMM : new SectionMemoryManager(), GVsWithCode); + return new MCJIT(M, TM, MemMgr ? MemMgr : new SectionMemoryManager(), + GVsWithCode); } MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), Ctx(0), - MemMgr(MM ? MM : new SectionMemoryManager()), Dyld(MemMgr), - IsLoaded(false), M(m), ObjCache(0) { + : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), + IsLoaded(false), M(m), ObjCache(0) { setDataLayout(TM->getDataLayout()); } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 8c4bf6e1db..7f247e2dee 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -98,7 +98,7 @@ public: static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM); diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 89a3c0578c..81d7efa774 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -112,21 +112,20 @@ bool Value::hasNUsesOrMore(unsigned N) const { /// isUsedInBasicBlock - Return true if this value is used in the specified /// basic block. 
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const { - // Start by scanning over the instructions looking for a use before we start - // the expensive use iteration. - unsigned MaxBlockSize = 3; - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (std::find(I->op_begin(), I->op_end(), this) != I->op_end()) + // This can be computed either by scanning the instructions in BB, or by + // scanning the use list of this Value. Both lists can be very long, but + // usually one is quite short. + // + // Scan both lists simultaneously until one is exhausted. This limits the + // search to the shorter list. + BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + const_use_iterator UI = use_begin(), UE = use_end(); + for (; BI != BE && UI != UE; ++BI, ++UI) { + // Scan basic block: Check if this Value is used by the instruction at BI. + if (std::find(BI->op_begin(), BI->op_end(), this) != BI->op_end()) return true; - if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator - break; - } - - if (MaxBlockSize != 0) // We scanned the entire block and found no use. - return false; - - for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { - const Instruction *User = dyn_cast<Instruction>(*I); + // Scan use list: Check if the use at UI is in BB. + const Instruction *User = dyn_cast<Instruction>(*UI); if (User && User->getParent() == BB) return true; } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 3d995484e7..4f66156f6d 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -759,9 +759,6 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - // FIXME: no tests cover this. Is adjustFixupOffset dead code? 
- TargetObjectWriter->adjustFixupOffset(Fixup, RelocOffset); - if (!hasRelocationAddend()) Addend = 0; diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 4766b37476..d3c019246c 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -42,8 +42,12 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); assert(TheTarget && "Unable to create target!"); + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); + if (!MRI) + return 0; + // Get the assembler info needed to setup the MCContext. - const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple); + const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple); if (!MAI) return 0; @@ -51,10 +55,6 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, if (!MII) return 0; - const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); - if (!MRI) - return 0; - // Package up features to be passed to target/subtarget std::string FeaturesStr; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 7640a63ee3..efe0c46db8 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -873,17 +873,6 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, streamer.EmitValue(v, size); } -static const MachineLocation TranslateMachineLocation( - const MCRegisterInfo &MRI, - const MachineLocation &Loc) { - unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? - MachineLocation::VirtualFP : - unsigned(MRI.getDwarfRegNum(Loc.getReg(), true)); - const MachineLocation &NewLoc = Loc.isReg() ? 
- MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); - return NewLoc; -} - namespace { class FrameEmitterImpl { int CFAOffset; @@ -1316,32 +1305,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Initial Instructions const MCAsmInfo &MAI = context.getAsmInfo(); - const std::vector<MachineMove> &Moves = MAI.getInitialFrameState(); - std::vector<MCCFIInstruction> Instructions; - - for (int i = 0, n = Moves.size(); i != n; ++i) { - MCSymbol *Label = Moves[i].getLabel(); - const MachineLocation &Dst = - TranslateMachineLocation(MRI, Moves[i].getDestination()); - const MachineLocation &Src = - TranslateMachineLocation(MRI, Moves[i].getSource()); - - if (Dst.isReg()) { - assert(Dst.getReg() == MachineLocation::VirtualFP); - assert(!Src.isReg()); - MCCFIInstruction Inst = - MCCFIInstruction::createDefCfa(Label, Src.getReg(), -Src.getOffset()); - Instructions.push_back(Inst); - } else { - assert(Src.isReg()); - unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset(); - MCCFIInstruction Inst = - MCCFIInstruction::createOffset(Label, Reg, Offset); - Instructions.push_back(Inst); - } - } - + const std::vector<MCCFIInstruction> &Instructions = + MAI.getInitialFrameState(); EmitCFIInstructions(streamer, Instructions, NULL); // Padding diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 4cac84d666..ec7397d748 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -39,13 +39,23 @@ const MCSymbol *MCELFObjectTargetWriter::undefinedExplicitRelSym(const MCValue & return &Symbol.AliasedSymbol(); } -void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup, - uint64_t &RelocOffset) { +// ELF doesn't require relocations to be in any order. We sort by the r_offset, +// just to match gnu as for easier comparison. The use type and index is an +// arbitrary way of making the sort deterministic. 
+static int cmpRel(const void *AP, const void *BP) { + const ELFRelocationEntry &A = *(const ELFRelocationEntry *)AP; + const ELFRelocationEntry &B = *(const ELFRelocationEntry *)BP; + if (A.r_offset != B.r_offset) + return B.r_offset - A.r_offset; + if (B.Type != A.Type) + return A.Type - B.Type; + if (B.Index != A.Index) + return B.Index - A.Index; + llvm_unreachable("ELFRelocs might be unstable!"); } void MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs) { - // Sort by the r_offset, just like gnu as does. - array_pod_sort(Relocs.begin(), Relocs.end()); + array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index edefdb4c36..f7c71e97e3 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -545,7 +545,7 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) { return false; } -/// Process the specified .incbin file by seaching for it in the include paths +/// Process the specified .incbin file by searching for it in the include paths /// then just emitting the byte contents of the file to the streamer. This /// returns true on failure. 
bool AsmParser::ProcessIncbinFile(const std::string &Filename) { diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index af14c72145..654af081f9 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -339,7 +339,7 @@ static void printRelocationTargetName(const MachOObjectFile *O, StringRef S; bool isExtern = O->getPlainRelocationExternal(RE); - uint64_t Val = O->getAnyRelocationAddress(RE); + uint64_t Val = O->getPlainRelocationSymbolNum(RE); if (isExtern) { symbol_iterator SI = O->begin_symbols(); @@ -347,7 +347,8 @@ static void printRelocationTargetName(const MachOObjectFile *O, SI->getName(S); } else { section_iterator SI = O->begin_sections(); - advanceTo(SI, Val); + // Adjust for the fact that sections are 1-indexed. + advanceTo(SI, Val - 1); SI->getName(S); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 6182e34150..57e60dac45 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -872,7 +872,21 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; exponent += rhs.exponent; + // Assume the operands involved in the multiplication are single-precision + // FP, and the two multiplicants are: + // *this = a23 . a22 ... a0 * 2^e1 + // rhs = b23 . b22 ... b0 * 2^e2 + // the result of multiplication is: + // *this = c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are two significant bits at the left-hand side of the + // radix point. Move the radix point toward left by one bit, and adjust + // exponent accordingly. + exponent += 1; + if (addend) { + // The intermediate result of the multiplication has "2 * precision" + // signicant bit; adjust the addend to be consistent with mul result. 
+ // Significand savedSignificand = significand; const fltSemantics *savedSemantics = semantics; fltSemantics extendedSemantics; @@ -880,8 +894,9 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) unsigned int extendedPrecision; /* Normalize our MSB. */ - extendedPrecision = precision + precision - 1; + extendedPrecision = 2 * precision; if (omsb != extendedPrecision) { + assert(extendedPrecision > omsb); APInt::tcShiftLeft(fullSignificand, newPartsCount, extendedPrecision - omsb); exponent -= extendedPrecision - omsb; @@ -912,8 +927,18 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; } - exponent -= (precision - 1); + // Convert the result having "2 * precision" significant-bits back to the one + // having "precision" significant-bits. First, move the radix point from + // poision "2*precision - 1" to "precision - 1". The exponent need to be + // adjusted by "2*precision - 1" - "precision - 1" = "precision". + exponent -= precision; + // In case MSB resides at the left-hand side of radix point, shift the + // mantissa right by some amount to make sure the MSB reside right before + // the radix point (i.e. "MSB . rest-significant-bits"). + // + // Note that the result is not normalized when "omsb < precision". So, the + // caller needs to call APFloat::normalize() if normalized value is expected. 
if (omsb > precision) { unsigned int bits, significantParts; lostFraction lf; diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index fac3cad5cc..4f650b42cc 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -65,7 +65,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf); } - if (NewBuf == 0) return ~0U; + if (!NewBuf) return ~0U; return AddNewSourceBuffer(NewBuf.take(), IncludeLoc); } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index c9729b5412..9e497a0f63 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -251,7 +251,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("hexagon", Triple::hexagon) .Case("s390x", Triple::systemz) .Case("sparc", Triple::sparc) - .Case("sparcv9", Triple::sparcv9) + .Cases("sparcv9", "sparc64", Triple::sparcv9) .Case("tce", Triple::tce) .Case("xcore", Triple::xcore) .Case("nvptx", Triple::nvptx) diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 72a8af621d..cdd475c17f 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -33,6 +33,7 @@ #endif extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +extern "C" void __clear_cache(void *, void*); namespace { diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h index 5c1da0d617..4cdac788a0 100644 --- a/lib/Support/Windows/Windows.h +++ b/lib/Support/Windows/Windows.h @@ -25,6 +25,7 @@ #define WIN32_LEAN_AND_MEAN #include "llvm/Config/config.h" // Get build system configuration settings +#include "llvm/Support/Compiler.h" #include <windows.h> #include <wincrypt.h> #include <shlobj.h> @@ -75,7 +76,7 @@ public: } // True if Handle is valid. - operator bool() const { + LLVM_EXPLICIT operator bool() const { return HandleTraits::IsValid(Handle) ? 
true : false; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index df599d599d..f1695e2ce2 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -38,6 +38,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 3435217bb2..eeec608820 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -57,13 +57,14 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createAArch64MCAsmInfo(StringRef TT) { +static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { Triple TheTriple(TT); MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(); - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(AArch64::XSP, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b0d34a76b0..4de5b4f41c 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -94,6 +94,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::SP); Reserved.set(ARM::PC); Reserved.set(ARM::FPSCR); + Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index fd77732364..432e3eefb1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ 
b/lib/Target/ARM/ARMISelLowering.cpp @@ -2147,7 +2147,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Chain = Copy->getOperand(0); + TCChain = Copy->getOperand(0); } else { return false; } @@ -5257,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } +static EVT getExtensionTo64Bits(const EVT &OrigVT) { + if (OrigVT.getSizeInBits() >= 64) + return OrigVT; + + assert(OrigVT.isSimple() && "Expecting a simple value type"); + + MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + return MVT::v2i32; + case MVT::v4i8: + return MVT::v4i16; + } +} + /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. @@ -5272,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. - MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; - EVT NewVT; - switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Orig Vector Type"); - case MVT::v2i8: - case MVT::v2i16: - NewVT = MVT::v2i32; - break; - case MVT::v4i8: - NewVT = MVT::v4i16; - break; - } + EVT NewVT = getExtensionTo64Bits(OrigTy); + return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); } @@ -5293,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, /// reach a total size of 64 bits. We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. 
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { - SDValue NonExtendingLoad = - DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); + + // The load already has the right type. + if (ExtendedTy == LD->getMemoryVT()) + return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); - unsigned ExtOp = 0; - switch (LD->getExtensionType()) { - default: llvm_unreachable("Unexpected LoadExtType"); - case ISD::EXTLOAD: - case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; - } - MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; - MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; - return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, - MemType, ExtType, ExtOp); + + // We need to create a zextload/sextload. We cannot just create a load + // followed by a zext/zext node because LowerMUL is also run during normal + // operation legalization where we can't create illegal types. 
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, + LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1bd174e341..89f92a589d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4636,11 +4636,11 @@ def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", - (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), @@ -4650,7 +4650,7 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops, list<dag> pattern> : ABXI<0b1110, oops, iops, NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> { - let Inst{31-28} = 0b1111; + let Inst{31-24} = 0b11111110; let Inst{20} = direction; let Inst{4} = 1; @@ -4679,11 +4679,11 @@ def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", - (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; def : 
ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b0f576bc2b..85743d8d5a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -157,12 +157,15 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. // We model fpscr with two registers: FPSCR models the control bits and will be -// reserved. FPSCR_NZCV models the flag bits and will be unreserved. -def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; -def SPSR : ARMReg<2, "spsr">; -def FPSCR : ARMReg<3, "fpscr">; -def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { +// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV +// models the APSR when it's accessed by some special instructions. In such cases +// it has the same encoding as PC. +def CPSR : ARMReg<0, "cpsr">; +def APSR : ARMReg<1, "apsr">; +def APSR_NZCV : ARMReg<15, "apsr_nzcv">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { let Aliases = [FPSCR]; } def ITSTATE : ARMReg<4, "itstate">; @@ -207,6 +210,16 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { }]; } +// GPRs without the PC but with APSR. Some instructions allow accessing the +// APSR, while actually encoding PC in the register field. This is usefull +// for assembly and disassembly only. +def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add GPR, APSR_NZCV)> { + let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + }]; +} + // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the // implied SP argument list. 
// FIXME: It would be better to not use this at all and refactor the diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 8653c462f0..9ff0d61481 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -162,10 +162,23 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (!isThumb() || hasThumb2()) PostRAScheduler = true; - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - if (!StrictAlign && hasV6Ops() && isTargetDarwin()) - AllowsUnalignedMem = true; + if (!StrictAlign) { + // Assume pre-ARMv6 doesn't support unaligned accesses. + // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. It is therefore safe to assume that ARMv7+ + // targets support unaligned accesses. + // + // The above behavior is consistent with GCC. + if (hasV7Ops() || (hasV6Ops() && isTargetDarwin())) + AllowsUnalignedMem = true; + } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. 
uint64_t Bits = getFeatureBits(); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 42c7d2c437..17c52c94a0 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -85,6 +85,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -117,6 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + initAsmInfo(); } namespace { diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ac937f3534..d2896377cc 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -156,6 +156,9 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -920,6 +923,21 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, return S; } +static DecodeStatus +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 15) + { + Inst.addOperand(MCOperand::CreateReg(ARM::APSR_NZCV)); + return MCDisassembler::Success; + } + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, 
Decoder)); + return S; +} + static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 57239f8011..b858fff546 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -159,7 +159,7 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createARMMCAsmInfo(StringRef TT) { +static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 99d7a3a963..57044b27d6 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_target(HexagonCodeGen HexagonTargetObjectFile.cpp HexagonVLIWPacketizer.cpp HexagonNewValueJump.cpp + HexagonCopyToCombine.cpp ) add_subdirectory(TargetInfo) diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 8e19c61f40..b88637ad57 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -44,6 +44,8 @@ namespace llvm { FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonNewValueJump(); diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp new file mode 100644 index 0000000000..dd63523291 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -0,0 +1,676 @@ +//===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass replaces transfer instructions by combine instructions. +// We walk along a basic block and look for two combinable instructions and try +// to move them together. If we can move them next to each other we do so and +// replace them with a combine instruction. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-copy-combine" + +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" + +using namespace llvm; + +static +cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines", + cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Disable merging into combines")); +static +cl::opt<unsigned> +MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store", + cl::Hidden, cl::init(4), + cl::desc("Maximum distance between a tfr feeding a store we " + "consider the store still to be newifiable")); + +namespace llvm { + void initializeHexagonCopyToCombinePass(PassRegistry&); +} + + +namespace { + +class HexagonCopyToCombine : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const TargetRegisterInfo *TRI; + bool 
ShouldCombineAggressively; + + DenseSet<MachineInstr *> PotentiallyNewifiableTFR; +public: + static char ID; + + HexagonCopyToCombine() : MachineFunctionPass(ID) { + initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "Hexagon Copy-To-Combine Pass"; + } + + virtual bool runOnMachineFunction(MachineFunction &Fn); + +private: + MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1); + + void findPotentialNewifiableTFRs(MachineBasicBlock &); + + void combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, bool DoInsertAtI1); + + bool isSafeToMoveTogether(MachineInstr *I1, MachineInstr *I2, + unsigned I1DestReg, unsigned I2DestReg, + bool &DoInsertAtI1); + + void emitCombineRR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineRI(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineIR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineII(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); +}; + +} // End anonymous namespace. + +char HexagonCopyToCombine::ID = 0; + +INITIALIZE_PASS(HexagonCopyToCombine, "hexagon-copy-combine", + "Hexagon Copy-To-Combine Pass", false, false) + +static bool isCombinableInstType(MachineInstr *MI, + const HexagonInstrInfo *TII, + bool ShouldCombineAggressively) { + switch(MI->getOpcode()) { + case Hexagon::TFR: { + // A COPY instruction can be combined if its arguments are IntRegs (32bit). 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg()); + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + return Hexagon::IntRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg); + } + + case Hexagon::TFRI: { + // A transfer-immediate can be combined if its argument is a signed 8bit + // value. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + unsigned DestReg = MI->getOperand(0).getReg(); + + // Only combine constant extended TFRI if we are in aggressive mode. + return Hexagon::IntRegsRegClass.contains(DestReg) && + (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm())); + } + + case Hexagon::TFRI_V4: { + if (!ShouldCombineAggressively) + return false; + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isGlobal()); + + // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a + // workaround for an ABI bug that prevents GOT relocations on combine + // instructions + if (MI->getOperand(1).getTargetFlags() != HexagonII::MO_NO_FLAG) + return false; + + unsigned DestReg = MI->getOperand(0).getReg(); + return Hexagon::IntRegsRegClass.contains(DestReg); + } + + default: + break; + } + + return false; +} + +static bool isGreaterThan8BitTFRI(MachineInstr *I) { + return I->getOpcode() == Hexagon::TFRI && + !isInt<8>(I->getOperand(1).getImm()); +} +static bool isGreaterThan6BitTFRI(MachineInstr *I) { + return I->getOpcode() == Hexagon::TFRI && + !isUInt<6>(I->getOperand(1).getImm()); +} + +/// areCombinableOperations - Returns true if the two instructions can be merged +/// into a combine (ignoring register constraints).
+static bool areCombinableOperations(const TargetRegisterInfo *TRI, + MachineInstr *HighRegInst, + MachineInstr *LowRegInst) { + assert((HighRegInst->getOpcode() == Hexagon::TFR || + HighRegInst->getOpcode() == Hexagon::TFRI || + HighRegInst->getOpcode() == Hexagon::TFRI_V4) && + (LowRegInst->getOpcode() == Hexagon::TFR || + LowRegInst->getOpcode() == Hexagon::TFRI || + LowRegInst->getOpcode() == Hexagon::TFRI_V4) && + "Assume individual instructions are of a combinable type"); + + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>(TRI); + + // V4 added some combine variations (mixed immediate and register source + // operands), if we are on < V4 we can only combine 2 register-to-register + // moves and 2 immediate-to-register moves. We also don't have + // constant-extenders. + if (!QRI->Subtarget.hasV4TOps()) + return HighRegInst->getOpcode() == LowRegInst->getOpcode() && + !isGreaterThan8BitTFRI(HighRegInst) && + !isGreaterThan6BitTFRI(LowRegInst); + + // There is no combine of two constant extended values. + if ((HighRegInst->getOpcode() == Hexagon::TFRI_V4 || + isGreaterThan8BitTFRI(HighRegInst)) && + (LowRegInst->getOpcode() == Hexagon::TFRI_V4 || + isGreaterThan6BitTFRI(LowRegInst))) + return false; + + return true; +} + +static bool isEvenReg(unsigned Reg) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + Hexagon::IntRegsRegClass.contains(Reg)); + return (Reg - Hexagon::R0) % 2 == 0; +} + +static void removeKillInfo(MachineInstr *MI, unsigned RegNotKilled) { + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill()) + continue; + Op.setIsKill(false); + } +} + +/// isUnsafeToMoveAccross - Returns true if it is unsafe to move a copy +/// instruction from \p UseReg to \p DestReg over the instruction \p I. 
+bool isUnsafeToMoveAccross(MachineInstr *I, unsigned UseReg, unsigned DestReg, + const TargetRegisterInfo *TRI) { + return (UseReg && (I->modifiesRegister(UseReg, TRI))) || + I->modifiesRegister(DestReg, TRI) || + I->readsRegister(DestReg, TRI) || + I->hasUnmodeledSideEffects() || + I->isInlineAsm() || I->isDebugValue(); +} + +/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such +/// that the two instructions can be paired in a combine. +bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, + MachineInstr *I2, + unsigned I1DestReg, + unsigned I2DestReg, + bool &DoInsertAtI1) { + + bool IsImmUseReg = I2->getOperand(1).isImm() || I2->getOperand(1).isGlobal(); + unsigned I2UseReg = IsImmUseReg ? 0 : I2->getOperand(1).getReg(); + + // It is not safe to move I1 and I2 into one combine if I2 has a true + // dependence on I1. + if (I2UseReg && I1->modifiesRegister(I2UseReg, TRI)) + return false; + + bool isSafe = true; + + // First try to move I2 towards I1. + { + // A reverse_iterator instantiated like below starts before I2, and I1 + // respectively. + // Look at instructions I in between I2 and (excluding) I1. + MachineBasicBlock::reverse_iterator I(I2), + End = --(MachineBasicBlock::reverse_iterator(I1)); + // At O3 we got better results (dhrystone!) by being more conservative. + if (!ShouldCombineAggressively) + End = MachineBasicBlock::reverse_iterator(I1); + // If I2 kills its operand and we move I2 over an instruction that also + // uses I2's use reg we need to modify that (first) instruction to now kill + // this reg. + unsigned KilledOperand = 0; + if (I2->killsRegister(I2UseReg)) + KilledOperand = I2UseReg; + MachineInstr *KillingInstr = 0; + + for (; I != End; ++I) { + // If the intervening instruction I: + // * modifies I2's use reg + // * modifies I2's def reg + // * reads I2's def reg + // * or has unmodelled side effects + // we can't move I2 across it.
+ if (isUnsafeToMoveAccross(&*I, I2UseReg, I2DestReg, TRI)) { + isSafe = false; + break; + } + + // Update first use of the killed operand. + if (!KillingInstr && KilledOperand && + I->readsRegister(KilledOperand, TRI)) + KillingInstr = &*I; + } + if (isSafe) { + // Update the intermediate instruction with the kill flag. + if (KillingInstr) { + bool Added = KillingInstr->addRegisterKilled(KilledOperand, TRI, true); + (void)Added; // suppress compiler warning + assert(Added && "Must successfully update kill flag"); + removeKillInfo(I2, KilledOperand); + } + DoInsertAtI1 = true; + return true; + } + } + + // Try to move I1 towards I2. + { + // Look at instructions I in between I1 and (excluding) I2. + MachineBasicBlock::iterator I(I1), End(I2); + // At O3 we got better results (dhrystone) by being more conservative here. + if (!ShouldCombineAggressively) + End = llvm::next(MachineBasicBlock::iterator(I2)); + IsImmUseReg = I1->getOperand(1).isImm() || I1->getOperand(1).isGlobal(); + unsigned I1UseReg = IsImmUseReg ? 0 : I1->getOperand(1).getReg(); + // Track killed operands. If we move across an instruction that kills our + // operand, we need to update the kill information on the moved I1. It kills + // the operand now. + MachineInstr *KillingInstr = 0; + unsigned KilledOperand = 0; + + while(++I != End) { + // If the intervening instruction I: + // * modifies I1's use reg + // * modifies I1's def reg + // * reads I1's def reg + // * or has unmodelled side effects + // We introduce this special case because llvm has no api to remove a + // kill flag for a register (a removeRegisterKilled() analogous to + // addRegisterKilled) that handles aliased register correctly. + // * or has a killed aliased register use of I1's use reg + // %D4<def> = TFRI64 16 + // %R6<def> = TFR %R9 + // %R8<def> = KILL %R8, %D4<imp-use,kill> + // If we want to move R6 = across the KILL instruction we would have + // to remove the %D4<imp-use,kill> operand.
For now, we are + // conservative and disallow the move. + // we can't move I1 across it. + if (isUnsafeToMoveAccross(I, I1UseReg, I1DestReg, TRI) || + // Check for an aliased register kill. Bail out if we see one. + (!I->killsRegister(I1UseReg) && I->killsRegister(I1UseReg, TRI))) + return false; + + // Check for an exact kill (registers match). + if (I1UseReg && I->killsRegister(I1UseReg)) { + assert(KillingInstr == 0 && "Should only see one killing instruction"); + KilledOperand = I1UseReg; + KillingInstr = &*I; + } + } + if (KillingInstr) { + removeKillInfo(KillingInstr, KilledOperand); + // Update I1 to set the kill flag. This flag will later be picked up by + // the new COMBINE instruction. + bool Added = I1->addRegisterKilled(KilledOperand, TRI); + (void)Added; // suppress compiler warning + assert(Added && "Must successfully update kill flag"); + } + DoInsertAtI1 = false; + } + + return true; +} + +/// findPotentialNewifiableTFRs - Finds transfers that feed stores that could be +/// newified. (A use of a 64 bit register define can not be newified) +void +HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { + DenseMap<unsigned, MachineInstr *> LastDef; + for (MachineBasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + MachineInstr *MI = I; + // Mark TFRs that feed a potential new value store as such. + if(TII->mayBeNewStore(MI)) { + // Look for uses of TFR instructions. + for (unsigned OpdIdx = 0, OpdE = MI->getNumOperands(); OpdIdx != OpdE; + ++OpdIdx) { + MachineOperand &Op = MI->getOperand(OpdIdx); + + // Skip over anything except register uses. + if (!Op.isReg() || !Op.isUse() || !Op.getReg()) + continue; + + // Look for the defining instruction. + unsigned Reg = Op.getReg(); + MachineInstr *DefInst = LastDef[Reg]; + if (!DefInst) + continue; + if (!isCombinableInstType(DefInst, TII, ShouldCombineAggressively)) + continue; + + // Only close newifiable stores should influence the decision.
+ MachineBasicBlock::iterator It(DefInst); + unsigned NumInstsToDef = 0; + while (&*It++ != MI) + ++NumInstsToDef; + + if (NumInstsToDef > MaxNumOfInstsBetweenNewValueStoreAndTFR) + continue; + + PotentiallyNewifiableTFR.insert(DefInst); + } + // Skip to next instruction. + continue; + } + + // Put instructions that last defined integer or double registers into the + // map. + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || !Op.isDef() || !Op.getReg()) + continue; + unsigned Reg = Op.getReg(); + if (Hexagon::DoubleRegsRegClass.contains(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + LastDef[*SubRegs] = MI; + } + } else if (Hexagon::IntRegsRegClass.contains(Reg)) + LastDef[Reg] = MI; + } + } +} + +bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { + + if (IsCombinesDisabled) return false; + + bool HasChanged = false; + + // Get target info. + TRI = MF.getTarget().getRegisterInfo(); + TII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo()); + + // Combine aggressively (for code size) + ShouldCombineAggressively = + MF.getTarget().getOptLevel() <= CodeGenOpt::Default; + + // Traverse basic blocks. + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; + ++BI) { + PotentiallyNewifiableTFR.clear(); + findPotentialNewifiableTFRs(*BI); + + // Traverse instructions in basic block. + for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end(); + MI != End;) { + MachineInstr *I1 = MI++; + // Don't combine a TFR whose user could be newified (instructions that + // define double registers can not be newified - Programmer's Ref Manual + // 5.4.2 New-value stores). + if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I1)) + continue; + + // Ignore instructions that are not combinable. 
+ if (!isCombinableInstType(I1, TII, ShouldCombineAggressively)) + continue; + + // Find a second instruction that can be merged into a combine + // instruction. + bool DoInsertAtI1 = false; + MachineInstr *I2 = findPairable(I1, DoInsertAtI1); + if (I2) { + HasChanged = true; + combine(I1, I2, MI, DoInsertAtI1); + } + } + } + + return HasChanged; +} + +/// findPairable - Returns an instruction that can be merged with \p I1 into a +/// COMBINE instruction or 0 if no such instruction can be found. Returns true +/// in \p DoInsertAtI1 if the combine must be inserted at instruction \p I1, +/// false if the combine must be inserted at the returned instruction. +MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1, + bool &DoInsertAtI1) { + MachineBasicBlock::iterator I2 = llvm::next(MachineBasicBlock::iterator(I1)); + unsigned I1DestReg = I1->getOperand(0).getReg(); + + for (MachineBasicBlock::iterator End = I1->getParent()->end(); I2 != End; + ++I2) { + // Bail out early if we see a second definition of I1DestReg. + if (I2->modifiesRegister(I1DestReg, TRI)) + break; + + // Ignore non-combinable instructions. + if (!isCombinableInstType(I2, TII, ShouldCombineAggressively)) + continue; + + // Don't combine a TFR whose user could be newified. + if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I2)) + continue; + + unsigned I2DestReg = I2->getOperand(0).getReg(); + + // Check that registers are adjacent and that the first destination register + // is even. + bool IsI1LowReg = (I2DestReg - I1DestReg) == 1; + bool IsI2LowReg = (I1DestReg - I2DestReg) == 1; + unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg; + if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex)) + continue; + + // Check that the two instructions are combinable. V4 allows more + // instructions to be merged into a combine. + // The order matters because in a TFRI we can encode an int8 as the + // hi reg operand but only a uint6 as the low reg operand.
+ if ((IsI2LowReg && !areCombinableOperations(TRI, I1, I2)) || + (IsI1LowReg && !areCombinableOperations(TRI, I2, I1))) + break; + + if (isSafeToMoveTogether(I1, I2, I1DestReg, I2DestReg, + DoInsertAtI1)) + return I2; + + // Not safe. Stop searching. + break; + } + return 0; +} + +void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, + bool DoInsertAtI1) { + // We are going to delete I2. If MI points to I2 advance it to the next + // instruction. + if ((MachineInstr *)MI == I2) ++MI; + + // Figure out whether I1 or I2 goes into the lowreg part. + unsigned I1DestReg = I1->getOperand(0).getReg(); + unsigned I2DestReg = I2->getOperand(0).getReg(); + bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; + unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg; + + // Get the double word register. + unsigned DoubleRegDest = + TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg, + &Hexagon::DoubleRegsRegClass); + assert(DoubleRegDest != 0 && "Expect a valid register"); + + + // Setup source operands. + MachineOperand &LoOperand = IsI1Loreg ? I1->getOperand(1) : + I2->getOperand(1); + MachineOperand &HiOperand = IsI1Loreg ? I2->getOperand(1) : + I1->getOperand(1); + + // Figure out which source is a register and which a constant. + bool IsHiReg = HiOperand.isReg(); + bool IsLoReg = LoOperand.isReg(); + + MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ? I1 : I2); + // Emit combine. 
+ if (IsHiReg && IsLoReg) + emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsHiReg) + emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsLoReg) + emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else + emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand); + + I1->eraseFromParent(); + I2->eraseFromParent(); +} + +void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle globals. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle constant extended immediates. + if (!isInt<8>(HiOperand.getImm())) { + assert(isInt<8>(LoOperand.getImm())); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + if (!isUInt<6>(LoOperand.getImm())) { + assert(isInt<8>(HiOperand.getImm())); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine #HiImm, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoReg = LoOperand.getReg(); + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Insert new combine instruction. + // DoubleRegDest = combine #HiImm, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addReg(LoReg, LoRegKillFlag); +} + +void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine HiReg, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned LoReg = LoOperand.getReg(); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Insert new combine instruction. + // DoubleRegDest = combine HiReg, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rr), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addReg(LoReg, LoRegKillFlag); +} + +FunctionPass *llvm::createHexagonCopyToCombine() { + return new HexagonCopyToCombine(); +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index d1e32c65ad..c96aaca8f8 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -384,6 +384,12 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), // ALU32/PERM + //===----------------------------------------------------------------------===// +let neverHasSideEffects = 1 in +def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst), + (ins s8Imm:$src1, s8Imm:$src2), + "$dst = combine(#$src1, #$src2)", + []>; + // Mux. 
def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, DoubleRegs:$src2, diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 022a7f6136..fee83fb811 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3188,6 +3188,93 @@ def STriw_offset_ext_V4 : STInst<(outs), (add IntRegs:$src1, u6_2ImmPred:$src2))]>, Requires<[HasV4T]>; +def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))), + (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>, + Requires<[HasV4T]>; + +def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))), + (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>, + Requires<[HasV4T]>; + + +// i8 -> i64 loads +// We need a complexity of 120 here to override preceding handling of +// zextloadi8. +let Predicates = [HasV4T], AddedComplexity = 120 in { +def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDrib_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; + +def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 FoldGlobalAddr:$addr)))>; + +def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; +} +// i16 -> i64 loads +// We need a complexity of 120 here to override preceding handling of +// zextloadi16.
+let AddedComplexity = 120 in { +def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDrih_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; +} +// i32->i64 loads +// We need a complexity of 120 here to override preceding handling of +// zextloadi32. +let AddedComplexity = 120 in { +def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; +} // Indexed store double word - global address.
// memw(Rs+#u6:2)=#S8 diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 05e696865f..f7c4513213 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -631,6 +631,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { .addMBB(jmpTarget); assert(NewMI && "New Value Jump Instruction Not created!"); + (void)NewMI; if (cmpInstr->getOperand(0).isReg() && cmpInstr->getOperand(0).isKill()) cmpInstr->getOperand(0).setIsKill(false); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index dc44b34cff..676dff2a4a 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -79,6 +79,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), InstrItins(&Subtarget.getInstrItineraryData()) { setMCUseCFI(false); + initAsmInfo(); } // addPassesForOptimizations - Allow the backend (target) to add Target @@ -161,6 +162,7 @@ bool HexagonPassConfig::addPreSched2() { HexagonTargetObjectFile &TLOF = (HexagonTargetObjectFile&)(getTargetLowering()->getObjFileLowering()); + addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); if (!TLOF.IsSmallDataEnabled()) { @@ -168,9 +170,6 @@ bool HexagonPassConfig::addPreSched2() { printAndVerify("After hexagon split const32/64 pass"); } return true; - if (getOptLevel() != CodeGenOpt::None) - addPass(&IfConverterID); - return false; } bool HexagonPassConfig::addPreEmitPass() { diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c508d124b3..59b4fabe01 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -837,16 +837,38 @@ bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI, } +/// Gets the 
predicate register of a predicated instruction. +unsigned getPredicatedRegister(MachineInstr *MI, const HexagonInstrInfo *QII) { + /// We use the following rule: The first predicate register that is a use is + /// the predicate register of a predicated instruction. + + assert(QII->isPredicated(MI) && "Must be predicated instruction"); + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + MachineOperand &Op = *OI; + if (Op.isReg() && Op.getReg() && Op.isUse() && + Hexagon::PredRegsRegClass.contains(Op.getReg())) + return Op.getReg(); + } + + llvm_unreachable("Unknown instruction operand layout"); + + return 0; +} + // Given two predicated instructions, this function detects whether // the predicates are complements bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - // Currently can only reason about conditional transfers - if (!QII->isConditionalTransfer(MI1) || !QII->isConditionalTransfer(MI2)) { + + // If we don't know the predicate sense of the instructions bail out early, we + // need it later. + if (getPredicateSense(MI1, QII) == PK_Unknown || + getPredicateSense(MI2, QII) == PK_Unknown) return false; - } // Scheduling unit for candidate SUnit* SU = MIToSUnit[MI1]; @@ -885,9 +907,9 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, // there already exist anti dep on the same pred in // the packet. if (PacketSU->Succs[i].getSUnit() == SU && + PacketSU->Succs[i].getKind() == SDep::Data && Hexagon::PredRegsRegClass.contains( PacketSU->Succs[i].getReg()) && - PacketSU->Succs[i].getKind() == SDep::Data && // Here I know that *VIN is predicate setting instruction // with true data dep to candidate on the register // we care about - c) in the above example. 
@@ -908,7 +930,11 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, // that the predicate sense is different // We also need to differentiate .old vs. .new: // !p0 is not complimentary to p0.new - return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) && + unsigned PReg1 = getPredicatedRegister(MI1, QII); + unsigned PReg2 = getPredicatedRegister(MI2, QII); + return ((PReg1 == PReg2) && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) && (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 273bc22b8e..2f93a5299c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -54,13 +54,14 @@ static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT, return X; } -static MCAsmInfo *createHexagonMCAsmInfo(StringRef TT) { +static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new HexagonMCAsmInfo(TT); // VirtualFP = (R30 + #0). 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(Hexagon::R30, 0); - MAI->addInitialFrameState(0, Dst, Src); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( + 0, Hexagon::R30, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index bcdd32fed9..c75895575d 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -43,6 +43,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { + initAsmInfo(); } namespace { diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index ec76dba491..5bc0668f35 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -53,7 +53,7 @@ static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createMCAsmInfo(StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { default: diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 164e351df9..6710a09707 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -36,7 +36,9 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, // FIXME: Check DataLayout string. DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { } + FrameLowering(Subtarget) { + initAsmInfo(); +} namespace { /// MSP430 Code Generator Pass Configuration Options. 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 26694ffdac..837fabee76 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -93,12 +93,12 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createMipsMCAsmInfo(StringRef TT) { +static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI = new MipsMCAsmInfo(TT); - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(Mips::SP, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned SP = MRI.getDwarfRegNum(Mips::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, SP, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 4d1e61bb99..cc7324f26e 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -18,6 +18,36 @@ #include "llvm/Support/raw_ostream.h" #include <string> +static void inlineAsmOut + (LLVMContext &C, StringRef AsmString, BasicBlock *BB ) { + std::vector<llvm::Type *> AsmArgTypes; + std::vector<llvm::Value*> AsmArgs; + llvm::FunctionType *AsmFTy = + llvm::FunctionType::get(Type::getVoidTy(C), + AsmArgTypes, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(AsmFTy, AsmString, "", true, + /* IsAlignStack */ false, + llvm::InlineAsm::AD_ATT); + CallInst::Create(IA, AsmArgs, "", BB); +} + +namespace { + +class InlineAsmHelper { + LLVMContext &C; + BasicBlock *BB; +public: + InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) : + C(C_), BB(BB_) { + } + + void Out(StringRef AsmString) { + inlineAsmOut(C, AsmString, BB); + } + +}; +} // // Return types that matter for hard float are: // float, double, complex float, and complex double @@ -52,6 +82,243 @@ static FPReturnVariant whichFPReturnVariant(Type *T) { } // 
+// Parameter types that matter are float, (float, float), (float, double), +// double, (double, double), (double, float) +// +enum FPParamVariant { + FSig, FFSig, FDSig, + DSig, DDSig, DFSig, NoSig +}; + +// which floating point parameter signature variant we are dealing with +// +typedef Type::TypeID TypeID; +const Type::TypeID FloatTyID = Type::FloatTyID; +const Type::TypeID DoubleTyID = Type::DoubleTyID; + +static FPParamVariant whichFPParamVariantNeeded(Function &F) { + switch (F.arg_size()) { + case 0: + return NoSig; + case 1:{ + TypeID ArgTypeID = F.getFunctionType()->getParamType(0)->getTypeID(); + switch (ArgTypeID) { + case FloatTyID: + return FSig; + case DoubleTyID: + return DSig; + default: + return NoSig; + } + } + default: { + TypeID ArgTypeID0 = F.getFunctionType()->getParamType(0)->getTypeID(); + TypeID ArgTypeID1 = F.getFunctionType()->getParamType(1)->getTypeID(); + switch(ArgTypeID0) { + case FloatTyID: { + switch (ArgTypeID1) { + case FloatTyID: + return FFSig; + case DoubleTyID: + return FDSig; + default: + return FSig; + } + } + case DoubleTyID: { + switch (ArgTypeID1) { + case FloatTyID: + return DFSig; + case DoubleTyID: + return DDSig; + default: + return DSig; + } + } + default: + return NoSig; + } + } + } + llvm_unreachable("can't get here"); +} + +// Figure out if we need floating point based on the function parameters. 
+// We need to move variables in and/or out of floating point +// registers because of the ABI +// +static bool needsFPStubFromParams(Function &F) { + if (F.arg_size() >=1) { + Type *ArgType = F.getFunctionType()->getParamType(0); + switch (ArgType->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + break; + } + } + return false; +} + +static bool needsFPReturnHelper(Function &F) { + Type* RetType = F.getReturnType(); + return whichFPReturnVariant(RetType) != NoFPRet; +} + +static bool needsFPHelperFromSig(Function &F) { + return needsFPStubFromParams(F) || needsFPReturnHelper(F); +} + +// +// We swap between FP and Integer registers to allow Mips16 and Mips32 to +// interoperate +// + +static void swapFPIntParams + (FPParamVariant PV, Module *M, InlineAsmHelper &IAH, + bool LE, bool ToFP) { + //LLVMContext &Context = M->getContext(); + std::string MI = ToFP? "mtc1 ": "mfc1 "; + switch (PV) { + case FSig: + IAH.Out(MI + "$$4,$$f12"); + break; + case FFSig: + IAH.Out(MI +"$$4,$$f12"); + IAH.Out(MI + "$$5,$$f14"); + break; + case FDSig: + IAH.Out(MI + "$$4,$$f12"); + if (LE) { + IAH.Out(MI + "$$6,$$f14"); + IAH.Out(MI + "$$7,$$f15"); + } else { + IAH.Out(MI + "$$7,$$f14"); + IAH.Out(MI + "$$6,$$f15"); + } + break; + case DSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + } + break; + case DDSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + IAH.Out(MI + "$$6,$$f14"); + IAH.Out(MI + "$$7,$$f15"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + IAH.Out(MI + "$$7,$$f14"); + IAH.Out(MI + "$$6,$$f15"); + } + break; + case DFSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + } + IAH.Out(MI + "$$6,$$f14"); + break; + case NoSig: + return; + } +} +// +// Make sure that we know we already need a stub 
for this function. +// Having called needsFPHelperFromSig +// +static void assureFPCallStub(Function &F, Module *M, + const MipsSubtarget &Subtarget){ + // for now we only need them for static relocation + if (Subtarget.getRelocationModel() == Reloc::PIC_) + return; + LLVMContext &Context = M->getContext(); + bool LE = Subtarget.isLittle(); + std::string Name = F.getName(); + std::string SectionName = ".mips16.call.fp." + Name; + std::string StubName = "__call_stub_" + Name; + // + // see if we already have the stub + // + Function *FStub = M->getFunction(StubName); + if (FStub && !FStub->isDeclaration()) return; + FStub = Function::Create(F.getFunctionType(), + Function::InternalLinkage, StubName, M); + FStub->addFnAttr("mips16_fp_stub"); + FStub->addFnAttr(llvm::Attribute::Naked); + FStub->addFnAttr(llvm::Attribute::NoUnwind); + FStub->addFnAttr("nomips16"); + FStub->setSection(SectionName); + BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub); + InlineAsmHelper IAH(Context, BB); + FPReturnVariant RV = whichFPReturnVariant(FStub->getReturnType()); + FPParamVariant PV = whichFPParamVariantNeeded(F); + swapFPIntParams(PV, M, IAH, LE, true); + if (RV != NoFPRet) { + IAH.Out("move $$18, $$31"); + IAH.Out("jal " + Name); + } else { + IAH.Out("lui $$25,%hi(" + Name + ")"); + IAH.Out("addiu $$25,$$25,%lo(" + Name + ")" ); + } + switch (RV) { + case FRet: + IAH.Out("mfc1 $$2,$$f0"); + break; + case DRet: + if (LE) { + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f1"); + } else { + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 $$2,$$f1"); + } + break; + case CFRet: + if (LE) { + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f2"); + } else { + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 $$3,$$f2"); + } + break; + case CDRet: + if (LE) { + IAH.Out("mfc1 $$4,$$f2"); + IAH.Out("mfc1 $$5,$$f3"); + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f1"); + + } else { + IAH.Out("mfc1 $$5,$$f2"); + IAH.Out("mfc1 $$4,$$f3"); + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 
$$2,$$f1"); + } + break; + case NoFPRet: + break; + } + if (RV != NoFPRet) + IAH.Out("jr $$18"); + else + IAH.Out("jr $$25"); + new UnreachableInst(Context, BB); +} + +// // Returns of float, double and complex need to be handled with a helper // function. The "AndCal" part is coming in a later patch. // @@ -96,6 +363,16 @@ static bool fixupFPReturnAndCall Attribute::ReadNone); Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, NULL)); CallInst::Create(F, Params, "", &Inst ); + } else if (const CallInst *CI = dyn_cast<CallInst>(I)) { + // pic mode calls are handled by already defined + // helper functions + if (Subtarget.getRelocationModel() != Reloc::PIC_ ) { + Function *F_ = CI->getCalledFunction(); + if (F_ && needsFPHelperFromSig(*F_)) { + assureFPCallStub(*F_, M, Subtarget); + Modified=true; + } + } } } return Modified; diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 17dd2c0796..ab9e62703b 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -145,7 +145,7 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { /// GetOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. 
-unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { +unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16; @@ -380,7 +380,7 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg, return Reg; } -unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { +unsigned Mips16InstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 || Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 || Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 || diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index a77a9043bb..a3bd31e94f 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -64,7 +64,7 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const; // Adjust SP by FrameSize bytes. 
Save RA, S0, S1 void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, @@ -102,7 +102,7 @@ public: (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const; void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 6b23057c9c..5fa79cb159 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -154,6 +154,7 @@ class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; + let mayLoad = 1; } class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, @@ -161,6 +162,7 @@ class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; + let mayStore = 1; } class MADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, @@ -314,8 +316,12 @@ let Predicates = [NotN64, HasMips64, HasStdEnc], } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem, load>, LW_FM<0x35>; - def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem, store>, LW_FM<0x3d>; + let isPseudo = 1, isCodeGenOnly = 1 in { + def PseudoLDC1 : LW_FT<"", AFGR64, IILoad, mem, load>; + def PseudoSDC1 : SW_FT<"", AFGR64, IIStore, mem, store>; + } + def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem>, LW_FM<0x35>; + def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem>, LW_FM<0x3d>; } // Indexed loads and stores. 
@@ -523,7 +529,7 @@ let AddedComplexity = 40 in { } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def : LoadRegImmPat<LDC1, f64, load>; - def : StoreRegImmPat<SDC1, f64>; + def : LoadRegImmPat<PseudoLDC1, f64, load>; + def : StoreRegImmPat<PseudoSDC1, f64>; } } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index ad92d41209..3144daebd7 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -77,7 +77,7 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB, SmallVectorImpl<MachineOperand> &Cond) const { - assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); + assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); // for both int and fp branches, the last explicit operand is the @@ -167,7 +167,7 @@ RemoveBranch(MachineBasicBlock &MBB) const // Up to 2 branches are removed. // Note that indirect branches are not removed. for(removed = 0; I != REnd && removed < 2; ++I, ++removed) - if (!GetAnalyzableBrOpc(I->getOpcode())) + if (!getAnalyzableBrOpc(I->getOpcode())) break; MBB.erase(I.base(), FirstBr.base()); @@ -182,7 +182,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm())); + Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm())); return false; } @@ -210,7 +210,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, BranchInstrs.push_back(LastInst); // Not an analyzable branch (e.g., indirect jump). - if (!GetAnalyzableBrOpc(LastOpc)) + if (!getAnalyzableBrOpc(LastOpc)) return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; // Get the second to last instruction in the block. 
@@ -219,7 +219,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, if (++I != REnd) { SecondLastInst = &*I; - SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode()); + SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode()); // Not an analyzable branch (must be an indirect jump). if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) @@ -282,3 +282,16 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } } } + +MachineInstrBuilder +MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, + MachineBasicBlock::iterator I) const { + MachineInstrBuilder MIB; + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); + + for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) + MIB.addOperand(I->getOperand(J)); + + MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end()); + return MIB; +} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 8c05d97bea..0f075ec6d0 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -17,6 +17,7 @@ #include "Mips.h" #include "MipsAnalyzeImmediate.h" #include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -81,7 +82,7 @@ public: /// virtual const MipsRegisterInfo &getRegisterInfo() const = 0; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const = 0; /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; @@ -116,6 +117,11 @@ public: const TargetRegisterInfo *TRI, int64_t Offset) const = 0; + /// Create an instruction which has the same operands and memory operands + /// as MI but has a new opcode. 
+ MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, + MachineBasicBlock::iterator I) const; + protected: bool isZeroImm(const MachineOperand &op) const; @@ -123,7 +129,7 @@ protected: unsigned Flag) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const = 0; void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3d319373fe..5ada1df267 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1095,7 +1095,8 @@ def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>; - +def : InstAlias<"addiu $rs, $imm", + (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rs, simm16:$imm), 0>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index bf5ad37031..daabf3d25a 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -217,7 +217,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { // MachineBasicBlock operand MBBOpnd. 
void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL, MachineBasicBlock *MBBOpnd) { - unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode()); + unsigned NewOpc = TII->getOppositeBranchOpc(Br->getOpcode()); const MCInstrDesc &NewDesc = TII->get(NewOpc); MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index a0768e51c0..12ed1bc186 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -18,11 +18,17 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; +static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), + cl::desc("Expand double precision loads and " + "stores to their single precision " + "counterparts.")); + MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, tm.getRelocationModel() == Reloc::PIC_ ? 
Mips::B : Mips::J), @@ -245,17 +251,23 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { default: return false; case Mips::RetRA: - ExpandRetRA(MBB, MI, Mips::RET); + expandRetRA(MBB, MI, Mips::RET); break; case Mips::BuildPairF64: - ExpandBuildPairF64(MBB, MI); + expandBuildPairF64(MBB, MI); break; case Mips::ExtractElementF64: - ExpandExtractElementF64(MBB, MI); + expandExtractElementF64(MBB, MI); + break; + case Mips::PseudoLDC1: + expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1); + break; + case Mips::PseudoSDC1: + expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1); break; case Mips::MIPSeh_return32: case Mips::MIPSeh_return64: - ExpandEhReturn(MBB, MI); + expandEhReturn(MBB, MI); break; } @@ -263,9 +275,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return true; } -/// GetOppositeBranchOpc - Return the inverse of the specified +/// getOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. 
-unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const { +unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); case Mips::BEQ: return Mips::BNE; @@ -346,7 +358,7 @@ MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, return Reg; } -unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { +unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || @@ -356,13 +368,13 @@ unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { Opc : 0; } -void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const { BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); } -void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); @@ -377,7 +389,7 @@ void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); } -void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); @@ -393,7 +405,57 @@ void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, .addReg(HiReg); } -void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB, +/// Add 4 to the displacement of operand MO. 
+static void fixDisp(MachineOperand &MO) { + switch (MO.getType()) { + default: + llvm_unreachable("Unhandled operand type."); + case MachineOperand::MO_Immediate: + MO.setImm(MO.getImm() + 4); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_ExternalSymbol: + MO.setOffset(MO.getOffset() + 4); + break; + } +} + +void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned OpcD, unsigned OpcS) const { + // If NoDPLoadStore is false, just change the opcode. + if (!NoDPLoadStore) { + genInstrWithNewOpc(OpcD, I); + return; + } + + // Expand a double precision FP load or store to two single precision + // instructions. + + const TargetRegisterInfo &TRI = getRegisterInfo(); + const MachineOperand &ValReg = I->getOperand(0); + unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpeven); + unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpodd); + + if (!TM.getSubtarget<MipsSubtarget>().isLittle()) + std::swap(LoReg, HiReg); + + // Create an instruction which loads from or stores to the lower memory + // address. + MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I); + MIB->getOperand(0).setReg(LoReg); + + // Create an instruction which loads from or stores to the higher memory + // address. + MIB = genInstrWithNewOpc(OpcS, I); + MIB->getOperand(0).setReg(HiReg); + fixDisp(MIB->getOperand(2)); +} + +void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { // This pseudo instruction is generated as part of the lowering of // ISD::EH_RETURN. 
We convert it to a stack increment by OffsetReg, and diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 0bf7876f0f..416fff8a60 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -65,7 +65,7 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const; /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, @@ -79,15 +79,18 @@ public: unsigned *NewImm) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const; - void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; - void ExpandExtractElementF64(MachineBasicBlock &MBB, + void expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void ExpandBuildPairF64(MachineBasicBlock &MBB, + void expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void ExpandEhReturn(MachineBasicBlock &MBB, + void expandDPLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned OpcD, + unsigned OpcS) const; + void expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; }; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index a876f1c7f0..89407351a0 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -72,6 +72,7 @@ MipsTargetMachine(const Target &T, StringRef TT, FrameLowering(MipsFrameLowering::create(*this, Subtarget)), TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { + initAsmInfo(); } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 
67ca6b58e5..5f35edf219 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -72,7 +72,9 @@ NVPTXTargetMachine::NVPTXTargetMachine( Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering( - *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {} + *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { + initAsmInfo(); +} void NVPTXTargetMachine32::anchor() {} diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 71803cdac9..e5c5204708 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCTargetObjectFile.cpp PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp ) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index ec2657403e..b1ac4a6f27 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -22,7 +22,7 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: llvm_unreachable("Unknown fixup kind!"); @@ -50,6 +50,29 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { } } +static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_1: + return 1; + case FK_Data_2: + case PPC::fixup_ppc_ha16: + case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_lo16_ds: + return 2; + case FK_Data_4: + case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_br24: + return 4; + case FK_Data_8: + return 8; + case PPC::fixup_ppc_tlsreg: + case PPC::fixup_ppc_nofixup: + return 0; + } 
+} + namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { public: @@ -77,9 +100,9 @@ public: // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_lo16", 16, 16, 0 }, - { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo16_ds", 16, 14, 0 }, + { "fixup_ppc_lo16", 0, 16, 0 }, + { "fixup_ppc_ha16", 0, 16, 0 }, + { "fixup_ppc_lo16_ds", 0, 14, 0 }, { "fixup_ppc_tlsreg", 0, 0, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; @@ -98,12 +121,13 @@ public: if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); // For each byte of the fragment that the fixup touches, mask in the bits // from the fixup value. The Value has been "split up" into the appropriate // bitfields above. - for (unsigned i = 0; i != 4; ++i) - Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff); } bool mayNeedRelaxation(const MCInst &Inst) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 7a84723ed5..2508cc2f37 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -33,26 +33,9 @@ namespace { virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; - virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); - - virtual void sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs); - }; - - class PPCELFRelocationEntry : public ELFRelocationEntry { - public: - PPCELFRelocationEntry(const ELFRelocationEntry &RE); - bool operator<(const PPCELFRelocationEntry &RE) const { - return (RE.r_offset < r_offset || - 
(RE.r_offset == r_offset && RE.Type > Type)); - } }; } -PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE) - : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol, - RE.r_addend, *RE.Fixup) {} - PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC, @@ -240,47 +223,6 @@ const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Targe return NULL; } -void PPCELFObjectWriter:: -adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { - switch ((unsigned)Fixup.getKind()) { - case PPC::fixup_ppc_ha16: - case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_lo16_ds: - RelocOffset += 2; - break; - default: - break; - } -} - -// The standard sorter only sorts on the r_offset field, but PowerPC can -// have multiple relocations at the same offset. Sort secondarily on the -// relocation type to avoid nondeterminism. -void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs) { - - // Copy to a temporary vector of relocation entries having a different - // sort function. - std::vector<PPCELFRelocationEntry> TmpRelocs; - - for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin(); - R != Relocs.end(); ++R) { - TmpRelocs.push_back(PPCELFRelocationEntry(*R)); - } - - // Sort in place by ascending r_offset and descending r_type. - array_pod_sort(TmpRelocs.begin(), TmpRelocs.end()); - - // Copy back to the original vector. 
- unsigned I = 0; - for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin(); - R != TmpRelocs.end(); ++R, ++I) { - Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type, - R->Symbol, R->r_addend, *R->Fixup); - } -} - - MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 2223cd623c..3f04a4ec0a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -142,7 +142,7 @@ unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo, if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_ha16)); return 0; } @@ -153,7 +153,7 @@ unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo, if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16)); return 0; } @@ -170,7 +170,7 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -188,7 +188,7 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; // Add a fixup for the displacement field. 
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16_ds)); return RegBits; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index a01fa44a9a..2da30f9038 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -58,7 +58,7 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createPPCMCAsmInfo(StringRef TT) { +static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; @@ -69,9 +69,10 @@ static MCAsmInfo *createPPCMCAsmInfo(StringRef TT) { MAI = new PPCLinuxMCAsmInfo(isPPC64); // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1; + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index cd70aeed87..1f0c3c4b5d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1168,6 +1168,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, FuncInfo->addMustSaveCR(Reg); } else { CRSpilled = true; + FuncInfo->setSpillsCR(); // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3819bc8f15..eee2bb87de 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16,6 +16,7 @@ #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" +#include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -64,6 +65,9 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); + if (TM.getSubtargetImpl()->isSVR4ABI()) + return new PPC64LinuxTargetObjectFile(); + return new TargetLoweringObjectFileELF(); } @@ -662,6 +666,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; + case PPCISD::SC: return "PPCISD::SC"; } } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b219de38d5..2a1cc121da 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -175,61 +175,61 @@ namespace llvm { /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym@got@tprel@ha. + /// base to sym\@got\@tprel\@ha. ADDIS_GOT_TPREL_HA, /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym@got@tprel@l. This completes the addition that + /// and offset sym\@got\@tprel\@l. This completes the addition that /// finds the offset of "sym" relative to the thread pointer. 
LD_GOT_TPREL_L, /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS /// model, produces an ADD instruction that adds the contents of /// G8RReg to the thread pointer. Symbol contains a relocation - /// sym@tls which is to be replaced by the thread pointer and + /// sym\@tls which is to be replaced by the thread pointer and /// identifies to the linker that the instruction is part of a /// TLS sequence. ADD_TLS, /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym@got@tlsgd@ha. + /// register to sym\@got\@tlsgd\@ha. ADDIS_TLSGD_HA, /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@tlsgd@l. + /// sym\@got\@tlsgd\@l. ADDI_TLSGD_L, /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym@tlsgd). + /// model, produces a call to __tls_get_addr(sym\@tlsgd). GET_TLS_ADDR, /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym@got@tlsld@ha. + /// register to sym\@got\@tlsld\@ha. ADDIS_TLSLD_HA, /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@tlsld@l. + /// sym\@got\@tlsld\@l. ADDI_TLSLD_L, /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym@tlsld). + /// model, produces a call to __tls_get_addr(sym\@tlsld). GET_TLSLD_ADDR, /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the /// local-dynamic TLS model, produces an ADDIS8 instruction - /// that adds X3 to sym@dtprel@ha. The Chain operand is needed + /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed /// to tie this in place following a copy to %X3 from the result /// of a GET_TLSLD_ADDR. 
ADDIS_DTPREL_HA, /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@dtprel@l. + /// sym\@got\@dtprel\@l. ADDI_DTPREL_L, /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded @@ -238,6 +238,10 @@ namespace llvm { /// optimizations due to constant folding. VADD_SPLAT, + /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned + /// operand identifies the operating system entry point. + SC, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or @@ -266,16 +270,16 @@ namespace llvm { /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, /// produces an ADDIS8 instruction that adds the TOC base register to - /// sym@toc@ha. + /// sym\@toc\@ha. ADDIS_TOC_HA, /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model, /// produces a LD instruction with base register G8RReg and offset - /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. LD_TOC_L, /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces - /// an ADDI8 instruction that adds G8RReg to sym@toc@l. + /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l. /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. ADDI_TOC_L }; @@ -450,7 +454,7 @@ namespace llvm { /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. 
virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index b6f4e85215..a24405851c 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -145,6 +145,19 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, let Inst{31} = lk; } +// 1.7.3 SC-Form +class SCForm<bits<6> opcode, bits<1> xo, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<7> LEV; + + let Pattern = pattern; + + let Inst{20-26} = LEV; + let Inst{30} = xo; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 4763069f25..9c39b34ab0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -162,6 +162,10 @@ def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; +def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -987,6 +991,12 @@ let isBranch = 1, isTerminator = 1 in { "#EH_SjLj_Setup\t$dst", []>; } +// System call. +let PPC970_Unit = 7 in { + def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), + "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>; +} + // DCB* instructions. 
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 14dc794195..0b099edff4 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -48,6 +48,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) setMCUseCFI(false); + initAsmInfo(); } void PPC32TargetMachine::anchor() { } diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp new file mode 100644 index 0000000000..90e4f15452 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -0,0 +1,57 @@ +//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPCTargetObjectFile.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +void +PPC64LinuxTargetObjectFile:: +Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + +const MCSection * PPC64LinuxTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + const MCSection *DefaultSection = + TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); + + if (DefaultSection != ReadOnlySection) + return DefaultSection; + + // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI + // when we have a constant that contains global relocations. 
This is + // necessary because of this ABI's handling of pointers to functions in + // a shared library. The address of a function is actually the address + // of a function descriptor, which resides in the .opd section. Generated + // code uses the descriptor directly rather than going via the GOT as some + // other ABIs do, which means that initialized function pointers must + // reference the descriptor. The linker must convert copy relocs of + // pointers to functions in shared libraries into dynamic relocations, + // because of an ordering problem with initialization of copy relocs and + // PLT entries. The dynamic relocation will be initialized by the dynamic + // linker, so we must use DataRelROSection instead of ReadOnlySection. + // For more information, see the description of ELIMINATE_COPY_RELOCS in + // GNU ld. + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + + if (GVar && GVar->isConstant() && + (GVar->getInitializer()->getRelocationInfo() == + Constant::GlobalRelocations)) + return DataRelROSection; + + return DefaultSection; +} diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h new file mode 100644 index 0000000000..9203e23574 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -0,0 +1,32 @@ +//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H +#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + + /// PPC64LinuxTargetObjectFile - This implementation is used for + /// 64-bit PowerPC Linux. 
+ class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 31fbf32d0c..7175ec941a 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -70,6 +70,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, InstrInfo = new SIInstrInfo(*this); TLInfo = new SITargetLowering(*this); } + initAsmInfo(); } AMDGPUTargetMachine::~AMDGPUTargetMachine() { diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp index 178795936a..126514b976 100644 --- a/lib/Target/R600/AMDILDeviceInfo.cpp +++ b/lib/Target/R600/AMDILDeviceInfo.cpp @@ -81,7 +81,8 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName, return new AMDGPUNIDevice(ptr); } else if (deviceName == "SI" || deviceName == "tahiti" || deviceName == "pitcairn" || - deviceName == "verde" || deviceName == "oland") { + deviceName == "verde" || deviceName == "oland" || + deviceName == "hainan") { return new AMDGPUSIDevice(ptr); } else { #if DEBUG diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 45d009c2a0..6f66aa898a 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -78,7 +78,7 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { return createSIMCCodeEmitter(MCII, MRI, STI, Ctx); } else { - return createR600MCCodeEmitter(MCII, MRI, STI, Ctx); + return createR600MCCodeEmitter(MCII, MRI); } } diff --git 
a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index 09d0d5b61c..95c572c21b 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -32,9 +32,7 @@ class raw_ostream; extern Target TheAMDGPUTarget; MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); + const MCRegisterInfo &MRI); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 271a974734..3404844435 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -35,14 +35,11 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; - const MCSubtargetInfo &STI; - MCContext &Ctx; public: - R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, - const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri) + : MCII(mcii), MRI(mri) { } /// \brief Encode the instruction and write it to the OS. 
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -98,10 +95,8 @@ enum TextureTypes { }; MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new R600MCCodeEmitter(MCII, MRI, STI, Ctx); + const MCRegisterInfo &MRI) { + return new R600MCCodeEmitter(MCII, MRI); } void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index 5ee1c0d8ae..0cbe919d81 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -45,3 +45,4 @@ def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"oland", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"hainan", SI_Itin, [Feature64BitPtr, FeatureFP64]>; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 60bceb708f..3e7a24aecf 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -37,6 +37,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index c7725a1459..7f2159f79e 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -250,46 +251,6 @@ public: bool isS32Imm() const { 
return isImm(-(1LL << 31), (1LL << 31) - 1); } }; -// Maps of asm register numbers to LLVM register numbers, with 0 indicating -// an invalid register. We don't use register class directly because that -// specifies the allocation order. -static const unsigned GR32Regs[] = { - SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, - SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, - SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, - SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W -}; -static const unsigned GR64Regs[] = { - SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, - SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, - SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, - SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D -}; -static const unsigned GR128Regs[] = { - SystemZ::R0Q, 0, SystemZ::R2Q, 0, - SystemZ::R4Q, 0, SystemZ::R6Q, 0, - SystemZ::R8Q, 0, SystemZ::R10Q, 0, - SystemZ::R12Q, 0, SystemZ::R14Q, 0 -}; -static const unsigned FP32Regs[] = { - SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, - SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, - SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, - SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S -}; -static const unsigned FP64Regs[] = { - SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, - SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, - SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, - SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D -}; -static const unsigned FP128Regs[] = { - SystemZ::F0Q, SystemZ::F1Q, 0, 0, - SystemZ::F4Q, SystemZ::F5Q, 0, 0, - SystemZ::F8Q, SystemZ::F9Q, 0, 0, - SystemZ::F12Q, SystemZ::F13Q, 0, 0 -}; - class SystemZAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "SystemZGenAsmMatcher.inc" @@ -349,25 +310,28 @@ public: // Used by the TableGen code to parse particular operand types. 
OperandMatchResultTy parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg); + return parseRegister(Operands, 'r', SystemZMC::GR32Regs, + SystemZOperand::GR32Reg); } OperandMatchResultTy parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg); + return parseRegister(Operands, 'r', SystemZMC::GR64Regs, + SystemZOperand::GR64Reg); } OperandMatchResultTy parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg); + return parseRegister(Operands, 'r', SystemZMC::GR128Regs, + SystemZOperand::GR128Reg); } OperandMatchResultTy parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg, - true); + return parseRegister(Operands, 'r', SystemZMC::GR32Regs, + SystemZOperand::ADDR32Reg, true); } OperandMatchResultTy parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg, - true); + return parseRegister(Operands, 'r', SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, true); } OperandMatchResultTy parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -375,30 +339,47 @@ public: } OperandMatchResultTy parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg); + return parseRegister(Operands, 'f', SystemZMC::FP32Regs, + SystemZOperand::FP32Reg); } OperandMatchResultTy parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg); + return parseRegister(Operands, 'f', SystemZMC::FP64Regs, + SystemZOperand::FP64Reg); } OperandMatchResultTy parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP128Regs, 
SystemZOperand::FP128Reg); + return parseRegister(Operands, 'f', SystemZMC::FP128Regs, + SystemZOperand::FP128Reg); } OperandMatchResultTy parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false); + return parseAddress(Operands, SystemZMC::GR32Regs, + SystemZOperand::ADDR32Reg, false); } OperandMatchResultTy parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false); + return parseAddress(Operands, SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, false); } OperandMatchResultTy parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true); + return parseAddress(Operands, SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, true); } OperandMatchResultTy parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + OperandMatchResultTy + parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + int64_t MinVal, int64_t MaxVal); + OperandMatchResultTy + parsePCRel16(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1); + } + OperandMatchResultTy + parsePCRel32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1); + } }; } @@ -502,7 +483,8 @@ SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, // Parse the first register. 
Register Reg; - OperandMatchResultTy Result = parseRegister(Reg, 'r', GR64Regs, true); + OperandMatchResultTy Result = parseRegister(Reg, 'r', SystemZMC::GR64Regs, + true); if (Result != MatchOperand_Success) return Result; @@ -517,7 +499,7 @@ SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, } Index = Reg.Number; - Result = parseRegister(Reg, 'r', GR64Regs, true); + Result = parseRegister(Reg, 'r', SystemZMC::GR64Regs, true); if (Result != MatchOperand_Success) return Result; } @@ -546,9 +528,9 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, if (parseRegister(Reg)) return Error(Reg.StartLoc, "register expected"); if (Reg.Prefix == 'r' && Reg.Number < 16) - RegNo = GR64Regs[Reg.Number]; + RegNo = SystemZMC::GR64Regs[Reg.Number]; else if (Reg.Prefix == 'f' && Reg.Number < 16) - RegNo = FP64Regs[Reg.Number]; + RegNo = SystemZMC::FP64Regs[Reg.Number]; else return Error(Reg.StartLoc, "invalid register"); StartLoc = Reg.StartLoc; @@ -683,6 +665,37 @@ parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } +SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: +parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + int64_t MinVal, int64_t MaxVal) { + MCContext &Ctx = getContext(); + MCStreamer &Out = getStreamer(); + const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return MatchOperand_NoMatch; + + // For consistency with the GNU assembler, treat immediates as offsets + // from ".". + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + if ((Value & 1) || Value < MinVal || Value > MaxVal) { + Error(StartLoc, "offset out of range"); + return MatchOperand_ParseFail; + } + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + Ctx); + Expr = Value == 0 ? 
Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + return MatchOperand_Success; +} + // Force static initialization. extern "C" void LLVMInitializeSystemZAsmParser() { RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget); diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 67b17fcc59..757d5a8898 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -4,6 +4,7 @@ tablegen(LLVM SystemZGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM SystemZGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM SystemZGenCallingConv.inc -gen-callingconv) tablegen(LLVM SystemZGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) @@ -27,6 +28,7 @@ add_llvm_target(SystemZCodeGen add_dependencies(LLVMSystemZCodeGen intrinsics_gen) add_subdirectory(AsmParser) +add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/SystemZ/Disassembler/CMakeLists.txt b/lib/Target/SystemZ/Disassembler/CMakeLists.txt new file mode 100644 index 0000000000..5bc1859816 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMSystemZDisassembler + SystemZDisassembler.cpp + ) + +add_dependencies(LLVMSystemZDisassembler SystemZCommonTableGen) diff --git a/lib/Target/SystemZ/Disassembler/LLVMBuild.txt b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000000..c3081f5447 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===-- ./lib/Target/SystemZ/Disassembler/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SystemZDisassembler +parent = SystemZ +required_libraries = MC Support SystemZDesc SystemZInfo +add_to_library_groups = SystemZ diff --git a/lib/Target/SystemZ/Disassembler/Makefile b/lib/Target/SystemZ/Disassembler/Makefile new file mode 100644 index 0000000000..efc4cc8e9c --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===-- lib/Target/SystemZ/Disassembler/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMSystemZDisassembler + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp new file mode 100644 index 0000000000..9a9de78224 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -0,0 +1,301 @@ +//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class SystemZDisassembler : public MCDisassembler { +public: + SystemZDisassembler(const MCSubtargetInfo &STI) + : MCDisassembler(STI) {} + virtual ~SystemZDisassembler() {} + + // Override MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const LLVM_OVERRIDE; +}; +} // end anonymous namespace + +static MCDisassembler *createSystemZDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new SystemZDisassembler(STI); +} + +extern "C" void LLVMInitializeSystemZDisassembler() { + // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheSystemZTarget, + createSystemZDisassembler); +} + +static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned *Regs, + bool isAddress = false) { + assert(RegNo < 16 && "Invalid register"); + if (!isAddress || RegNo) { + RegNo = Regs[RegNo]; + if (RegNo == 0) + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::CreateReg(RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs); +} + +static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); +} + +static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs); +} + +static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, true); +} + +static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs); +} + +static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs); +} + +static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs); +} + +template<unsigned N> +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(Imm)); + 
return MCDisassembler::Success; +} + +template<unsigned N> +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<6>(Inst, Imm); +} + +static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<32>(Inst, Imm); +} + +static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<32>(Inst, Imm); +} + +template<unsigned N> +static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address) { + assert(isUInt<N>(Imm) && "Invalid PC-relative offset"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm) * 2 + Address)); + return MCDisassembler::Success; +} 
+ +static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<16>(Inst, Imm, Address); +} + +static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<32>(Inst, Imm, Address); +} + +static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 12; + uint64_t Disp = Field & 0xfff; + assert(Base < 16 && "Invalid BDAddr12"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 20; + uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff); + assert(Base < 16 && "Invalid BDAddr20"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Index < 16 && "Invalid BDXAddr12"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 24; + uint64_t Base = (Field >> 20) & 0xf; + uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12); + assert(Index < 16 && "Invalid BDXAddr20"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 
0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +#include "SystemZGenDisassemblerTables.inc" + +DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + // Get the first two bytes of the instruction. + uint8_t Bytes[6]; + Size = 0; + if (Region.readBytes(Address, 2, Bytes, 0) == -1) + return MCDisassembler::Fail; + + // The top 2 bits of the first byte specify the size. 
+ const uint8_t *Table; + if (Bytes[0] < 0x40) { + Size = 2; + Table = DecoderTable16; + } else if (Bytes[0] < 0xc0) { + Size = 4; + Table = DecoderTable32; + } else { + Size = 6; + Table = DecoderTable48; + } + + // Read any remaining bytes. + if (Size > 2 && Region.readBytes(Address + 2, Size - 2, Bytes + 2, 0) == -1) + return MCDisassembler::Fail; + + // Construct the instruction. + uint64_t Inst = 0; + for (uint64_t I = 0; I < Size; ++I) + Inst = (Inst << 8) | Bytes[I]; + + return decodeInstruction(Table, MI, Inst, Address, this, STI); +} diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index d73cf49808..369802b2b8 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -114,10 +114,26 @@ void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, O << "%a" << (unsigned int)Value; } +void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "0x"; + O.write_hex(MO.getImm()); + } else + O << *MO.getExpr(); +} + void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - printOperand(MI, OpNum, O); - O << "@PLT"; + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "0x"; + O.write_hex(MO.getImm()); + } else { + O << *MO.getExpr(); + O << "@PLT"; + } } void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index b82e79d93c..f77282efcb 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -56,6 +56,7 @@ private: void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); 
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt index aba0de27ac..95e657f7bd 100644 --- a/lib/Target/SystemZ/LLVMBuild.txt +++ b/lib/Target/SystemZ/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -24,6 +24,7 @@ name = SystemZ parent = Target has_asmparser = 1 has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index ea2250f546..7721b1ffab 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -45,30 +45,43 @@ private: // Called by the TableGen code to get the binary encoding of operand // MO in MI. Fixups is the list of fixups against MI. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const; + // Called by the TableGen code to get the binary encoding of an address. + // The index, if any, is encoded first, followed by the base, + // followed by the displacement. In a 20-bit displacement, + // the low 12 bits are encoded before the high 8 bits. 
+ uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at // Offset bytes from the start of MI. Add the fixup to Fixups // and return the in-place addend, which since we're a RELA target // is always 0. - unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, unsigned Kind, int64_t Offset) const; - unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); } - unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); } - unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPLT16DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2); } - unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPLT32DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2); } @@ -95,34 +108,73 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, } } -unsigned SystemZMCCodeEmitter:: +uint64_t SystemZMCCodeEmitter:: 
getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); + return static_cast<uint64_t>(MO.getImm()); llvm_unreachable("Unexpected operand type!"); } -unsigned -SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum, +uint64_t SystemZMCCodeEmitter:: +getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + assert(isUInt<4>(Base) && isUInt<12>(Disp)); + return (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + assert(isUInt<4>(Base) && isInt<20>(Disp)); + return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12); +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups); + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index)); + return (Index << 16) | (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), 
Fixups); + assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index)); + return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8) + | ((Disp & 0xff000) >> 12); +} + +uint64_t +SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, unsigned Kind, int64_t Offset) const { const MCOperand &MO = MI.getOperand(OpNum); - // For compatibility with the GNU assembler, treat constant operands as - // unadjusted PC-relative offsets. + const MCExpr *Expr; if (MO.isImm()) - return MO.getImm() / 2; - - const MCExpr *Expr = MO.getExpr(); - if (Offset) { - // The operand value is relative to the start of MI, but the fixup - // is relative to the operand field itself, which is Offset bytes - // into MI. Add Offset to the relocation value to cancel out - // this difference. - const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + Expr = MCConstantExpr::Create(MO.getImm() + Offset, Ctx); + else { + Expr = MO.getExpr(); + if (Offset) { + // The operand value is relative to the start of MI, but the fixup + // is relative to the operand field itself, which is Offset bytes + // into MI. Add Offset to the relocation value to cancel out + // this difference. 
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } } Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); return 0; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 6844f92ec9..3653192d85 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -27,11 +27,55 @@ using namespace llvm; -static MCAsmInfo *createSystemZMCAsmInfo(StringRef TT) { +const unsigned SystemZMC::GR32Regs[16] = { + SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, + SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, + SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, + SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W +}; + +const unsigned SystemZMC::GR64Regs[16] = { + SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, + SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, + SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, + SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D +}; + +const unsigned SystemZMC::GR128Regs[16] = { + SystemZ::R0Q, 0, SystemZ::R2Q, 0, + SystemZ::R4Q, 0, SystemZ::R6Q, 0, + SystemZ::R8Q, 0, SystemZ::R10Q, 0, + SystemZ::R12Q, 0, SystemZ::R14Q, 0 +}; + +const unsigned SystemZMC::FP32Regs[16] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S +}; + +const unsigned SystemZMC::FP64Regs[16] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D +}; + +const unsigned SystemZMC::FP128Regs[16] = { + SystemZ::F0Q, 
SystemZ::F1Q, 0, 0, + SystemZ::F4Q, SystemZ::F5Q, 0, 0, + SystemZ::F8Q, SystemZ::F9Q, 0, 0, + SystemZ::F12Q, SystemZ::F13Q, 0, 0 +}; + +static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); - MachineLocation FPDst(MachineLocation::VirtualFP); - MachineLocation FPSrc(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP); - MAI->addInitialFrameState(0, FPDst, FPSrc); + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(SystemZ::R15D, true), + SystemZMC::CFAOffsetFromInitialSP); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 229912f161..1f70047db6 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -34,6 +34,16 @@ namespace SystemZMC { // The offset of the DWARF CFA from the incoming stack pointer. const int64_t CFAOffsetFromInitialSP = CallFrameSize; + + // Maps of asm register numbers to LLVM register numbers, with 0 indicating + // an invalid register. We don't use the register classes directly because + // they specify the allocation order. 
+ extern const unsigned GR32Regs[16]; + extern const unsigned GR64Regs[16]; + extern const unsigned GR128Regs[16]; + extern const unsigned FP32Regs[16]; + extern const unsigned FP64Regs[16]; + extern const unsigned FP128Regs[16]; } MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index c992584af9..445725bd1e 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -16,13 +16,14 @@ BUILT_SOURCES = SystemZGenRegisterInfo.inc \ SystemZGenAsmWriter.inc \ SystemZGenAsmMatcher.inc \ SystemZGenCodeEmitter.inc \ + SystemZGenDisassemblerTables.inc \ SystemZGenInstrInfo.inc \ SystemZGenDAGISel.inc \ SystemZGenSubtargetInfo.inc \ SystemZGenCallingConv.inc \ SystemZGenMCCodeEmitter.inc -DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index d1f56a4916..8f5a5476b4 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -29,17 +29,44 @@ to load 103. This seems to be a general target-independent problem. -- -The tuning of the choice between Load Address (LA) and addition in +The tuning of the choice between LOAD ADDRESS (LA) and addition in SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on performance measurements. -- +We don't support tail calls at present. + +-- + +We don't support prefetching yet. + +-- + There is no scheduling support. -- -We don't use the Branch on Count or Branch on Index families of instruction. +We don't use the BRANCH ON COUNT or BRANCH ON INDEX families of instruction. + +-- + +We might want to use BRANCH ON CONDITION for conditional indirect calls +and conditional returns. + +-- + +We don't use the combined COMPARE AND BRANCH instructions. Using them +would require a change to the way we handle out-of-range branches. 
+At the moment, we start with 32-bit forms like BRCL and shorten them +to forms like BRC where possible, but COMPARE AND BRANCH does not have +a 32-bit form. + +-- + +We should probably model just CC, not the PSW as a whole. Strictly +speaking, every instruction changes the PSW since the PSW contains the +current instruction address. -- @@ -54,7 +81,30 @@ equality after an integer comparison, etc. -- -We don't optimize string and block memory operations. +We don't use the LOAD AND TEST or TEST DATA CLASS instructions. + +-- + +We could use the generic floating-point forms of LOAD COMPLEMENT, +LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the +condition codes. For example, we could use LCDFR instead of LCDBR. + +-- + +We don't optimize block memory operations. + +It's definitely worth using things like MVC, CLC, NC, XC and OC with +constant lengths. MVCIN may be worthwhile too. + +We should probably implement things like memcpy using MVC with EXECUTE. +Likewise memcmp and CLC. MVCLE and CLCLE could be useful too. + +-- + +We don't optimize string operations. + +MVST, CLST, SRST and CUSE could be useful here. Some of the TRANSLATE +family might be too, although they are probably more difficult to exploit. -- @@ -63,9 +113,33 @@ conventions require f128s to be returned by invisible reference. -- +ADD LOGICAL WITH SIGNED IMMEDIATE could be useful when we need to +produce a carry. SUBTRACT LOGICAL IMMEDIATE could be useful when we +need to produce a borrow. (Note that there are no memory forms of +ADD LOGICAL WITH CARRY and SUBTRACT LOGICAL WITH BORROW, so the high +part of 128-bit memory operations would probably need to be done +via a register.) + +-- + +We don't use the halfword forms of LOAD REVERSED and STORE REVERSED +(LRVH and STRVH). + +-- + +We could take advantage of the various ... UNDER MASK instructions, +such as ICM and STCM. + +-- + +We could make more use of the ROTATE AND ... SELECTED BITS instructions. 
+At the moment we only use RISBG, and only then for subword atomic operations. + +-- + DAGCombiner can detect integer absolute, but there's not yet an associated -ISD opcode. We could add one and implement it using Load Positive. -Negated absolutes could use Load Negative. +ISD opcode. We could add one and implement it using LOAD POSITIVE. +Negated absolutes could use LOAD NEGATIVE. -- @@ -142,5 +216,15 @@ See CodeGen/SystemZ/alloca-01.ll for an example. -- Atomic loads and stores use the default compare-and-swap based implementation. -This is probably much too conservative in practice, and the overhead is -especially bad for 8- and 16-bit accesses. +This is much too conservative in practice, since the architecture guarantees +that 1-, 2-, 4- and 8-byte loads and stores to aligned addresses are +inherently atomic. + +-- + +If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG. + +-- + +We might want to model all access registers and use them to spill +32-bit values. diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 7c9f0e668b..104af6e99d 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -40,24 +40,22 @@ def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; // fcopysign with an FP32 result. let isCodeGenOnly = 1 in { - def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; - def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; + def CPSDRss : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; } -// The sign of an FP128 is in the high register. Give the CPSDRsd -// operands in R1, R2, R3 order. +// The sign of an FP128 is in the high register. def : Pat<(fcopysign FP32:$src1, FP128:$src2), - (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>; + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>; // fcopysign with an FP64 result. 
let isCodeGenOnly = 1 in - def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; -def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; + def CPSDRds : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; -// The sign of an FP128 is in the high register. Give the CPSDRdd -// operands in R1, R2, R3 order. +// The sign of an FP128 is in the high register. def : Pat<(fcopysign FP64:$src1, FP128:$src2), - (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>; + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>; // fcopysign with an FP128 result. Use "upper" as the high half and leave // the low half as-is. @@ -65,13 +63,12 @@ class CopySign128<RegisterOperand cls, dag upper> : Pat<(fcopysign FP128:$src1, cls:$src2), (INSERT_SUBREG FP128:$src1, upper, subreg_high)>; -// Give the CPSDR* operands in R1, R2, R3 order. -def : CopySign128<FP32, (CPSDRds FP32:$src2, - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; -def : CopySign128<FP64, (CPSDRdd FP64:$src2, - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; -def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; +def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_high), + FP32:$src2)>; +def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), + FP64:$src2)>; +def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), + (EXTRACT_SUBREG FP128:$src2, subreg_high))>; //===----------------------------------------------------------------------===// // Load instructions @@ -155,13 +152,13 @@ let Defs = [PSW] in { } // fp_to_sint always rounds towards zero, which is modifier value 5. 
-def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR FP32:$src, 5)>; -def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR FP64:$src, 5)>; -def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>; +def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; +def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; +def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; -def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR FP32:$src, 5)>; -def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR FP64:$src, 5)>; -def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>; +def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; +def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; +def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; //===----------------------------------------------------------------------===// // Unary arithmetic @@ -210,9 +207,9 @@ let Defs = [PSW] in { // frint rounds according to the current mode (modifier 0) and detects // inexact conditions. 
-def : Pat<(frint FP32:$src), (FIEBR FP32:$src, 0)>; -def : Pat<(frint FP64:$src), (FIDBR FP64:$src, 0)>; -def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>; +def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; //===----------------------------------------------------------------------===// // Binary arithmetic diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index b32b7eb0fc..bf5aa8dbeb 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -82,25 +82,24 @@ def getDisp20Opcode : InstrMapping { // // Formats are specified using operand field declarations of the form: // -// bits<4> Rn : register input or output for operand n -// bits<m> In : immediate value of width m for operand n -// bits<4> Bn : base register for address operand n -// bits<m> Dn : displacement value of width m for address operand n -// bits<4> Xn : index register for address operand n -// bits<4> Mn : mode value for operand n +// bits<4> Rn : register input or output for operand n +// bits<m> In : immediate value of width m for operand n +// bits<4> BDn : address operand n, which has a base and a displacement +// bits<m> XBDn : address operand n, which has an index, a base and a +// displacement +// bits<4> Xn : index register for address operand n +// bits<4> Mn : mode value for operand n // -// The operand numbers ("n" in the list above) follow the architecture manual, -// but the fields are always declared in assembly order, so there are some -// cases where operand "2" comes after operand "3". For address operands, -// the base register field is declared first, followed by the displacement, -// followed by the index (if any). This matches the bdaddr* and bdxaddr* -// orders. +// The operand numbers ("n" in the list above) follow the architecture manual. 
+// Assembly operands sometimes have a different order; in particular, R3 often +// is often written between operands 1 and 2. // //===----------------------------------------------------------------------===// class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<16> I2; @@ -114,6 +113,7 @@ class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -133,6 +133,7 @@ class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<32> I2; @@ -146,6 +147,7 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<2, outs, ins, asmstr, pattern> { field bits<16> Inst; + field bits<16> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -158,6 +160,7 @@ class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R3; @@ -173,6 +176,7 @@ class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -186,6 +190,7 @@ class InstRRE<bits<16> op, dag outs, dag ins, string 
asmstr, list<dag> pattern> class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -201,17 +206,14 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{31-24} = op; let Inst{23-20} = R1; - let Inst{19-16} = X2; - let Inst{15-12} = B2; - let Inst{11-0} = D2; + let Inst{19-0} = XBD2; let HasIndex = 1; } @@ -219,17 +221,14 @@ class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2; + let Inst{35-16} = XBD2; let Inst{15-8} = 0; let Inst{7-0} = op{7-0}; @@ -239,18 +238,15 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R3; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2; + let Inst{35-16} = XBD2; let Inst{15-12} = R1; let Inst{11-8} = 0; let Inst{7-0} = op{7-0}; @@ -261,18 +257,14 @@ class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXY<bits<16> op, dag 
outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<20> D2; - bits<4> X2; + bits<28> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2{11-0}; - let Inst{15-8} = D2{19-12}; + let Inst{35-8} = XBD2; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -282,34 +274,31 @@ class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<12> D2; + bits<16> BD2; let Inst{31-24} = op; let Inst{23-20} = R1; let Inst{19-16} = R3; - let Inst{15-12} = B2; - let Inst{11-0} = D2; + let Inst{15-0} = BD2; } class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<20> D2; + bits<24> BD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; let Inst{35-32} = R3; - let Inst{31-28} = B2; - let Inst{27-16} = D2{11-0}; - let Inst{15-8} = D2{19-12}; + let Inst{31-8} = BD2; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -318,44 +307,40 @@ class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; - bits<4> B1; - bits<12> D1; + bits<16> BD1; bits<8> I2; let Inst{31-24} = op; let Inst{23-16} = I2; - let Inst{15-12} = B1; - let Inst{11-0} = D1; + let Inst{15-0} = BD1; } class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { 
field bits<48> Inst; + field bits<48> SoftFail = 0; - bits<4> B1; - bits<12> D1; + bits<16> BD1; bits<16> I2; let Inst{47-32} = op; - let Inst{31-28} = B1; - let Inst{27-16} = D1; + let Inst{31-16} = BD1; let Inst{15-0} = I2; } class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; - bits<4> B1; - bits<20> D1; + bits<24> BD1; bits<8> I2; let Inst{47-40} = op{15-8}; let Inst{39-32} = I2; - let Inst{31-28} = B1; - let Inst{27-16} = D1{11-0}; - let Inst{15-8} = D1{19-12}; + let Inst{31-8} = BD1; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -432,23 +417,23 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, dag src> - : InstRRE<opcode, (outs cls:$dst), (ins), - mnemonic#"\t$dst", - [(set cls:$dst, src)]> { + : InstRRE<opcode, (outs cls:$R1), (ins), + mnemonic#"\t$R1", + [(set cls:$R1, src)]> { let R2 = 0; } class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr), - mnemonic#"\t$dst1, $dst2, $addr", []> { + : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { let mayLoad = 1; } class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, pcrel32:$addr)]> { + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, pcrel32:$I2)]> { let mayStore = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
// However, BDXs have two extra operands and are therefore 6 units more @@ -458,17 +443,17 @@ class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs), (ins cls:$src, mode:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, mode:$addr)]> { + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { let mayStore = 1; } class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, mode:$addr)]> { + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { let mayStore = 1; } @@ -483,32 +468,32 @@ multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, } class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr), - mnemonic#"\t$from, $to, $addr", []> { + : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { let mayStore = 1; } class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, mode:$addr)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mode:$BD1)]> { let mayStore = 1; } class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator 
imm:$src, mode:$addr)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mode:$BD1)]> { let mayStore = 1; } class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, Immediate imm> - : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, bdaddr12only:$addr)]> { + : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, bdaddr12only:$BD1)]> { let mayStore = 1; } @@ -524,38 +509,38 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src), - mnemonic#"\t$dst, $src", - [(set cls1:$dst, (operator cls2:$src))]>; + : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]>; class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src), - mnemonic#"\t$dst, $src", - [(set cls1:$dst, (operator cls2:$src))]>; + : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]>; class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode), - mnemonic#"\t$dst, $mode, $src", []>; + : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2), + mnemonic#"\t$R1, $R3, $R2", []>; class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs cls:$dst), (ins imm:$src), - mnemonic#"\t$dst, $src", - [(set cls:$dst, (operator imm:$src))]>; + : InstRI<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set 
cls:$R1, (operator imm:$I2))]>; class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs cls:$dst), (ins imm:$src), - mnemonic#"\t$dst, $src", - [(set cls:$dst, (operator imm:$src))]>; + : InstRIL<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator pcrel32:$addr))]> { + : InstRIL<opcode, (outs cls:$R1), (ins pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator pcrel32:$I2))]> { let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more @@ -565,25 +550,25 @@ class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs cls:$dst), (ins mode:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator mode:$addr))]> { + : InstRX<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { let mayLoad = 1; } class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator bdxaddr12only:$addr))]> { + : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> { let mayLoad = 1; } class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs cls:$dst), (ins 
mode:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator mode:$addr))]> { + : InstRXY<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { let mayLoad = 1; } @@ -599,83 +584,76 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$dst, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$dst, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } -// Here the assembly and dag operands are in natural order, -// but the first input operand maps to R3 and the second to R2. -// This is used for "CPSDR R1, R3, R2", which is equivalent to -// R1 = copysign (R3, R2). -// -// Direct uses of the instruction must pass operands in encoding order -- -// R1, R2, R3 -- so they must pass the source operands in reverse order. 
-class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1), - mnemonic#"\t$dst, $src1, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>; +class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2), + mnemonic#"\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]>; class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRIL<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { - let Constraints = "$src1 = $dst"; - let 
DisableEncoding = "$src1"; + : InstRX<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, - (load bdxaddr12only:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, + (load bdxaddr12only:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } @@ -693,18 +671,18 @@ multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store 
(operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } @@ -722,49 +700,49 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode> - : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, mode:$src2))]> { + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2), + mnemonic#"\t$R1, $BD2", + [(set cls:$R1, (operator cls:$R1src, mode:$BD2))]> { let R3 = 0; - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode> - : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src1, $src2", - [(set cls:$dst, (operator cls:$src1, mode:$src2))]>; + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator cls:$R3, mode:$BD2))]>; class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls1:$src1, cls2:$src2)]>; + : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]>; class CompareRRE<string 
mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls1:$src1, cls2:$src2)]>; + : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]>; class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, imm:$src2)]>; + : InstRI<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]>; class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, imm:$src2)]>; + : InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]>; class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load pcrel32:$src2))]> { + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, (load pcrel32:$I2))]> { let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
// However, BDXs have two extra operands and are therefore 6 units more @@ -775,26 +753,26 @@ class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load mode:$src2))]> { + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { let mayLoad = 1; } class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load bdxaddr12only:$src2))]> { + : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load bdxaddr12only:$XBD2))]> { let mayLoad = 1; } class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load mode:$src2))]> { + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { let mayLoad = 1; } @@ -814,26 +792,26 @@ multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load mode:$addr), imm:$src)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { let 
mayLoad = 1; } class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm> - : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load bdaddr12only:$addr), imm:$src)]> { + : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load bdaddr12only:$BD1), imm:$I2)]> { let mayLoad = 1; } class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load mode:$addr), imm:$src)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { let mayLoad = 1; } @@ -851,43 +829,43 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3), - mnemonic#"\t$dst, $src2, $src3", - [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2), + mnemonic#"\t$R1, $R3, $R2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXF<opcode, (outs cls:$dst), - (ins cls:$src1, cls:$src2, bdxaddr12only:$src3), - mnemonic#"\t$dst, $src2, $src3", - [(set cls:$dst, (operator cls:$src1, cls:$src2, - (load bdxaddr12only:$src3)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXF<opcode, (outs cls:$R1), + (ins 
cls:$R1src, cls:$R3, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $R3, $XBD2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, + (load bdxaddr12only:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr12only> - : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr), - mnemonic#"\t$dst, $new, $ptr", - [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> { - let Constraints = "$old = $dst"; - let DisableEncoding = "$old"; + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; let mayStore = 1; } class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr20only> - : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr), - mnemonic#"\t$dst, $new, $ptr", - [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> { - let Constraints = "$old = $dst"; - let DisableEncoding = "$old"; + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; let mayStore = 1; } @@ -904,12 +882,12 @@ multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRIEf<opcode, (outs cls1:$dst), - (ins cls1:$src1, cls2:$src2, - uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3), - mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : 
InstRIEf<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, + uimm8zx6:$I3, uimm8zx6:$I4, uimm8zx6:$I5), + mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 7ffa382d36..903fb740a4 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -42,20 +42,19 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, // Unconditional branches. R1 is the condition-code mask (all 1s). let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { let isIndirectBranch = 1 in - def BR : InstRR<0x07, (outs), (ins ADDR64:$dst), - "br\t$dst", [(brind ADDR64:$dst)]>; + def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), + "br\t$R2", [(brind ADDR64:$R2)]>; // An assembler extended mnemonic for BRC. Use a separate instruction for // the asm parser, so that we don't relax Js to external symbols into JGs. let isCodeGenOnly = 1 in - def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; - let isAsmParserOnly = 1 in - def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>; + def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>; // An assembler extended mnemonic for BRCL. (The extension is "G" // rather than "L" because "JL" is "Jump if Less".) - def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst), - "jg\t$dst", [(br bb:$dst)]>; + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + "jg\t$I2", [(br bb:$I2)]>; } // Conditional branches. It's easier for LLVM to handle these branches @@ -64,42 +63,39 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { // JE and JLH when writing out the assembly though. 
multiclass CondBranches<Operand imm, string short, string long> { let isBranch = 1, isTerminator = 1, Uses = [PSW] in { - def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>; - def L : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>; + def "" : InstRI<0xA74, (outs), (ins imm:$R1, brtarget16:$I2), short, []>; + def L : InstRIL<0xC04, (outs), (ins imm:$R1, brtarget32:$I2), long, []>; } } let isCodeGenOnly = 1 in - defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">; -let isAsmParserOnly = 1 in - defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">; + defm BRC : CondBranches<cond4, "j$R1\t$I2", "jg$R1\t$I2">; +defm AsmBRC : CondBranches<uimm8zx4, "brc\t$R1, $I2", "brcl\t$R1, $I2">; def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>; // Define AsmParser mnemonics for each condition code. multiclass CondExtendedMnemonic<bits<4> Cond, string name> { let R1 = Cond in { - def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst), - "j"##name##"\t$dst", []>; - def L : InstRIL<0xC04, (outs), (ins brtarget32:$dst), - "jg"##name##"\t$dst", []>; + def "" : InstRI<0xA74, (outs), (ins brtarget16:$I2), + "j"##name##"\t$I2", []>; + def L : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + "jg"##name##"\t$I2", []>; } } -let isAsmParserOnly = 1 in { - defm AsmJO : CondExtendedMnemonic<1, "o">; - defm AsmJH : CondExtendedMnemonic<2, "h">; - defm AsmJNLE : CondExtendedMnemonic<3, "nle">; - defm AsmJL : CondExtendedMnemonic<4, "l">; - defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; - defm AsmJLH : CondExtendedMnemonic<6, "lh">; - defm AsmJNE : CondExtendedMnemonic<7, "ne">; - defm AsmJE : CondExtendedMnemonic<8, "e">; - defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; - defm AsmJHE : CondExtendedMnemonic<10, "he">; - defm AsmJNL : CondExtendedMnemonic<11, "nl">; - defm AsmJLE : CondExtendedMnemonic<12, "le">; - defm AsmJNH : CondExtendedMnemonic<13, "nh">; - defm AsmJNO : 
CondExtendedMnemonic<14, "no">; -} +defm AsmJO : CondExtendedMnemonic<1, "o">; +defm AsmJH : CondExtendedMnemonic<2, "h">; +defm AsmJNLE : CondExtendedMnemonic<3, "nle">; +defm AsmJL : CondExtendedMnemonic<4, "l">; +defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; +defm AsmJLH : CondExtendedMnemonic<6, "lh">; +defm AsmJNE : CondExtendedMnemonic<7, "ne">; +defm AsmJE : CondExtendedMnemonic<8, "e">; +defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; +defm AsmJHE : CondExtendedMnemonic<10, "he">; +defm AsmJNL : CondExtendedMnemonic<11, "nl">; +defm AsmJLE : CondExtendedMnemonic<12, "le">; +defm AsmJNH : CondExtendedMnemonic<13, "nh">; +defm AsmJNO : CondExtendedMnemonic<14, "no">; def Select32 : SelectWrapper<GR32>; def Select64 : SelectWrapper<GR64>; @@ -112,24 +108,22 @@ def Select64 : SelectWrapper<GR64>; let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D], R1 = 14, isCodeGenOnly = 1 in { - def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops), - "bras\t%r14, $dst", []>; - def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops), - "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>; - def BASR : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops), - "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>; + def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops), + "bras\t%r14, $I2", []>; + def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$I2, variable_ops), + "brasl\t%r14, $I2", [(z_call pcrel32call:$I2)]>; + def BASR : InstRR<0x0D, (outs), (ins ADDR64:$R2, variable_ops), + "basr\t%r14, $R2", [(z_call ADDR64:$R2)]>; } // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. 
-let isAsmParserOnly = 1 in { - def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst), - "bras\t$save, $dst", []>; - def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst), - "brasl\t$save, $dst", []>; - def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst), - "basr\t$save, $dst", []>; -} +def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), + "bras\t$R1, $I2", []>; +def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), + "brasl\t$R1, $I2", []>; +def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), + "basr\t$R1, $R2", []>; //===----------------------------------------------------------------------===// // Move instructions @@ -337,21 +331,21 @@ def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>; // Load BDX-style addresses. let neverHasSideEffects = 1, Function = "la" in { let PairType = "12" in - def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src), - "la\t$dst, $src", - [(set GR64:$dst, laaddr12pair:$src)]>; + def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2), + "la\t$R1, $XBD2", + [(set GR64:$R1, laaddr12pair:$XBD2)]>; let PairType = "20" in - def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src), - "lay\t$dst, $src", - [(set GR64:$dst, laaddr20pair:$src)]>; + def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2), + "lay\t$R1, $XBD2", + [(set GR64:$R1, laaddr20pair:$XBD2)]>; } // Load a PC-relative address. There's no version of this instruction // with a 16-bit offset, so there's no relaxation. 
let neverHasSideEffects = 1 in { - def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src), - "larl\t$dst, $src", - [(set GR64:$dst, pcrel32:$src)]>; + def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2), + "larl\t$R1, $I2", + [(set GR64:$R1, pcrel32:$I2)]>; } //===----------------------------------------------------------------------===// @@ -484,6 +478,7 @@ let Defs = [PSW] in { def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>; // Subtraction of memory. + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16>; defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>; def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>; def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>; @@ -903,9 +898,9 @@ let Defs = [PSW] in { // Read a 32-bit access register into a GR32. As with all GR32 operations, // the upper 32 bits of the enclosing GR64 remain unchanged, which is useful // when a 64-bit address is stored in a pair of access registers. -def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src), - "ear\t$dst, $src", - [(set GR32:$dst, (z_extract_access access_reg:$src))]>; +def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2), + "ear\t$R1, $R2", + [(set GR32:$R1, (z_extract_access access_reg:$R2))]>; // Find leftmost one, AKA count leading zeros. 
The instruction actually // returns a pair of GR64s, the first giving the number of leading zeros diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 0abc3f7517..66d9c5fceb 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -24,14 +24,30 @@ class ImmediateAsmOperand<string name> class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> : PatLeaf<(vt imm), pred, xform>, Operand<vt> { let PrintMethod = "print"##asmop##"Operand"; + let DecoderMethod = "decode"##asmop##"Operand"; let ParserMatchClass = !cast<AsmOperandClass>(asmop); } +// Constructs an asm operand for a PC-relative address. SIZE says how +// many bits there are. +class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> { + let PredicateMethod = "isImm"; + let ParserMethod = "parsePCRel"##size; +} + +// Constructs an operand for a PC-relative address with address type VT. +// ASMOP is the associated asm operand. +class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelOperand"; + let ParserMatchClass = asmop; +} + // Constructs both a DAG pattern and instruction operand for a PC-relative -// address with address size VT. SELF is the name of the operand. -class PCRelAddress<ValueType vt, string self> +// address with address size VT. SELF is the name of the operand and +// ASMOP is the associated asm operand. +class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop> : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>, - Operand<vt> { + PCRelOperand<vt, asmop> { let MIOperandInfo = (ops !cast<Operand>(self)); } @@ -45,11 +61,14 @@ class AddressAsmOperand<string format, string bitsize, string dispsize> } // Constructs both a DAG pattern and instruction operand for an addressing mode. -// The mode is selected by custom code in selectTYPE...SUFFIX(). 
The address -// registers have BITSIZE bits and displacements have DISPSIZE bits. NUMOPS is -// the number of operands that make up an address and OPERANDS lists the types -// of those operands using (ops ...). FORMAT is the type of addressing mode, -// which needs to match the names used in AddressAsmOperand. +// The mode is selected by custom code in select<TYPE><DISPSIZE><SUFFIX>(), +// encoded by custom code in get<FORMAT><DISPSIZE>Encoding() and decoded +// by custom code in decode<TYPE><BITSIZE>Disp<DISPSIZE>Operand(). +// The address registers have BITSIZE bits and displacements have +// DISPSIZE bits. NUMOPS is the number of operands that make up an +// address and OPERANDS lists the types of those operands using (ops ...). +// FORMAT is the type of addressing mode, which needs to match the names +// used in AddressAsmOperand. class AddressingMode<string type, string bitsize, string dispsize, string suffix, int numops, string format, dag operands> : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, @@ -57,6 +76,8 @@ class AddressingMode<string type, string bitsize, string dispsize, [add, sub, or, frameindex, z_adjdynalloc]>, Operand<!cast<ValueType>("i"##bitsize)> { let PrintMethod = "print"##format##"Operand"; + let EncoderMethod = "get"##format##dispsize##"Encoding"; + let DecoderMethod = "decode"##format##bitsize##"Disp"##dispsize##"Operand"; let MIOperandInfo = operands; let ParserMatchClass = !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize); @@ -334,30 +355,39 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; // Symbolic address operands //===----------------------------------------------------------------------===// +// PC-relative asm operands. +def PCRel16 : PCRelAsmOperand<"16">; +def PCRel32 : PCRelAsmOperand<"32">; + // PC-relative offsets of a basic block. The offset is sign-extended // and multiplied by 2. 
-def brtarget16 : Operand<OtherVT> { +def brtarget16 : PCRelOperand<OtherVT, PCRel16> { let EncoderMethod = "getPC16DBLEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; } -def brtarget32 : Operand<OtherVT> { +def brtarget32 : PCRelOperand<OtherVT, PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } // A PC-relative offset of a global value. The offset is sign-extended // and multiplied by 2. -def pcrel32 : PCRelAddress<i64, "pcrel32"> { +def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } // A PC-relative offset of a global value when the value is used as a // call target. The offset is sign-extended and multiplied by 2. -def pcrel16call : PCRelAddress<i64, "pcrel16call"> { +def pcrel16call : PCRelAddress<i64, "pcrel16call", PCRel16> { let PrintMethod = "printCallOperand"; let EncoderMethod = "getPLT16DBLEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; } -def pcrel32call : PCRelAddress<i64, "pcrel32call"> { +def pcrel32call : PCRelAddress<i64, "pcrel32call", PCRel32> { let PrintMethod = "printCallOperand"; let EncoderMethod = "getPLT32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 8c4c456ef5..17450ee53e 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -33,6 +33,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, "-f32:32-f64:64-f128:64-a0:8:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this, Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 019a670083..263eb5ed9c 100644 --- 
a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1196,6 +1196,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, } } assert (Found && "Unable to rewrite ImmDisp."); + (void)Found; } else { // We have a symbolic and an immediate displacement, but no displacement // before the bracketed expression. Put the immediate displacement diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 226ebca8cb..d5aab8e0a2 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -263,7 +263,7 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { return X; } -static MCAsmInfo *createX86MCAsmInfo(StringRef TT) { +static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); bool is64Bit = TheTriple.getArch() == Triple::x86_64; @@ -290,14 +290,16 @@ static MCAsmInfo *createX86MCAsmInfo(StringRef TT) { int stackGrowth = is64Bit ? -8 : -4; // Initial state of the frame pointer is esp+stackGrowth. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(is64Bit ? X86::RSP : X86::ESP, stackGrowth); - MAI->addInitialFrameState(0, Dst, Src); + unsigned StackPtr = is64Bit ? X86::RSP : X86::ESP; + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( + 0, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth); + MAI->addInitialFrameState(Inst); // Add return address to move list - MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth); - MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP); - MAI->addInitialFrameState(0, CSDst, CSSrc); + unsigned InstPtr = is64Bit ? 
X86::RIP : X86::EIP; + MCCFIInstruction Inst2 = MCCFIInstruction::createOffset( + 0, MRI.getDwarfRegNum(InstPtr, true), stackGrowth); + MAI->addInitialFrameState(Inst2); return MAI; } diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 49721df7c1..07314a092c 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -357,21 +357,21 @@ defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw, defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b, MMX_INTALU_ITINS>; defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q, - MMX_INTALUQ_ITINS, 1>; + MMX_INTALUQ_ITINS>; defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w, MMX_PHADDSUBW>; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 00fa47f80b..0422a61fb8 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -49,6 +49,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), JITInfo(*this) { + initAsmInfo(); } void X86_64TargetMachine::anchor() { } @@ -69,6 +70,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, TLInfo(*this), 
TSInfo(*this), JITInfo(*this) { + initAsmInfo(); } /// X86TargetMachine ctor - Create an X86 target. diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index e38da34a81..10bb6dfa92 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -51,13 +51,13 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createXCoreMCAsmInfo(StringRef TT) { +static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new XCoreMCAsmInfo(TT); // Initial state of the frame pointer is SP. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(XCore::SP, 0); - MAI->addInitialFrameState(0, Dst, Src); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, XCore::SP, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 07e5fff141..3ef1520c71 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -33,6 +33,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this) { + initAsmInfo(); } namespace { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 0ef900e2b9..4a9cb27b03 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -3323,8 +3323,6 @@ bool GlobalOpt::runOnModule(Module &M) { // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); - Function *CXAAtExitFn = FindCXAAtExit(M, TLI); - bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -3342,7 +3340,9 @@ bool GlobalOpt::runOnModule(Module &M) { // Resolve aliases, when possible. 
LocalChange |= OptimizeGlobalAliases(M); - // Try to remove trivial global destructors. + // Try to remove trivial global destructors if they are not removed + // already. + Function *CXAAtExitFn = FindCXAAtExit(M, TLI); if (CXAAtExitFn) LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 87d56214a3..51ca29bc07 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -846,7 +846,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { /// FP value and: /// 1) 1/C is exact, or /// 2) reciprocal is allowed. -/// If the convertion was successful, the simplified expression "X * 1/C" is +/// If the conversion was successful, the simplified expression "X * 1/C" is /// returned; otherwise, NULL is returned. /// static Instruction *CvtFDivConstToReciprocal(Value *Dividend, diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 43e2e20035..4bf25facc6 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -107,6 +107,12 @@ namespace { return std::make_pair(Vector.begin() + Pair.first->second, false); } + iterator find(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + const_iterator find(const KeyT &Key) const { typename MapTy::const_iterator It = Map.find(Key); if (It == Map.end()) return Vector.end(); @@ -253,6 +259,40 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { return false; } +/// This is a wrapper around getUnderlyingObjCPtr along the lines of +/// GetUnderlyingObjects except that it returns early when it sees the first +/// alloca. 
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) { + SmallPtrSet<const Value *, 4> Visited; + SmallVector<const Value *, 4> Worklist; + Worklist.push_back(V); + do { + const Value *P = Worklist.pop_back_val(); + P = GetUnderlyingObjCPtr(P); + + if (isa<AllocaInst>(P)) + return true; + + if (!Visited.insert(P)) + continue; + + if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + if (const PHINode *PN = dyn_cast<const PHINode>(P)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + } while (!Worklist.empty()); + + return false; +} + + /// @} /// /// \defgroup ARCOpt ARC Optimization. @@ -300,18 +340,18 @@ STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); STATISTIC(NumRets, "Number of return value forwarding " - "retain+autoreleaes eliminated"); + "retain+autoreleases eliminated"); STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +#ifndef NDEBUG STATISTIC(NumRetainsBeforeOpt, - "Number of retains before optimization."); + "Number of retains before optimization"); STATISTIC(NumReleasesBeforeOpt, - "Number of releases before optimization."); -#ifndef NDEBUG + "Number of releases before optimization"); STATISTIC(NumRetainsAfterOpt, - "Number of retains after optimization."); + "Number of retains after optimization"); STATISTIC(NumReleasesAfterOpt, - "Number of releases after optimization."); + "Number of releases after optimization"); #endif namespace { @@ -414,8 +454,18 @@ namespace { /// sequence. SmallPtrSet<Instruction *, 2> ReverseInsertPts; + /// Does this pointer have multiple owners? 
+ /// + /// In the presence of multiple owners with the same provenance caused by + /// allocas, we can not assume that the frontend will emit balanced code + /// since it could put the release on the pointer loaded from the + /// alloca. This confuses the optimizer so we must be more conservative in + /// that case. + bool MultipleOwners; + RRInfo() : - KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0), + MultipleOwners(false) {} void clear(); @@ -428,6 +478,7 @@ namespace { void RRInfo::clear() { KnownSafe = false; IsTailCallRelease = false; + MultipleOwners = false; ReleaseMetadata = 0; Calls.clear(); ReverseInsertPts.clear(); @@ -457,10 +508,12 @@ namespace { Seq(S_None) {} void SetKnownPositiveRefCount() { + DEBUG(dbgs() << "Setting Known Positive.\n"); KnownPositiveRefCount = true; } void ClearKnownPositiveRefCount() { + DEBUG(dbgs() << "Clearing Known Positive.\n"); KnownPositiveRefCount = false; } @@ -516,6 +569,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); + RRI.MultipleOwners |= Other.RRI.MultipleOwners; // Merge the insert point sets. If there are any differences, // that makes this a partial merge. @@ -587,14 +641,26 @@ namespace { /// definition. void SetAsExit() { BottomUpPathCount = 1; } + /// Attempt to find the PtrState object describing the top down state for + /// pointer Arg. Return a new initialized PtrState describing the top down + /// state for Arg if we do not find one. PtrState &getPtrTopDownState(const Value *Arg) { return PerPtrTopDown[Arg]; } + /// Attempt to find the PtrState object describing the bottom up state for + /// pointer Arg. Return a new initialized PtrState describing the bottom up + /// state for Arg if we do not find one. 
PtrState &getPtrBottomUpState(const Value *Arg) { return PerPtrBottomUp[Arg]; } + /// Attempt to find the PtrState object describing the bottom up state for + /// pointer Arg. + ptr_iterator findPtrBottomUpState(const Value *Arg) { + return PerPtrBottomUp.find(Arg); + } + void clearBottomUpPointers() { PerPtrBottomUp.clear(); } @@ -1440,11 +1506,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_RetainBlock: // If we strength reduce an objc_retainBlock to an objc_retain, continue // onto the objc_retain peephole optimizations. Otherwise break. - if (!OptimizeRetainBlockCall(F, Inst, Class)) - break; - // FALLTHROUGH - case IC_Retain: - ++NumRetainsBeforeOpt; + OptimizeRetainBlockCall(F, Inst, Class); break; case IC_RetainRV: if (OptimizeRetainRVCall(F, Inst)) @@ -1453,9 +1515,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_AutoreleaseRV: OptimizeAutoreleaseRVCall(F, Inst, Class); break; - case IC_Release: - ++NumReleasesBeforeOpt; - break; } // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. @@ -1866,6 +1925,28 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case IC_None: // These are irrelevant. return NestingDetected; + case IC_User: + // If we have a store into an alloca of a pointer we are tracking, the + // pointer has multiple owners implying that we must be more conservative. + // + // This comes up in the context of a pointer being ``KnownSafe''. In the + // presense of a block being initialized, the frontend will emit the + // objc_retain on the original pointer and the release on the pointer loaded + // from the alloca. The optimizer will through the provenance analysis + // realize that the two are related, but since we only require KnownSafe in + // one direction, will match the inner retain on the original pointer with + // the guard release on the original pointer. 
This is fixed by ensuring that + // in the presense of allocas we only unconditionally remove pointers if + // both our retain and our release are KnownSafe. + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) { + BBState::ptr_iterator I = MyStates.findPtrBottomUpState( + StripPointerCastsAndObjCCalls(SI->getValueOperand())); + if (I != MyStates.bottom_up_ptr_end()) + I->second.RRI.MultipleOwners = true; + } + } + break; default: break; } @@ -2412,8 +2493,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> bool KnownSafe, bool &AnyPairsCompletelyEliminated) { // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. + // is already incremented, we can similarly ignore possible decrements unless + // we are dealing with a retainable object with multiple provenance sources. bool KnownSafeTD = true, KnownSafeBU = true; + bool MultipleOwners = false; // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. @@ -2432,6 +2515,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> assert(It != Retains.end()); const RRInfo &NewRetainRRI = It->second; KnownSafeTD &= NewRetainRRI.KnownSafe; + MultipleOwners |= NewRetainRRI.MultipleOwners; for (SmallPtrSet<Instruction *, 2>::const_iterator LI = NewRetainRRI.Calls.begin(), LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { @@ -2525,9 +2609,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (NewRetains.empty()) break; } - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { + // If the pointer is known incremented in 1 direction and we do not have + // MultipleOwners, we can safely remove the retain/releases. 
Otherwise we need + // to be known safe in both directions. + bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) || + ((KnownSafeTD || KnownSafeBU) && !MultipleOwners); + if (UnconditionallySafe) { RetainsToMove.ReverseInsertPts.clear(); ReleasesToMove.ReverseInsertPts.clear(); NewCount = 0; @@ -3050,6 +3137,12 @@ bool ObjCARCOpt::runOnFunction(Function &F) { PA.setAA(&getAnalysis<AliasAnalysis>()); +#ifndef NDEBUG + if (AreStatisticsEnabled()) { + GatherStatistics(F, false); + } +#endif + // This pass performs several distinct transformations. As a compile-time aid // when compiling code that isn't ObjC, skip these if the relevant ObjC // library functions aren't declared. diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0dd6abb1ae..58a1a74655 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -318,6 +318,93 @@ private: ValueMap WidenMap; }; +/// \brief Check if conditionally executed loads are hoistable. +/// +/// This class has two functions: isHoistableLoad and canHoistAllLoads. +/// isHoistableLoad should be called on all load instructions that are executed +/// conditionally. After all conditional loads are processed, the client should +/// call canHoistAllLoads to determine if all of the conditional executed loads +/// have an unconditional memory access to the same memory address in the loop. +class LoadHoisting { + typedef SmallPtrSet<Value *, 8> MemorySet; + + Loop *TheLoop; + DominatorTree *DT; + MemorySet CondLoadAddrSet; + +public: + LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {} + + /// \brief Check if the instruction is a load with a identifiable address. + bool isHoistableLoad(Instruction *L); + + /// \brief Check if all of the conditional loads are hoistable because there + /// exists an unconditional memory access to the same address in the loop. 
+ bool canHoistAllLoads(); +}; + +bool LoadHoisting::isHoistableLoad(Instruction *L) { + LoadInst *LI = dyn_cast<LoadInst>(L); + if (!LI) + return false; + + CondLoadAddrSet.insert(LI->getPointerOperand()); + return true; +} + +static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + Value *Addr = 0; + + // Try a load. + LoadInst *LI = dyn_cast<LoadInst>(I); + if (LI) { + Addr = LI->getPointerOperand(); + Set.insert(Addr); + continue; + } + + // Try a store. + StoreInst *SI = dyn_cast<StoreInst>(I); + if (!SI) + continue; + + Addr = SI->getPointerOperand(); + Set.insert(Addr); + } +} + +bool LoadHoisting::canHoistAllLoads() { + // No conditional loads. + if (CondLoadAddrSet.empty()) + return true; + + MemorySet UncondMemAccesses; + std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector(); + BasicBlock *LoopLatch = TheLoop->getLoopLatch(); + + // Iterate over the unconditional blocks and collect memory access addresses. + for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { + BasicBlock *BB = LoopBlocks[i]; + + // Ignore conditional blocks. + if (BB != LoopLatch && !DT->dominates(BB, LoopLatch)) + continue; + + addMemAccesses(BB, UncondMemAccesses); + } + + // And make sure there is a matching unconditional access for every + // conditional load. + for (MemorySet::iterator MI = CondLoadAddrSet.begin(), + ME = CondLoadAddrSet.end(); MI != ME; ++MI) + if (!UncondMemAccesses.count(*MI)) + return false; + + return true; +} + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. 
/// This class does not look at the profitability of vectorization, only the @@ -337,7 +424,8 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {} + Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false), + LoadSpeculation(L, DT) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -598,6 +686,9 @@ private: RuntimePointerCheck PtrRtCheck; /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; + + /// Utility to determine whether loads can be speculated. + LoadHoisting LoadSpeculation; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1389,9 +1480,10 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { - // Handle the integer induction counter: + // Handle the integer induction counter. assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); - assert(OrigPhi == OldInduction && "Unknown integer PHI"); + + // We have the canonical induction variable. if (OrigPhi == OldInduction) { // Create a truncated version of the resume value for the scalar loop, // we might have promoted the type to a larger width. @@ -1402,11 +1494,20 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); TruncResumeVal->addIncoming(EndValue, VecBody); + + // We know what the end value is. + EndValue = IdxEndRoundDown; + // We also know which PHI node holds it. + ResumeIndex = ResumeVal; + break; } - // We know what the end value is. - EndValue = IdxEndRoundDown; - // We also know which PHI node holds it. 
- ResumeIndex = ResumeVal; + + // Not the canonical induction variable - add the vector loop count to the + // start value. + Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, + II.StartValue->getType(), + "cast.crd"); + EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end"); break; } case LoopVectorizationLegality::IK_ReverseIntInduction: { @@ -2056,12 +2157,25 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { - assert(P == OldInduction && "Unexpected PHI"); - // We might have had to extend the type. - Value *Trunc = Builder.CreateTrunc(Induction, P->getType()); - Value *Broadcasted = getBroadcastInstrs(Trunc); - // After broadcasting the induction variable we need to make the - // vector consecutive by adding 0, 1, 2 ... + assert(P->getType() == II.StartValue->getType() && "Types must match"); + Type *PhiTy = P->getType(); + Value *Broadcasted; + if (P == OldInduction) { + // Handle the canonical induction variable. We might have had to + // extend the type. + Broadcasted = Builder.CreateTrunc(Induction, PhiTy); + } else { + // Handle other induction variables that are now based on the + // canonical one. + Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, + "normalized.idx"); + NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); + Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, + "offset.idx"); + } + Broadcasted = getBroadcastInstrs(Broadcasted); + // After broadcasting the induction variable we need to make the vector + // consecutive by adding 0, 1, 2, etc. for (unsigned part = 0; part < UF; ++part) Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); continue; @@ -2466,11 +2580,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Int inductions are special because we only allow one IV. 
if (IK == IK_IntInduction) { - if (Induction) { - DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); - return false; - } - Induction = Phi; + // Use the phi node with the widest type as induction. Use the last + // one if there are multiple (no good reason for doing this other + // than it is expedient). + if (!Induction || PhiTy == WidestIndTy) + Induction = Phi; } DEBUG(dbgs() << "LV: Found an induction variable.\n"); @@ -3236,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // We don't predicate loads/stores at the moment. - if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow()) + // We might be able to hoist the load. + if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it)) + return false; + + // We don't predicate stores at the moment. + if (it->mayWriteToMemory() || it->mayThrow()) return false; // The instructions below can trap. @@ -3251,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { } } + // Check that we can actually speculate the hoistable loads. 
+ if (!LoadSpeculation.canHoistAllLoads()) + return false; + return true; } diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 55adf8a816..50d2af0f65 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -282,6 +282,7 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { DEBUG(dbgs()<<"SLP: Adding to MustExtract " "because of a safe out of tree usage.\n"); MustExtract.insert(*it); + continue; } if (Lane == -1) Lane = LaneMap[*I]; if (Lane != LaneMap[*I]) { @@ -610,6 +611,9 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { GatherInstructions.push_back(Vec); } + for (unsigned i = 0; i < Ty->getNumElements(); ++i) + VectorizedValues[VL[i]] = Vec; + return Vec; } @@ -617,6 +621,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { Value *V = vectorizeTree_rec(VL, VF); Instruction *LastInstr = GetLastInstr(VL, VL.size()); + int LastInstrIdx = InstrIdx[LastInstr]; IRBuilder<> Builder(LastInstr); for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end(); it != e; ++it) { @@ -625,7 +630,16 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { assert(LaneMap.count(I) && "Unable to find the lane for the external use"); Value *Idx = Builder.getInt32(LaneMap[I]); Value *Extract = Builder.CreateExtractElement(Vec, Idx); - I->replaceAllUsesWith(Extract); + bool Replaced = false; + for (Value::use_iterator U = I->use_begin(), UE = U->use_end(); U != UE; + ++U) { + Instruction *UI = cast<Instruction>(*U); + if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx) + UI->replaceUsesOfWith(I ,Extract); + Replaced = true; + } + assert(Replaced && "Must replace at least one outside user"); + (void)Replaced; } // We moved some instructions around. 
We have to number them again @@ -633,6 +647,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { numberInstructions(); MustScalarize.clear(); MustExtract.clear(); + VectorizedValues.clear(); return V; } @@ -690,7 +705,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { IRBuilder<> Builder(GetLastInstr(VL, VF)); CastInst *CI = dyn_cast<CastInst>(VL0); Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); - VectorizedValues[VL0] = V; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = V; + return V; } case Instruction::Add: @@ -713,16 +731,19 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { case Instruction::Xor: { ValueList LHSVL, RHSVL; for (int i = 0; i < VF; ++i) { - RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); - LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); + LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); } - Value *RHS = vectorizeTree_rec(RHSVL, VF); Value *LHS = vectorizeTree_rec(LHSVL, VF); + Value *RHS = vectorizeTree_rec(RHSVL, VF); IRBuilder<> Builder(GetLastInstr(VL, VF)); BinaryOperator *BinOp = cast<BinaryOperator>(VL0); - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); - VectorizedValues[VL0] = V; + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS); + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = V; + return V; } case Instruction::Load: { @@ -739,7 +760,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { VecTy->getPointerTo()); LI = Builder.CreateLoad(VecPtr); LI->setAlignment(Alignment); - VectorizedValues[VL0] = LI; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = LI; + return LI; } case Instruction::Store: { @@ -762,9 +786,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { return 0; } default: - Value *S = Scalarize(VL, VecTy); - VectorizedValues[VL0] = S; - return S; + return Scalarize(VL, VecTy); 
} } diff --git a/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll b/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll new file mode 100644 index 0000000000..8f6709ec5e --- /dev/null +++ b/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s +target triple = "armv7-none-linux-gnueabi" + +define <3 x i64> @shuffle(i1 %dec1, i1 %dec0, <3 x i64> %b) { +entry: + %.sink = select i1 %dec1, <3 x i64> %b, <3 x i64> zeroinitializer + %.sink15 = select i1 %dec0, <3 x i64> %b, <3 x i64> zeroinitializer + %vecinit7 = shufflevector <3 x i64> %.sink, <3 x i64> %.sink15, <3 x i32> <i32 0, i32 4, i32 undef> + ret <3 x i64> %vecinit7 +} diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index 58fbbda0f6..c7e17ea353 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -162,3 +162,20 @@ define i32 @t9() nounwind { declare %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock*) unnamed_addr nounwind align 2 declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nounwind align 2 + +; rdar://13827621 +; Correctly preserve the input chain for the tailcall node in the bitcast case, +; otherwise the call to floorf is lost. 
+define float @libcall_tc_test2(float* nocapture %a, float %b) { +; CHECKT2D: libcall_tc_test2: +; CHECKT2D: blx _floorf +; CHECKT2D: b.w _truncf + %1 = load float* %a, align 4 + %call = tail call float @floorf(float %1) + store float %call, float* %a, align 4 + %call1 = tail call float @truncf(float %b) + ret float %call1 +} + +declare float @floorf(float) readnone +declare float @truncf(float) readnone diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll index 60bc6a62f5..28a84e3bf9 100644 --- a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll +++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll @@ -26,8 +26,8 @@ entry: ; THUMB: t2 %addr = alloca i32*, align 4 store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4 -; ARM: movw r1, #1148 -; ARM: add r0, r0, r1 +; ARM: movw [[R:r[0-9]+]], #1148 +; ARM: add r0, r{{[0-9]+}}, [[R]] ; THUMB: addw r0, r0, #1148 %0 = load i32** %addr, align 4 ret i32* %0 diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll index 4e6efd2489..aefe200dc7 100644 --- a/test/CodeGen/ARM/fast-isel-br-const.ll +++ b/test/CodeGen/ARM/fast-isel-br-const.ll @@ -7,8 +7,8 @@ entry: ; ARM: t1: %x = add i32 %a, %b br i1 1, label %if.then, label %if.else -; THUMB-NOT: b LBB0_1 -; ARM-NOT: b LBB0_1 +; THUMB-NOT: b {{\.?}}LBB0_1 +; ARM-NOT: b {{\.?}}LBB0_1 if.then: ; preds = %entry call void @foo1() @@ -16,8 +16,8 @@ if.then: ; preds = %entry if.else: ; preds = %entry br i1 0, label %if.then2, label %if.else3 -; THUMB: b LBB0_4 -; ARM: b LBB0_4 +; THUMB: b {{\.?}}LBB0_4 +; ARM: b {{\.?}}LBB0_4 if.then2: ; preds = %if.else call void @foo2() @@ -26,8 +26,8 @@ if.then2: ; preds = %if.else if.else3: ; preds = %if.else %y = sub i32 %a, %b br i1 1, label %if.then5, label %if.end -; THUMB-NOT: b LBB0_5 -; ARM-NOT: b LBB0_5 +; THUMB-NOT: b {{\.?}}LBB0_5 +; ARM-NOT: b {{\.?}}LBB0_5 if.then5: ; preds = %if.else3 
call void @foo1() diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll index b6f201728c..46d5f997c6 100644 --- a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll +++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Fast-isel can't handle non-double multi-reg retvals. ; This test just check to make sure we don't hit the assert in FinishCall. 
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index b6c9098613..6ee2c349ab 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -2,8 +2,12 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP + +; Note that some of these tests assume that relocations are either +; movw/movt or constant pool loads. Different platforms will select +; different approaches. 
define i32 @t0(i1 zeroext %a) nounwind { %1 = zext i1 %a to i32 @@ -88,53 +92,53 @@ declare zeroext i1 @t9(); define i32 @t10(i32 %argc, i8** nocapture %argv) { entry: ; ARM: @t10 -; ARM: movw r0, #0 -; ARM: movw r1, #248 -; ARM: movw r2, #187 -; ARM: movw r3, #28 -; ARM: movw r9, #40 -; ARM: movw r12, #186 -; ARM: uxtb r0, r0 -; ARM: uxtb r1, r1 -; ARM: uxtb r2, r2 -; ARM: uxtb r3, r3 -; ARM: uxtb r9, r9 -; ARM: str r9, [sp] -; ARM: uxtb r9, r12 -; ARM: str r9, [sp, #4] -; ARM: bl _bar +; ARM: movw [[R0:l?r[0-9]*]], #0 +; ARM: movw [[R1:l?r[0-9]*]], #248 +; ARM: movw [[R2:l?r[0-9]*]], #187 +; ARM: movw [[R3:l?r[0-9]*]], #28 +; ARM: movw [[R4:l?r[0-9]*]], #40 +; ARM: movw [[R5:l?r[0-9]*]], #186 +; ARM: uxtb [[R0]], [[R0]] +; ARM: uxtb [[R1]], [[R1]] +; ARM: uxtb [[R2]], [[R2]] +; ARM: uxtb [[R3]], [[R3]] +; ARM: uxtb [[R4]], [[R4]] +; ARM: str [[R4]], [sp] +; ARM: uxtb [[R4]], [[R5]] +; ARM: str [[R4]], [sp, #4] +; ARM: bl {{_?}}bar ; ARM-LONG: @t10 -; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr -; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr -; ARM-LONG: ldr lr, [lr] -; ARM-LONG: blx lr +; ARM-LONG: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; ARM-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} +; ARM-LONG: ldr [[R]], {{\[}}[[R]]{{\]}} +; ARM-LONG: blx [[R]] ; THUMB: @t10 -; THUMB: movs r0, #0 -; THUMB: movt r0, #0 -; THUMB: movs r1, #248 -; THUMB: movt r1, #0 -; THUMB: movs r2, #187 -; THUMB: movt r2, #0 -; THUMB: movs r3, #28 -; THUMB: movt r3, #0 -; THUMB: movw r9, #40 -; THUMB: movt r9, #0 -; THUMB: movw r12, #186 -; THUMB: movt r12, #0 -; THUMB: uxtb r0, r0 -; THUMB: uxtb r1, r1 -; THUMB: uxtb r2, r2 -; THUMB: uxtb r3, r3 -; THUMB: uxtb.w r9, r9 -; THUMB: str.w r9, [sp] -; THUMB: uxtb.w r9, r12 -; THUMB: str.w r9, [sp, #4] -; THUMB: bl _bar +; THUMB: movs [[R0:l?r[0-9]*]], #0 +; THUMB: movt [[R0]], #0 +; THUMB: movs [[R1:l?r[0-9]*]], #248 +; THUMB: movt [[R1]], #0 +; THUMB: movs [[R2:l?r[0-9]*]], #187 +; THUMB: 
movt [[R2]], #0 +; THUMB: movs [[R3:l?r[0-9]*]], #28 +; THUMB: movt [[R3]], #0 +; THUMB: movw [[R4:l?r[0-9]*]], #40 +; THUMB: movt [[R4]], #0 +; THUMB: movw [[R5:l?r[0-9]*]], #186 +; THUMB: movt [[R5]], #0 +; THUMB: uxtb [[R0]], [[R0]] +; THUMB: uxtb [[R1]], [[R1]] +; THUMB: uxtb [[R2]], [[R2]] +; THUMB: uxtb [[R3]], [[R3]] +; THUMB: uxtb.w [[R4]], [[R4]] +; THUMB: str.w [[R4]], [sp] +; THUMB: uxtb.w [[R4]], [[R5]] +; THUMB: str.w [[R4]], [sp, #4] +; THUMB: bl {{_?}}bar ; THUMB-LONG: @t10 -; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr -; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr -; THUMB-LONG: ldr.w lr, [lr] -; THUMB-LONG: blx lr +; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} +; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}} +; THUMB-LONG: blx [[R]] %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) ret i32 0 } @@ -147,12 +151,12 @@ define i32 @bar0(i32 %i) nounwind { define void @foo3() uwtable { ; ARM: movw r0, #0 -; ARM: movw r1, :lower16:_bar0 -; ARM: movt r1, :upper16:_bar0 +; ARM: {{(movw r1, :lower16:_?bar0)|(ldr r1, .LCPI)}} +; ARM: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}} ; ARM: blx r1 ; THUMB: movs r0, #0 -; THUMB: movw r1, :lower16:_bar0 -; THUMB: movt r1, :upper16:_bar0 +; THUMB: {{(movw r1, :lower16:_?bar0)|(ldr.n r1, .LCPI)}} +; THUMB: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}} ; THUMB: blx r1 %fptr = alloca i32 (i32)*, align 8 store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8 @@ -164,66 +168,23 @@ define void @foo3() uwtable { define i32 @LibCall(i32 %a, i32 %b) { entry: ; ARM: LibCall -; ARM: bl ___udivsi3 +; ARM: bl {{___udivsi3|__aeabi_uidiv}} ; ARM-LONG: LibCall -; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr -; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr +; ARM-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr r2, .LCPI)}} 
+; ARM-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}} ; ARM-LONG: ldr r2, [r2] ; ARM-LONG: blx r2 ; THUMB: LibCall -; THUMB: bl ___udivsi3 +; THUMB: bl {{___udivsi3|__aeabi_uidiv}} ; THUMB-LONG: LibCall -; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr -; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr +; THUMB-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr.n r2, .LCPI)}} +; THUMB-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}} ; THUMB-LONG: ldr r2, [r2] ; THUMB-LONG: blx r2 %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } -define i32 @VarArg() nounwind { -entry: - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %m = alloca i32, align 4 - %n = alloca i32, align 4 - %tmp = alloca i32, align 4 - %0 = load i32* %i, align 4 - %1 = load i32* %j, align 4 - %2 = load i32* %k, align 4 - %3 = load i32* %m, align 4 - %4 = load i32* %n, align 4 -; ARM: VarArg -; ARM: mov r7, sp -; ARM: movw r0, #5 -; ARM: ldr r1, [r7, #-4] -; ARM: ldr r2, [r7, #-8] -; ARM: ldr r3, [r7, #-12] -; ARM: ldr r9, [sp, #16] -; ARM: ldr r12, [sp, #12] -; ARM: str r9, [sp] -; ARM: str r12, [sp, #4] -; ARM: bl _CallVariadic -; THUMB: mov r7, sp -; THUMB: movs r0, #5 -; THUMB: movt r0, #0 -; THUMB: ldr r1, [sp, #28] -; THUMB: ldr r2, [sp, #24] -; THUMB: ldr r3, [sp, #20] -; THUMB: ldr.w r9, [sp, #16] -; THUMB: ldr.w r12, [sp, #12] -; THUMB: str.w r9, [sp] -; THUMB: str.w r12, [sp, #4] -; THUMB: bl _CallVariadic - %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) - store i32 %call, i32* %tmp, align 4 - %5 = load i32* %tmp, align 4 - ret i32 %5 -} - -declare i32 @CallVariadic(i32, ...) 
- ; Test fastcc define fastcc void @fast_callee(float %i) ssp { diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll index 8fb4b66b7d..7d45feff69 100644 --- a/test/CodeGen/ARM/fast-isel-crash.ll +++ b/test/CodeGen/ARM/fast-isel-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin %union.anon = type { <16 x i32> } diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll index f245168a8e..8867f87065 100644 --- a/test/CodeGen/ARM/fast-isel-crash2.ll +++ b/test/CodeGen/ARM/fast-isel-crash2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin ; rdar://9515076 ; (Make sure this doesn't crash.) diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll index 3a943d854b..5e6666c47d 100644 --- a/test/CodeGen/ARM/fast-isel-deadcode.ll +++ b/test/CodeGen/ARM/fast-isel-deadcode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Target-specific selector can't properly handle the double because it isn't ; being passed via a register, so the materialized arguments become dead code. 
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index 48105dd389..bc9769a537 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -3,33 +3,37 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG +; Note that some of these tests assume that relocations are either +; movw/movt or constant pool loads. Different platforms will select +; different approaches. + @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1 @temp = common global [60 x i8] zeroinitializer, align 1 define void @t1() nounwind ssp { ; ARM: t1 -; ARM: movw r0, :lower16:_message1 -; ARM: movt r0, :upper16:_message1 +; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; ARM: add r0, r0, #5 ; ARM: movw r1, #64 ; ARM: movw r2, #10 ; ARM: uxtb r1, r1 -; ARM: bl _memset +; ARM: bl {{_?}}memset ; ARM-LONG: t1 ; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr ; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 ; THUMB: t1 -; THUMB: movw r0, :lower16:_message1 -; THUMB: movt r0, :upper16:_message1 +; THUMB: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; THUMB: adds r0, #5 ; THUMB: movs r1, #64 ; THUMB: movt r1, #0 ; THUMB: movs r2, #10 ; THUMB: movt r2, #0 ; THUMB: uxtb r1, r1 -; THUMB: bl _memset +; THUMB: bl {{_?}}memset ; THUMB-LONG: t1 ; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr @@ -43,31 +47,33 @@ declare void 
@llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind define void @t2() nounwind ssp { ; ARM: t2 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 ; ARM: movw r2, #17 -; ARM: str r0, [sp] @ 4-byte Spill +; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; ARM: mov r0, r1 -; ARM: ldr r1, [sp] @ 4-byte Reload -; ARM: bl _memcpy +; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; ARM: bl {{_?}}memcpy ; ARM-LONG: t2 ; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr ; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 ; THUMB: t2 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 ; THUMB: movs r2, #17 ; THUMB: movt r2, #0 +; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; THUMB: mov r0, r1 -; THUMB: bl _memcpy +; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: bl {{_?}}memcpy ; THUMB-LONG: t2 ; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr @@ -81,29 +87,31 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, define void @t3() nounwind ssp { ; ARM: t3 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 ; ARM: movw r2, #10 ; ARM: mov r0, r1 -; ARM: bl _memmove +; ARM: bl {{_?}}memmove ; ARM-LONG: t3 ; ARM-LONG: movw r3, 
:lower16:L_memmove$non_lazy_ptr ; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 ; THUMB: t3 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 ; THUMB: movs r2, #10 ; THUMB: movt r2, #0 +; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; THUMB: mov r0, r1 -; THUMB: bl _memmove +; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: bl {{_?}}memmove ; THUMB-LONG: t3 ; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr @@ -115,8 +123,8 @@ define void @t3() nounwind ssp { define void @t4() nounwind ssp { ; ARM: t4 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: ldr r1, [r0, #16] ; ARM: str r1, [r0, #4] @@ -126,8 +134,8 @@ define void @t4() nounwind ssp { ; ARM: strh r1, [r0, #12] ; ARM: bx lr ; THUMB: t4 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: ldr r1, [r0, #16] ; THUMB: str r1, [r0, #4] @@ -144,8 +152,8 @@ declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, define void @t5() nounwind ssp { ; ARM: t5 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: ldrh r1, [r0, #16] ; ARM: strh r1, [r0, #4] @@ 
-159,8 +167,8 @@ define void @t5() nounwind ssp { ; ARM: strh r1, [r0, #12] ; ARM: bx lr ; THUMB: t5 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: ldrh r1, [r0, #16] ; THUMB: strh r1, [r0, #4] @@ -179,8 +187,8 @@ define void @t5() nounwind ssp { define void @t6() nounwind ssp { ; ARM: t6 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: ldrb r1, [r0, #16] ; ARM: strb r1, [r0, #4] @@ -204,8 +212,8 @@ define void @t6() nounwind ssp { ; ARM: strb r1, [r0, #13] ; ARM: bx lr ; THUMB: t6 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: ldrb r1, [r0, #16] ; THUMB: strb r1, [r0, #4] diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll index dfb8c53735..cf294bcfbe 100644 --- a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll +++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll index 0b5267ddc9..0e71322d4e 100644 --- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll +++ 
b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; rdar://10418009 define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp { diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll index b180e439dd..328168a84f 100644 --- a/test/CodeGen/ARM/fast-isel-mvn.ll +++ b/test/CodeGen/ARM/fast-isel-mvn.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; rdar://10412592 ; Note: The Thumb code is being generated by the target-independent selector. 
diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll index 867d53f973..6bb9ea3a8c 100644 --- a/test/CodeGen/ARM/fast-isel-pic.ll +++ b/test/CodeGen/ARM/fast-isel-pic.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7 -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF @g = global i32 0, align 4 diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll index 563880dab0..ee150facac 100644 --- a/test/CodeGen/ARM/fast-isel-redefinition.ll +++ b/test/CodeGen/ARM/fast-isel-redefinition.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -verify-machineinstrs -optimize-regalloc -regalloc=basic < %s +; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -optimize-regalloc -regalloc=basic < %s ; This isn't exactly a useful set of command-line options, but check that it ; doesn't crash. 
(It was crashing because a register was getting redefined.) diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll index b83a733669..a937036284 100644 --- a/test/CodeGen/ARM/fast-isel-select.ll +++ b/test/CodeGen/ARM/fast-isel-select.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i1 %c) nounwind readnone { diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll index e8759a7fc4..afdfa84f39 100644 --- a/test/CodeGen/ARM/fast-isel-static.ll +++ b/test/CodeGen/ARM/fast-isel-static.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static | FileCheck -check-prefix=NORM %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=NORM %s define void @myadd(float* %sum, float* %addend) nounwind { entry: @@ -24,7 +24,7 @@ entry: store float 0.000000e+00, float* %ztot, align 4 store float 1.000000e+00, float* %z, align 4 ; CHECK-LONG: blx r -; CHECK-NORM: bl _myadd +; CHECK-NORM: bl {{_?}}myadd call void @myadd(float* %ztot, float* %z) ret i32 0 } diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index 41fda41326..39ffcac292 100644 --- 
a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -144,15 +144,19 @@ define void @test4() { store i32 %b, i32* @test4g ret void -; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr -; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr + +; Note that relocations are either movw/movt or constant pool +; loads. Different platforms will select different approaches. + +; THUMB: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: ldr r1, [r0] ; THUMB: adds r1, #1 ; THUMB: str r1, [r0] -; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr -; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: ldr r1, [r0] ; ARM: add r1, r1, #1 diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 74628f0c5c..eb5ad8f0c3 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -599,3 +599,27 @@ for.end179: ; preds = %for.cond.loopexit, declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone + +; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8), +; creating an illegal type during legalization and causing an assert. 
+; PR15970 +define void @no_illegal_types_vmull_sext(<4 x i32> %a) { +entry: + %wide.load283.i = load <4 x i8>* undef, align 1 + %0 = sext <4 x i8> %wide.load283.i to <4 x i32> + %1 = sub nsw <4 x i32> %0, %a + %2 = mul nsw <4 x i32> %1, %1 + %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2 + store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4 + ret void +} +define void @no_illegal_types_vmull_zext(<4 x i32> %a) { +entry: + %wide.load283.i = load <4 x i8>* undef, align 1 + %0 = zext <4 x i8> %wide.load283.i to <4 x i32> + %1 = sub nsw <4 x i32> %0, %a + %2 = mul nsw <4 x i32> %1, %1 + %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2 + store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4 + ret void +} diff --git a/test/CodeGen/Hexagon/BranchPredict.ll b/test/CodeGen/Hexagon/BranchPredict.ll new file mode 100644 index 0000000000..716e85da5a --- /dev/null +++ b/test/CodeGen/Hexagon/BranchPredict.ll @@ -0,0 +1,79 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s + +; Check if the branch probabilities are reflected in the instructions: +; The basic block placement pass should place the more probable successor +; block as the fall-through block. The unconditional jump in the predecessor +; should then get the right hint (not_taken or ":nt") + + +@j = external global i32 + +define i32 @foo(i32 %a) nounwind { +; CHECK: if{{ *}}(!p{{[0-3]}}.new) jump:nt +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.else, label %if.then, !prof !0 + +if.then: ; preds = %entry + %add = add nsw i32 %a, 10 + %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind + br label %return + +if.else: ; preds = %entry + %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 4) nounwind + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %retval.0 +} + +declare i32 @foobar(...) 
+ +define i32 @bar(i32 %a) nounwind { +; CHECK: if{{ *}}(p{{[0-3]}}.new) jump:nt +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.else, label %if.then, !prof !1 + +if.then: ; preds = %entry + %add = add nsw i32 %a, 10 + %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind + br label %return + +if.else: ; preds = %entry + %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 4) nounwind + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %retval.0 +} + +define i32 @foo_bar(i32 %a, i16 signext %b) nounwind { +; CHECK: if{{ *}}(!cmp.eq(r{{[0-9]*}}.new, #0)) jump:nt +entry: + %0 = load i32* @j, align 4, !tbaa !2 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %if.else, label %if.then, !prof !0 + +if.then: ; preds = %entry + %add = add nsw i32 %a, 10 + %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind + br label %return + +if.else: ; preds = %entry + %add1 = add nsw i32 %a, 4 + %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add1) nounwind + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %retval.0 +} + +!0 = metadata !{metadata !"branch_weights", i32 64, i32 4} +!1 = metadata !{metadata !"branch_weights", i32 4, i32 64} +!2 = metadata !{metadata !"int", metadata !3} +!3 = metadata !{metadata !"omnipotent char", metadata !4} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll index f8c9e44c83..aea4ffe2ee 100644 --- a/test/CodeGen/Hexagon/args.ll +++ b/test/CodeGen/Hexagon/args.ll @@ -1,11 +1,8 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s ; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7 -; CHECK: r0 = #1 -; CHECK: r1 = #2 -; CHECK: r2 = #3 -; CHECK: r3 = #4 
-; CHECK: r4 = #5 -; CHECK: r5 = #6 +; CHECK: r1:0 = combine(#2, #1) +; CHECK: r3:2 = combine(#4, #3) +; CHECK: r5:4 = combine(#6, #5) define void @foo() nounwind { diff --git a/test/CodeGen/Hexagon/extload-combine.ll b/test/CodeGen/Hexagon/extload-combine.ll new file mode 100644 index 0000000000..b3b8bf0703 --- /dev/null +++ b/test/CodeGen/Hexagon/extload-combine.ll @@ -0,0 +1,80 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s +; Check that the combine/stxw instructions are being generated. +; In case of combine one of the operand should be 0 and another should be +; the output of absolute addressing load instruction. + +@a = external global i16 +@b = external global i16 +@c = external global i16 +@char_a = external global i8 +@char_b = external global i8 +@char_c = external global i8 +@int_a = external global i32 +@int_b = external global i32 +@int_c = external global i32 + +; Function Attrs: nounwind +define i64 @short_test1() #0 { +; CHECK: [[VAR:r[0-9]+]]{{ *}}={{ *}}memuh(## +; CHECK: combine(#0, [[VAR]]) +entry: + store i16 0, i16* @a, align 2 + %0 = load i16* @b, align 2 + %conv2 = zext i16 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @short_test2() #0 { +; CHECK: [[VAR1:r[0-9]+]]{{ *}}={{ *}}memh(## +; CHECK: sxtw([[VAR1]]) +entry: + store i16 0, i16* @a, align 2 + %0 = load i16* @c, align 2 + %conv2 = sext i16 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @char_test1() #0 { +; CHECK: [[VAR2:r[0-9]+]]{{ *}}={{ *}}memub(## +; CHECK: combine(#0, [[VAR2]]) +entry: + store i8 0, i8* @char_a, align 1 + %0 = load i8* @char_b, align 1 + %conv2 = zext i8 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @char_test2() #0 { +; CHECK: [[VAR3:r[0-9]+]]{{ *}}={{ *}}memb(## +; CHECK: sxtw([[VAR3]]) +entry: + store i8 0, i8* @char_a, align 1 + %0 = load i8* @char_c, align 1 + %conv2 = sext i8 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @int_test1() 
#0 { +; CHECK: [[VAR4:r[0-9]+]]{{ *}}={{ *}}memw(## +; CHECK: combine(#0, [[VAR4]]) +entry: + store i32 0, i32* @int_a, align 4 + %0 = load i32* @int_b, align 4 + %conv = zext i32 %0 to i64 + ret i64 %conv +} + +; Function Attrs: nounwind +define i64 @int_test2() #0 { +; CHECK: [[VAR5:r[0-9]+]]{{ *}}={{ *}}memw(## +; CHECK: sxtw([[VAR5]]) +entry: + store i32 0, i32* @int_a, align 4 + %0 = load i32* @int_c, align 4 + %conv = sext i32 %0 to i64 + ret i64 %conv +} diff --git a/test/CodeGen/Hexagon/packetize_cond_inst.ll b/test/CodeGen/Hexagon/packetize_cond_inst.ll new file mode 100644 index 0000000000..a48a9f62ec --- /dev/null +++ b/test/CodeGen/Hexagon/packetize_cond_inst.ll @@ -0,0 +1,32 @@ +; RUN: llc -mcpu=hexagonv4 -tail-dup-size=1 < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon-unknown--elf" + +; Make sure we put the two conditionally executed adds in a packet. +; ifcnv_add: +; { +; p0 = cmp.gt(r2, r1) +; if (!p0.new) r0 = add(r2, r1) +; if (p0.new) r0 = add(r0, #10) +; } +; CHECK: cmp +; CHECK-NEXT: add +; CHECH-NEXT: add +define i32 @ifcnv_add(i32, i32, i32) nounwind readnone { + %4 = icmp sgt i32 %2, %1 + br i1 %4, label %5, label %7 + +; <label>:5 ; preds = %3 + %6 = add nsw i32 %0, 10 + br label %9 + +; <label>:7 ; preds = %3 + %8 = add nsw i32 %2, %1 + br label %9 + +; <label>:9 ; preds = %7, %5 + %10 = phi i32 [ %6, %5 ], [ %8, %7 ] + %11 = add nsw i32 %10, 1 + ret i32 %11 +} diff --git a/test/CodeGen/Hexagon/tfr-to-combine.ll b/test/CodeGen/Hexagon/tfr-to-combine.ll new file mode 100644 index 0000000000..e3057cd161 --- /dev/null +++ b/test/CodeGen/Hexagon/tfr-to-combine.ll @@ -0,0 +1,35 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s + +; Check that we combine TFRs and TFRIs into COMBINEs. 
+ +@a = external global i16 +@b = external global i16 +@c = external global i16 + +; Function Attrs: nounwind +define i64 @test1() #0 { +; CHECK: combine(#10, #0) +entry: + store i16 0, i16* @a, align 2 + store i16 10, i16* @b, align 2 + ret i64 10 +} + +; Function Attrs: nounwind +define i64 @test2() #0 { +; CHECK: combine(#0, r{{[0-9]+}}) +entry: + store i16 0, i16* @a, align 2 + %0 = load i16* @c, align 2 + %conv2 = zext i16 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @test4() #0 { +; CHECK: combine(#0, ##100) +entry: + store i16 100, i16* @b, align 2 + store i16 0, i16* @a, align 2 + ret i64 0 +} diff --git a/test/CodeGen/Mips/hf16call32.ll b/test/CodeGen/Mips/hf16call32.ll new file mode 100644 index 0000000000..41249e1be2 --- /dev/null +++ b/test/CodeGen/Mips/hf16call32.ll @@ -0,0 +1,1028 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=stel + +@x = common global float 0.000000e+00, align 4 +@y = common global float 0.000000e+00, align 4 +@xd = common global double 0.000000e+00, align 8 +@yd = common global double 0.000000e+00, align 8 +@xy = common global { float, float } zeroinitializer, align 4 +@xyd = common global { double, double } zeroinitializer, align 8 +@ret_sf = common global float 0.000000e+00, align 4 +@ret_df = common global double 0.000000e+00, align 8 +@ret_sc = common global { float, float } zeroinitializer, align 4 +@ret_dc = common global { double, double } zeroinitializer, align 8 +@lx = common global float 0.000000e+00, align 4 +@ly = common global float 0.000000e+00, align 4 +@lxd = common global double 0.000000e+00, align 8 +@lyd = common global double 0.000000e+00, align 8 +@lxy = common global { float, float } zeroinitializer, align 4 +@lxyd = common global { double, double } zeroinitializer, align 8 +@lret_sf = common global float 0.000000e+00, align 4 +@lret_df = common global double 0.000000e+00, 
align 8 +@lret_sc = common global { float, float } zeroinitializer, align 4 +@lret_dc = common global { double, double } zeroinitializer, align 8 +@.str = private unnamed_addr constant [10 x i8] c"%f %f %i\0A\00", align 1 +@.str1 = private unnamed_addr constant [16 x i8] c"%f=%f %f=%f %i\0A\00", align 1 +@.str2 = private unnamed_addr constant [22 x i8] c"%f=%f %f=%f %f=%f %i\0A\00", align 1 +@.str3 = private unnamed_addr constant [18 x i8] c"%f+%fi=%f+%fi %i\0A\00", align 1 +@.str4 = private unnamed_addr constant [24 x i8] c"%f+%fi=%f+%fi %f=%f %i\0A\00", align 1 + +; Function Attrs: nounwind +define void @clear() #0 { +entry: + store float 1.000000e+00, float* @x, align 4 + store float 1.000000e+00, float* @y, align 4 + store double 1.000000e+00, double* @xd, align 8 + store double 1.000000e+00, double* @yd, align 8 + store float 1.000000e+00, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0) + store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1) + store double 1.000000e+00, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0) + store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1) + store float 1.000000e+00, float* @ret_sf, align 4 + store double 1.000000e+00, double* @ret_df, align 8 + store float 1.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + store double 1.000000e+00, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + store float 0.000000e+00, float* @lx, align 4 + store float 0.000000e+00, float* @ly, align 4 + store double 0.000000e+00, double* @lxd, align 8 + store double 0.000000e+00, double* @lyd, align 8 + store float 0.000000e+00, float* getelementptr inbounds ({ 
float, float }* @lxy, i32 0, i32 0) + store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lxy, i32 0, i32 1) + store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lxyd, i32 0, i32 0) + store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lxyd, i32 0, i32 1) + store float 0.000000e+00, float* @lret_sf, align 4 + store double 0.000000e+00, double* @lret_df, align 8 + store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + ret void +} + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + call void @clear() + store float 1.500000e+00, float* @lx, align 4 + %0 = load float* @lx, align 4 + call void @v_sf(float %0) + %1 = load float* @x, align 4 + %conv = fpext float %1 to double + %2 = load float* @lx, align 4 + %conv1 = fpext float %2 to double + %3 = load float* @x, align 4 + %4 = load float* @lx, align 4 + %cmp = fcmp oeq float %3, %4 + %conv2 = zext i1 %cmp to i32 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv, double %conv1, i32 %conv2) + call void @clear() + store double 0x41678C29C0000000, double* @lxd, align 8 + %5 = load double* @lxd, align 8 + call void @v_df(double %5) + %6 = load double* @xd, align 8 + %7 = load double* @lxd, align 8 + %8 = load double* @xd, align 8 + %9 = load double* @lxd, align 8 + %cmp3 = fcmp oeq double %8, %9 + %conv4 = zext i1 %cmp3 to i32 + %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %6, double %7, i32 %conv4) + 
call void @clear() + store float 9.000000e+00, float* @lx, align 4 + store float 1.000000e+01, float* @ly, align 4 + %10 = load float* @lx, align 4 + %11 = load float* @ly, align 4 + call void @v_sf_sf(float %10, float %11) + %12 = load float* @x, align 4 + %conv6 = fpext float %12 to double + %13 = load float* @lx, align 4 + %conv7 = fpext float %13 to double + %14 = load float* @y, align 4 + %conv8 = fpext float %14 to double + %15 = load float* @ly, align 4 + %conv9 = fpext float %15 to double + %16 = load float* @x, align 4 + %17 = load float* @lx, align 4 + %cmp10 = fcmp oeq float %16, %17 + br i1 %cmp10, label %land.rhs, label %land.end + +land.rhs: ; preds = %entry + %18 = load float* @y, align 4 + %19 = load float* @ly, align 4 + %cmp12 = fcmp oeq float %18, %19 + br label %land.end + +land.end: ; preds = %land.rhs, %entry + %20 = phi i1 [ false, %entry ], [ %cmp12, %land.rhs ] + %land.ext = zext i1 %20 to i32 + %call14 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv6, double %conv7, double %conv8, double %conv9, i32 %land.ext) + call void @clear() + store float 0x3FFE666660000000, float* @lx, align 4 + store double 0x4007E613249FF279, double* @lyd, align 8 + %21 = load float* @lx, align 4 + %22 = load double* @lyd, align 8 + call void @v_sf_df(float %21, double %22) + %23 = load float* @x, align 4 + %conv15 = fpext float %23 to double + %24 = load float* @lx, align 4 + %conv16 = fpext float %24 to double + %25 = load double* @yd, align 8 + %26 = load double* @lyd, align 8 + %27 = load float* @x, align 4 + %28 = load float* @lx, align 4 + %cmp17 = fcmp oeq float %27, %28 + %conv18 = zext i1 %cmp17 to i32 + %29 = load double* @yd, align 8 + %30 = load double* @lyd, align 8 + %cmp19 = fcmp oeq double %29, %30 + %conv20 = zext i1 %cmp19 to i32 + %and = and i32 %conv18, %conv20 + %call21 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv15, double 
%conv16, double %25, double %26, i32 %and) + call void @clear() + store double 0x4194E54F94000000, double* @lxd, align 8 + store float 7.600000e+01, float* @ly, align 4 + %31 = load double* @lxd, align 8 + %32 = load float* @ly, align 4 + call void @v_df_sf(double %31, float %32) + %33 = load double* @xd, align 8 + %34 = load double* @lxd, align 8 + %35 = load float* @y, align 4 + %conv22 = fpext float %35 to double + %36 = load float* @ly, align 4 + %conv23 = fpext float %36 to double + %37 = load double* @xd, align 8 + %38 = load double* @lxd, align 8 + %cmp24 = fcmp oeq double %37, %38 + %conv25 = zext i1 %cmp24 to i32 + %39 = load float* @y, align 4 + %40 = load float* @ly, align 4 + %cmp26 = fcmp oeq float %39, %40 + %conv27 = zext i1 %cmp26 to i32 + %and28 = and i32 %conv25, %conv27 + %call29 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %33, double %34, double %conv22, double %conv23, i32 %and28) + call void @clear() + store double 7.365198e+07, double* @lxd, align 8 + store double 0x416536CD80000000, double* @lyd, align 8 + %41 = load double* @lxd, align 8 + %42 = load double* @lyd, align 8 + call void @v_df_df(double %41, double %42) + %43 = load double* @xd, align 8 + %44 = load double* @lxd, align 8 + %45 = load double* @yd, align 8 + %46 = load double* @lyd, align 8 + %47 = load double* @xd, align 8 + %48 = load double* @lxd, align 8 + %cmp30 = fcmp oeq double %47, %48 + %conv31 = zext i1 %cmp30 to i32 + %49 = load double* @yd, align 8 + %50 = load double* @lyd, align 8 + %cmp32 = fcmp oeq double %49, %50 + %conv33 = zext i1 %cmp32 to i32 + %and34 = and i32 %conv31, %conv33 + %call35 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %43, double %44, double %45, double %46, i32 %and34) + call void @clear() + store float 0x4016666660000000, float* @ret_sf, align 4 + %call36 = call float @sf_v() + store float %call36, float* @lret_sf, align 4 + %51 = 
load float* @ret_sf, align 4 + %conv37 = fpext float %51 to double + %52 = load float* @lret_sf, align 4 + %conv38 = fpext float %52 to double + %53 = load float* @ret_sf, align 4 + %54 = load float* @lret_sf, align 4 + %cmp39 = fcmp oeq float %53, %54 + %conv40 = zext i1 %cmp39 to i32 + %call41 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv37, double %conv38, i32 %conv40) + call void @clear() + store float 4.587300e+06, float* @ret_sf, align 4 + store float 3.420000e+02, float* @lx, align 4 + %55 = load float* @lx, align 4 + %call42 = call float @sf_sf(float %55) + store float %call42, float* @lret_sf, align 4 + %56 = load float* @ret_sf, align 4 + %conv43 = fpext float %56 to double + %57 = load float* @lret_sf, align 4 + %conv44 = fpext float %57 to double + %58 = load float* @x, align 4 + %conv45 = fpext float %58 to double + %59 = load float* @lx, align 4 + %conv46 = fpext float %59 to double + %60 = load float* @ret_sf, align 4 + %61 = load float* @lret_sf, align 4 + %cmp47 = fcmp oeq float %60, %61 + %conv48 = zext i1 %cmp47 to i32 + %62 = load float* @x, align 4 + %63 = load float* @lx, align 4 + %cmp49 = fcmp oeq float %62, %63 + %conv50 = zext i1 %cmp49 to i32 + %and51 = and i32 %conv48, %conv50 + %call52 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv43, double %conv44, double %conv45, double %conv46, i32 %and51) + call void @clear() + store float 4.445910e+06, float* @ret_sf, align 4 + store double 0x419A7DB294000000, double* @lxd, align 8 + %64 = load double* @lxd, align 8 + %call53 = call float @sf_df(double %64) + store float %call53, float* @lret_sf, align 4 + %65 = load float* @ret_sf, align 4 + %conv54 = fpext float %65 to double + %66 = load float* @lret_sf, align 4 + %conv55 = fpext float %66 to double + %67 = load double* @xd, align 8 + %68 = load double* @lxd, align 8 + %69 = load float* @ret_sf, align 4 + %70 = load float* 
@lret_sf, align 4 + %cmp56 = fcmp oeq float %69, %70 + %conv57 = zext i1 %cmp56 to i32 + %71 = load double* @xd, align 8 + %72 = load double* @lxd, align 8 + %cmp58 = fcmp oeq double %71, %72 + %conv59 = zext i1 %cmp58 to i32 + %and60 = and i32 %conv57, %conv59 + %call61 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv54, double %conv55, double %67, double %68, i32 %and60) + call void @clear() + store float 0x3FFF4BC6A0000000, float* @ret_sf, align 4 + store float 4.445500e+03, float* @lx, align 4 + store float 0x4068ACCCC0000000, float* @ly, align 4 + %73 = load float* @lx, align 4 + %74 = load float* @ly, align 4 + %call62 = call float @sf_sf_sf(float %73, float %74) + store float %call62, float* @lret_sf, align 4 + %75 = load float* @ret_sf, align 4 + %conv63 = fpext float %75 to double + %76 = load float* @lret_sf, align 4 + %conv64 = fpext float %76 to double + %77 = load float* @x, align 4 + %conv65 = fpext float %77 to double + %78 = load float* @lx, align 4 + %conv66 = fpext float %78 to double + %79 = load float* @y, align 4 + %conv67 = fpext float %79 to double + %80 = load float* @ly, align 4 + %conv68 = fpext float %80 to double + %81 = load float* @ret_sf, align 4 + %82 = load float* @lret_sf, align 4 + %cmp69 = fcmp oeq float %81, %82 + br i1 %cmp69, label %land.lhs.true, label %land.end76 + +land.lhs.true: ; preds = %land.end + %83 = load float* @x, align 4 + %84 = load float* @lx, align 4 + %cmp71 = fcmp oeq float %83, %84 + br i1 %cmp71, label %land.rhs73, label %land.end76 + +land.rhs73: ; preds = %land.lhs.true + %85 = load float* @y, align 4 + %86 = load float* @ly, align 4 + %cmp74 = fcmp oeq float %85, %86 + br label %land.end76 + +land.end76: ; preds = %land.rhs73, %land.lhs.true, %land.end + %87 = phi i1 [ false, %land.lhs.true ], [ false, %land.end ], [ %cmp74, %land.rhs73 ] + %land.ext77 = zext i1 %87 to i32 + %call78 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x 
i8]* @.str2, i32 0, i32 0), double %conv63, double %conv64, double %conv65, double %conv66, double %conv67, double %conv68, i32 %land.ext77) + call void @clear() + store float 9.991300e+04, float* @ret_sf, align 4 + store float 1.114500e+04, float* @lx, align 4 + store double 9.994445e+07, double* @lyd, align 8 + %88 = load float* @lx, align 4 + %89 = load double* @lyd, align 8 + %call79 = call float @sf_sf_df(float %88, double %89) + store float %call79, float* @lret_sf, align 4 + %90 = load float* @ret_sf, align 4 + %conv80 = fpext float %90 to double + %91 = load float* @lret_sf, align 4 + %conv81 = fpext float %91 to double + %92 = load float* @x, align 4 + %conv82 = fpext float %92 to double + %93 = load float* @lx, align 4 + %conv83 = fpext float %93 to double + %94 = load double* @yd, align 8 + %95 = load double* @lyd, align 8 + %96 = load float* @ret_sf, align 4 + %97 = load float* @lret_sf, align 4 + %cmp84 = fcmp oeq float %96, %97 + br i1 %cmp84, label %land.lhs.true86, label %land.end92 + +land.lhs.true86: ; preds = %land.end76 + %98 = load float* @x, align 4 + %99 = load float* @lx, align 4 + %cmp87 = fcmp oeq float %98, %99 + br i1 %cmp87, label %land.rhs89, label %land.end92 + +land.rhs89: ; preds = %land.lhs.true86 + %100 = load double* @yd, align 8 + %101 = load double* @lyd, align 8 + %cmp90 = fcmp oeq double %100, %101 + br label %land.end92 + +land.end92: ; preds = %land.rhs89, %land.lhs.true86, %land.end76 + %102 = phi i1 [ false, %land.lhs.true86 ], [ false, %land.end76 ], [ %cmp90, %land.rhs89 ] + %land.ext93 = zext i1 %102 to i32 + %call94 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv80, double %conv81, double %conv82, double %conv83, double %94, double %95, i32 %land.ext93) + call void @clear() + store float 0x417CCC7A00000000, float* @ret_sf, align 4 + store double 0x4172034530000000, double* @lxd, align 8 + store float 4.456200e+04, float* @ly, align 4 + %103 = load double* @lxd, 
align 8 + %104 = load float* @ly, align 4 + %call95 = call float @sf_df_sf(double %103, float %104) + store float %call95, float* @lret_sf, align 4 + %105 = load float* @ret_sf, align 4 + %conv96 = fpext float %105 to double + %106 = load float* @lret_sf, align 4 + %conv97 = fpext float %106 to double + %107 = load double* @xd, align 8 + %108 = load double* @lxd, align 8 + %109 = load float* @y, align 4 + %conv98 = fpext float %109 to double + %110 = load float* @ly, align 4 + %conv99 = fpext float %110 to double + %111 = load float* @ret_sf, align 4 + %112 = load float* @lret_sf, align 4 + %cmp100 = fcmp oeq float %111, %112 + br i1 %cmp100, label %land.lhs.true102, label %land.end108 + +land.lhs.true102: ; preds = %land.end92 + %113 = load double* @xd, align 8 + %114 = load double* @lxd, align 8 + %cmp103 = fcmp oeq double %113, %114 + br i1 %cmp103, label %land.rhs105, label %land.end108 + +land.rhs105: ; preds = %land.lhs.true102 + %115 = load float* @y, align 4 + %116 = load float* @ly, align 4 + %cmp106 = fcmp oeq float %115, %116 + br label %land.end108 + +land.end108: ; preds = %land.rhs105, %land.lhs.true102, %land.end92 + %117 = phi i1 [ false, %land.lhs.true102 ], [ false, %land.end92 ], [ %cmp106, %land.rhs105 ] + %land.ext109 = zext i1 %117 to i32 + %call110 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv96, double %conv97, double %107, double %108, double %conv98, double %conv99, i32 %land.ext109) + call void @clear() + store float 3.987721e+06, float* @ret_sf, align 4 + store double 0x3FF1F49F6DDDC2D8, double* @lxd, align 8 + store double 0x409129F306A2B170, double* @lyd, align 8 + %118 = load double* @lxd, align 8 + %119 = load double* @lyd, align 8 + %call111 = call float @sf_df_df(double %118, double %119) + store float %call111, float* @lret_sf, align 4 + %120 = load float* @ret_sf, align 4 + %conv112 = fpext float %120 to double + %121 = load float* @lret_sf, align 4 + %conv113 = fpext 
float %121 to double + %122 = load double* @xd, align 8 + %123 = load double* @lxd, align 8 + %124 = load double* @yd, align 8 + %125 = load double* @lyd, align 8 + %126 = load float* @ret_sf, align 4 + %127 = load float* @lret_sf, align 4 + %cmp114 = fcmp oeq float %126, %127 + br i1 %cmp114, label %land.lhs.true116, label %land.end122 + +land.lhs.true116: ; preds = %land.end108 + %128 = load double* @xd, align 8 + %129 = load double* @lxd, align 8 + %cmp117 = fcmp oeq double %128, %129 + br i1 %cmp117, label %land.rhs119, label %land.end122 + +land.rhs119: ; preds = %land.lhs.true116 + %130 = load double* @yd, align 8 + %131 = load double* @lyd, align 8 + %cmp120 = fcmp oeq double %130, %131 + br label %land.end122 + +land.end122: ; preds = %land.rhs119, %land.lhs.true116, %land.end108 + %132 = phi i1 [ false, %land.lhs.true116 ], [ false, %land.end108 ], [ %cmp120, %land.rhs119 ] + %land.ext123 = zext i1 %132 to i32 + %call124 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv112, double %conv113, double %122, double %123, double %124, double %125, i32 %land.ext123) + call void @clear() + store double 1.561234e+01, double* @ret_df, align 8 + %call125 = call double @df_v() + store double %call125, double* @lret_df, align 8 + %133 = load double* @ret_df, align 8 + %134 = load double* @lret_df, align 8 + %135 = load double* @ret_df, align 8 + %136 = load double* @lret_df, align 8 + %cmp126 = fcmp oeq double %135, %136 + %conv127 = zext i1 %cmp126 to i32 + %call128 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %133, double %134, i32 %conv127) + call void @clear() + store double 1.345873e+01, double* @ret_df, align 8 + store float 3.434520e+05, float* @lx, align 4 + %137 = load float* @lx, align 4 + %call129 = call double @df_sf(float %137) + store double %call129, double* @lret_df, align 8 + %138 = load double* @ret_df, align 8 + %139 = load double* @lret_df, 
align 8 + %140 = load float* @x, align 4 + %conv130 = fpext float %140 to double + %141 = load float* @lx, align 4 + %conv131 = fpext float %141 to double + %142 = load double* @ret_df, align 8 + %143 = load double* @lret_df, align 8 + %cmp132 = fcmp oeq double %142, %143 + %conv133 = zext i1 %cmp132 to i32 + %144 = load float* @x, align 4 + %145 = load float* @lx, align 4 + %cmp134 = fcmp oeq float %144, %145 + %conv135 = zext i1 %cmp134 to i32 + %and136 = and i32 %conv133, %conv135 + %call137 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %138, double %139, double %conv130, double %conv131, i32 %and136) + call void @clear() + store double 0x4084F3AB7AA25D8D, double* @ret_df, align 8 + store double 0x4114F671D2F1A9FC, double* @lxd, align 8 + %146 = load double* @lxd, align 8 + %call138 = call double @df_df(double %146) + store double %call138, double* @lret_df, align 8 + %147 = load double* @ret_df, align 8 + %148 = load double* @lret_df, align 8 + %149 = load double* @xd, align 8 + %150 = load double* @lxd, align 8 + %151 = load double* @ret_df, align 8 + %152 = load double* @lret_df, align 8 + %cmp139 = fcmp oeq double %151, %152 + %conv140 = zext i1 %cmp139 to i32 + %153 = load double* @xd, align 8 + %154 = load double* @lxd, align 8 + %cmp141 = fcmp oeq double %153, %154 + %conv142 = zext i1 %cmp141 to i32 + %and143 = and i32 %conv140, %conv142 + %call144 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %147, double %148, double %149, double %150, i32 %and143) + call void @clear() + store double 6.781956e+03, double* @ret_df, align 8 + store float 4.445500e+03, float* @lx, align 4 + store float 0x4068ACCCC0000000, float* @ly, align 4 + %155 = load float* @lx, align 4 + %156 = load float* @ly, align 4 + %call145 = call double @df_sf_sf(float %155, float %156) + store double %call145, double* @lret_df, align 8 + %157 = load double* @ret_df, align 8 + %158 = 
load double* @lret_df, align 8 + %159 = load float* @x, align 4 + %conv146 = fpext float %159 to double + %160 = load float* @lx, align 4 + %conv147 = fpext float %160 to double + %161 = load float* @y, align 4 + %conv148 = fpext float %161 to double + %162 = load float* @ly, align 4 + %conv149 = fpext float %162 to double + %163 = load double* @ret_df, align 8 + %164 = load double* @lret_df, align 8 + %cmp150 = fcmp oeq double %163, %164 + br i1 %cmp150, label %land.lhs.true152, label %land.end158 + +land.lhs.true152: ; preds = %land.end122 + %165 = load float* @x, align 4 + %166 = load float* @lx, align 4 + %cmp153 = fcmp oeq float %165, %166 + br i1 %cmp153, label %land.rhs155, label %land.end158 + +land.rhs155: ; preds = %land.lhs.true152 + %167 = load float* @y, align 4 + %168 = load float* @ly, align 4 + %cmp156 = fcmp oeq float %167, %168 + br label %land.end158 + +land.end158: ; preds = %land.rhs155, %land.lhs.true152, %land.end122 + %169 = phi i1 [ false, %land.lhs.true152 ], [ false, %land.end122 ], [ %cmp156, %land.rhs155 ] + %land.ext159 = zext i1 %169 to i32 + %call160 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %157, double %158, double %conv146, double %conv147, double %conv148, double %conv149, i32 %land.ext159) + call void @clear() + store double 1.889130e+05, double* @ret_df, align 8 + store float 9.111450e+05, float* @lx, align 4 + store double 0x4185320A58000000, double* @lyd, align 8 + %170 = load float* @lx, align 4 + %171 = load double* @lyd, align 8 + %call161 = call double @df_sf_df(float %170, double %171) + store double %call161, double* @lret_df, align 8 + %172 = load double* @ret_df, align 8 + %173 = load double* @lret_df, align 8 + %174 = load float* @x, align 4 + %conv162 = fpext float %174 to double + %175 = load float* @lx, align 4 + %conv163 = fpext float %175 to double + %176 = load double* @yd, align 8 + %177 = load double* @lyd, align 8 + %178 = load double* @ret_df, align 
8 + %179 = load double* @lret_df, align 8 + %cmp164 = fcmp oeq double %178, %179 + br i1 %cmp164, label %land.lhs.true166, label %land.end172 + +land.lhs.true166: ; preds = %land.end158 + %180 = load float* @x, align 4 + %181 = load float* @lx, align 4 + %cmp167 = fcmp oeq float %180, %181 + br i1 %cmp167, label %land.rhs169, label %land.end172 + +land.rhs169: ; preds = %land.lhs.true166 + %182 = load double* @yd, align 8 + %183 = load double* @lyd, align 8 + %cmp170 = fcmp oeq double %182, %183 + br label %land.end172 + +land.end172: ; preds = %land.rhs169, %land.lhs.true166, %land.end158 + %184 = phi i1 [ false, %land.lhs.true166 ], [ false, %land.end158 ], [ %cmp170, %land.rhs169 ] + %land.ext173 = zext i1 %184 to i32 + %call174 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %172, double %173, double %conv162, double %conv163, double %176, double %177, i32 %land.ext173) + call void @clear() + store double 0x418B2DB900000000, double* @ret_df, align 8 + store double 0x41B1EF2ED3000000, double* @lxd, align 8 + store float 1.244562e+06, float* @ly, align 4 + %185 = load double* @lxd, align 8 + %186 = load float* @ly, align 4 + %call175 = call double @df_df_sf(double %185, float %186) + store double %call175, double* @lret_df, align 8 + %187 = load double* @ret_df, align 8 + %188 = load double* @lret_df, align 8 + %189 = load double* @xd, align 8 + %190 = load double* @lxd, align 8 + %191 = load float* @y, align 4 + %conv176 = fpext float %191 to double + %192 = load float* @ly, align 4 + %conv177 = fpext float %192 to double + %193 = load double* @ret_df, align 8 + %194 = load double* @lret_df, align 8 + %cmp178 = fcmp oeq double %193, %194 + br i1 %cmp178, label %land.lhs.true180, label %land.end186 + +land.lhs.true180: ; preds = %land.end172 + %195 = load double* @xd, align 8 + %196 = load double* @lxd, align 8 + %cmp181 = fcmp oeq double %195, %196 + br i1 %cmp181, label %land.rhs183, label %land.end186 + 
+land.rhs183: ; preds = %land.lhs.true180 + %197 = load float* @y, align 4 + %198 = load float* @ly, align 4 + %cmp184 = fcmp oeq float %197, %198 + br label %land.end186 + +land.end186: ; preds = %land.rhs183, %land.lhs.true180, %land.end172 + %199 = phi i1 [ false, %land.lhs.true180 ], [ false, %land.end172 ], [ %cmp184, %land.rhs183 ] + %land.ext187 = zext i1 %199 to i32 + %call188 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %187, double %188, double %189, double %190, double %conv176, double %conv177, i32 %land.ext187) + call void @clear() + store double 3.987721e+06, double* @ret_df, align 8 + store double 5.223560e+00, double* @lxd, align 8 + store double 0x40B7D37CC1A8AC5C, double* @lyd, align 8 + %200 = load double* @lxd, align 8 + %201 = load double* @lyd, align 8 + %call189 = call double @df_df_df(double %200, double %201) + store double %call189, double* @lret_df, align 8 + %202 = load double* @ret_df, align 8 + %203 = load double* @lret_df, align 8 + %204 = load double* @xd, align 8 + %205 = load double* @lxd, align 8 + %206 = load double* @yd, align 8 + %207 = load double* @lyd, align 8 + %208 = load double* @ret_df, align 8 + %209 = load double* @lret_df, align 8 + %cmp190 = fcmp oeq double %208, %209 + br i1 %cmp190, label %land.lhs.true192, label %land.end198 + +land.lhs.true192: ; preds = %land.end186 + %210 = load double* @xd, align 8 + %211 = load double* @lxd, align 8 + %cmp193 = fcmp oeq double %210, %211 + br i1 %cmp193, label %land.rhs195, label %land.end198 + +land.rhs195: ; preds = %land.lhs.true192 + %212 = load double* @yd, align 8 + %213 = load double* @lyd, align 8 + %cmp196 = fcmp oeq double %212, %213 + br label %land.end198 + +land.end198: ; preds = %land.rhs195, %land.lhs.true192, %land.end186 + %214 = phi i1 [ false, %land.lhs.true192 ], [ false, %land.end186 ], [ %cmp196, %land.rhs195 ] + %land.ext199 = zext i1 %214 to i32 + %call200 = call i32 (i8*, ...)* @printf(i8* 
getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %202, double %203, double %204, double %205, double %206, double %207, i32 %land.ext199) + call void @clear() + store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + %call201 = call { float, float } @sc_v() + %215 = extractvalue { float, float } %call201, 0 + %216 = extractvalue { float, float } %call201, 1 + store float %215, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + store float %216, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + %ret_sc.real = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + %ret_sc.imag = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + %conv202 = fpext float %ret_sc.real to double + %conv203 = fpext float %ret_sc.imag to double + %ret_sc.real204 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + %ret_sc.imag205 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + %conv206 = fpext float %ret_sc.real204 to double + %conv207 = fpext float %ret_sc.imag205 to double + %lret_sc.real = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + %lret_sc.imag = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + %conv208 = fpext float %lret_sc.real to double + %conv209 = fpext float %lret_sc.imag to double + %lret_sc.real210 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + %lret_sc.imag211 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + %conv212 = fpext float %lret_sc.real210 to double + %conv213 = fpext float %lret_sc.imag211 to double + %ret_sc.real214 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + %ret_sc.imag215 = load 
float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + %lret_sc.real216 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + %lret_sc.imag217 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + %cmp.r = fcmp oeq float %ret_sc.real214, %lret_sc.real216 + %cmp.i = fcmp oeq float %ret_sc.imag215, %lret_sc.imag217 + %and.ri = and i1 %cmp.r, %cmp.i + %conv218 = zext i1 %and.ri to i32 + %call219 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str3, i32 0, i32 0), double %conv202, double %conv207, double %conv208, double %conv213, i32 %conv218) + call void @clear() + store float 0x3FF7A99300000000, float* @lx, align 4 + store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + %217 = load float* @lx, align 4 + %call220 = call { float, float } @sc_sf(float %217) + %218 = extractvalue { float, float } %call220, 0 + %219 = extractvalue { float, float } %call220, 1 + store float %218, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + store float %219, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + %ret_sc.real221 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + %ret_sc.imag222 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + %conv223 = fpext float %ret_sc.real221 to double + %conv224 = fpext float %ret_sc.imag222 to double + %ret_sc.real225 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + %ret_sc.imag226 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + %conv227 = fpext float %ret_sc.real225 to double + %conv228 = fpext float %ret_sc.imag226 to double + %lret_sc.real229 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + 
%lret_sc.imag230 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + %conv231 = fpext float %lret_sc.real229 to double + %conv232 = fpext float %lret_sc.imag230 to double + %lret_sc.real233 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + %lret_sc.imag234 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + %conv235 = fpext float %lret_sc.real233 to double + %conv236 = fpext float %lret_sc.imag234 to double + %220 = load float* @x, align 4 + %conv237 = fpext float %220 to double + %221 = load float* @lx, align 4 + %conv238 = fpext float %221 to double + %ret_sc.real239 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) + %ret_sc.imag240 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1) + %lret_sc.real241 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) + %lret_sc.imag242 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1) + %cmp.r243 = fcmp oeq float %ret_sc.real239, %lret_sc.real241 + %cmp.i244 = fcmp oeq float %ret_sc.imag240, %lret_sc.imag242 + %and.ri245 = and i1 %cmp.r243, %cmp.i244 + br i1 %and.ri245, label %land.rhs247, label %land.end250 + +land.rhs247: ; preds = %land.end198 + %222 = load float* @x, align 4 + %223 = load float* @lx, align 4 + %cmp248 = fcmp oeq float %222, %223 + br label %land.end250 + +land.end250: ; preds = %land.rhs247, %land.end198 + %224 = phi i1 [ false, %land.end198 ], [ %cmp248, %land.rhs247 ] + %land.ext251 = zext i1 %224 to i32 + %call252 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %conv223, double %conv228, double %conv231, double %conv236, double %conv237, double %conv238, i32 %land.ext251) + call void @clear() + store double 1.234500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + store double 7.677000e+03, double* getelementptr 
inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + %call253 = call { double, double } @dc_v() + %225 = extractvalue { double, double } %call253, 0 + %226 = extractvalue { double, double } %call253, 1 + store double %225, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + store double %226, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + %ret_dc.real = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + %ret_dc.imag = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + %ret_dc.real254 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + %ret_dc.imag255 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + %lret_dc.real = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + %lret_dc.imag = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + %lret_dc.real256 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + %lret_dc.imag257 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + %ret_dc.real258 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + %ret_dc.imag259 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + %lret_dc.real260 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + %lret_dc.imag261 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + %cmp.r262 = fcmp oeq double %ret_dc.real258, %lret_dc.real260 + %cmp.i263 = fcmp oeq double %ret_dc.imag259, %lret_dc.imag261 + %and.ri264 = and i1 %cmp.r262, %cmp.i263 + %conv265 = zext i1 %and.ri264 to i32 + %call266 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str3, i32 0, i32 0), double %ret_dc.real, double %ret_dc.imag255, double %lret_dc.real, double 
%lret_dc.imag257, i32 %conv265) + call void @clear() + store double 0x40AAF6F532617C1C, double* @lxd, align 8 + store double 4.444500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + store double 7.888000e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + %227 = load float* @lx, align 4 + %call267 = call { double, double } @dc_sf(float %227) + %228 = extractvalue { double, double } %call267, 0 + %229 = extractvalue { double, double } %call267, 1 + store double %228, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + store double %229, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + %ret_dc.real268 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + %ret_dc.imag269 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + %ret_dc.real270 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + %ret_dc.imag271 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + %lret_dc.real272 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + %lret_dc.imag273 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + %lret_dc.real274 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + %lret_dc.imag275 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + %230 = load float* @x, align 4 + %conv276 = fpext float %230 to double + %231 = load float* @lx, align 4 + %conv277 = fpext float %231 to double + %ret_dc.real278 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) + %ret_dc.imag279 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1) + %lret_dc.real280 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) + %lret_dc.imag281 = load double* 
getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1) + %cmp.r282 = fcmp oeq double %ret_dc.real278, %lret_dc.real280 + %cmp.i283 = fcmp oeq double %ret_dc.imag279, %lret_dc.imag281 + %and.ri284 = and i1 %cmp.r282, %cmp.i283 + br i1 %and.ri284, label %land.rhs286, label %land.end289 + +land.rhs286: ; preds = %land.end250 + %232 = load float* @x, align 4 + %233 = load float* @lx, align 4 + %cmp287 = fcmp oeq float %232, %233 + br label %land.end289 + +land.end289: ; preds = %land.rhs286, %land.end250 + %234 = phi i1 [ false, %land.end250 ], [ %cmp287, %land.rhs286 ] + %land.ext290 = zext i1 %234 to i32 + %call291 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %ret_dc.real268, double %ret_dc.imag271, double %lret_dc.real272, double %lret_dc.imag275, double %conv276, double %conv277, i32 %land.ext290) + %235 = load i32* %retval + ret i32 %235 +} + +declare void @v_sf(float) #1 +; stel: .section .mips16.call.fp.v_sf,"ax",@progbits +; stel: .ent __call_stub_v_sf +; stel: mtc1 $4,$f12 +; stel: lui $25,%hi(v_sf) +; stel: addiu $25,$25,%lo(v_sf) +; stel: jr $25 +; stel: .end __call_stub_v_sf + +declare i32 @printf(i8*, ...) 
#1 + +declare void @v_df(double) #1 +; stel: .section .mips16.call.fp.v_df,"ax",@progbits +; stel: .ent __call_stub_v_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: lui $25,%hi(v_df) +; stel: addiu $25,$25,%lo(v_df) +; stel: jr $25 +; stel: .end __call_stub_v_df + +declare void @v_sf_sf(float, float) #1 +; stel: .section .mips16.call.fp.v_sf_sf,"ax",@progbits +; stel: .ent __call_stub_v_sf_sf +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f14 +; stel: lui $25,%hi(v_sf_sf) +; stel: addiu $25,$25,%lo(v_sf_sf) +; stel: jr $25 +; stel: .end __call_stub_v_sf_sf + +declare void @v_sf_df(float, double) #1 +; stel: .section .mips16.call.fp.v_sf_df,"ax",@progbits +; stel: .ent __call_stub_v_sf_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $6,$f14 +; stel: mtc1 $7,$f15 +; stel: lui $25,%hi(v_sf_df) +; stel: addiu $25,$25,%lo(v_sf_df) +; stel: jr $25 +; stel: .end __call_stub_v_sf_df + +declare void @v_df_sf(double, float) #1 +; stel: .section .mips16.call.fp.v_df_sf,"ax",@progbits +; stel: .ent __call_stub_v_df_sf +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: mtc1 $6,$f14 +; stel: lui $25,%hi(v_df_sf) +; stel: addiu $25,$25,%lo(v_df_sf) +; stel: jr $25 +; stel: .end __call_stub_v_df_sf + +declare void @v_df_df(double, double) #1 +; stel: .section .mips16.call.fp.v_df_df,"ax",@progbits +; stel: .ent __call_stub_v_df_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: mtc1 $6,$f14 +; stel: mtc1 $7,$f15 +; stel: lui $25,%hi(v_df_df) +; stel: addiu $25,$25,%lo(v_df_df) +; stel: jr $25 +; stel: .end __call_stub_v_df_df + +declare float @sf_v() #1 +; stel: .section .mips16.call.fp.sf_v,"ax",@progbits +; stel: .ent __call_stub_sf_v +; stel: move $18, $31 +; stel: jal sf_v +; stel: mfc1 $2,$f0 +; stel: jr $18 +; stel: .end __call_stub_sf_v + +declare float @sf_sf(float) #1 +; stel: .section .mips16.call.fp.sf_sf,"ax",@progbits +; stel: .ent __call_stub_sf_sf +; stel: mtc1 $4,$f12 +; stel: move $18, $31 +; stel: jal sf_sf +; stel: mfc1 $2,$f0 +; stel: jr $18 +; stel: .end 
__call_stub_sf_sf + +declare float @sf_df(double) #1 +; stel: .section .mips16.call.fp.sf_df,"ax",@progbits +; stel: .ent __call_stub_sf_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: move $18, $31 +; stel: jal sf_df +; stel: mfc1 $2,$f0 +; stel: jr $18 +; stel: .end __call_stub_sf_df + +declare float @sf_sf_sf(float, float) #1 +; stel: .section .mips16.call.fp.sf_sf_sf,"ax",@progbits +; stel: .ent __call_stub_sf_sf_sf +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f14 +; stel: move $18, $31 +; stel: jal sf_sf_sf +; stel: mfc1 $2,$f0 +; stel: jr $18 +; stel: .end __call_stub_sf_sf_sf + +declare float @sf_sf_df(float, double) #1 +; stel: .section .mips16.call.fp.sf_sf_df,"ax",@progbits +; stel: .ent __call_stub_sf_sf_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $6,$f14 +; stel: mtc1 $7,$f15 +; stel: move $18, $31 +; stel: jal sf_sf_df +; stel: mfc1 $2,$f0 +; stel: jr $18 +; stel: .end __call_stub_sf_sf_df + +declare float @sf_df_sf(double, float) #1 +; stel: .section .mips16.call.fp.sf_df_sf,"ax",@progbits +; stel: .ent __call_stub_sf_df_sf +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: mtc1 $6,$f14 +; stel: move $18, $31 +; stel: jal sf_df_sf +; stel: mfc1 $2,$f0 +; stel: jr $18 +; stel: .end __call_stub_sf_df_sf + +declare float @sf_df_df(double, double) #1 +; stel: .section .mips16.call.fp.sf_df_df,"ax",@progbits +; stel: .ent __call_stub_sf_df_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: mtc1 $6,$f14 +; stel: mtc1 $7,$f15 +; stel: move $18, $31 +; stel: jal sf_df_df +; stel: mfc1 $2,$f0 +; stel: jr $18 +; stel: .end __call_stub_sf_df_df + +declare double @df_v() #1 +; stel: .section .mips16.call.fp.df_v,"ax",@progbits +; stel: .ent __call_stub_df_v +; stel: move $18, $31 +; stel: jal df_v +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_df_v + +declare double @df_sf(float) #1 +; stel: .section .mips16.call.fp.df_sf,"ax",@progbits +; stel: .ent __call_stub_df_sf +; stel: mtc1 $4,$f12 +; stel: move $18, $31 +; stel: 
jal df_sf +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_df_sf + +declare double @df_df(double) #1 +; stel: .section .mips16.call.fp.df_df,"ax",@progbits +; stel: .ent __call_stub_df_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: move $18, $31 +; stel: jal df_df +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_df_df + +declare double @df_sf_sf(float, float) #1 +; stel: .section .mips16.call.fp.df_sf_sf,"ax",@progbits +; stel: .ent __call_stub_df_sf_sf +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f14 +; stel: move $18, $31 +; stel: jal df_sf_sf +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_df_sf_sf + +declare double @df_sf_df(float, double) #1 +; stel: .section .mips16.call.fp.df_sf_df,"ax",@progbits +; stel: .ent __call_stub_df_sf_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $6,$f14 +; stel: mtc1 $7,$f15 +; stel: move $18, $31 +; stel: jal df_sf_df +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_df_sf_df + +declare double @df_df_sf(double, float) #1 +; stel: .section .mips16.call.fp.df_df_sf,"ax",@progbits +; stel: .ent __call_stub_df_df_sf +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: mtc1 $6,$f14 +; stel: move $18, $31 +; stel: jal df_df_sf +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_df_df_sf + +declare double @df_df_df(double, double) #1 +; stel: .section .mips16.call.fp.df_df_df,"ax",@progbits +; stel: .ent __call_stub_df_df_df +; stel: mtc1 $4,$f12 +; stel: mtc1 $5,$f13 +; stel: mtc1 $6,$f14 +; stel: mtc1 $7,$f15 +; stel: move $18, $31 +; stel: jal df_df_df +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_df_df_df + +declare { float, float } @sc_v() #1 +; stel: .section .mips16.call.fp.sc_v,"ax",@progbits +; stel: .ent __call_stub_sc_v +; stel: move $18, $31 +; stel: jal sc_v +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f2 +; stel: jr $18 +; 
stel: .end __call_stub_sc_v + +declare { float, float } @sc_sf(float) #1 +; stel: .section .mips16.call.fp.sc_sf,"ax",@progbits +; stel: .ent __call_stub_sc_sf +; stel: mtc1 $4,$f12 +; stel: move $18, $31 +; stel: jal sc_sf +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f2 +; stel: jr $18 +; stel: .end __call_stub_sc_sf + +declare { double, double } @dc_v() #1 +; stel: .section .mips16.call.fp.dc_v,"ax",@progbits +; stel: .ent __call_stub_dc_v +; stel: move $18, $31 +; stel: jal dc_v +; stel: mfc1 $4,$f2 +; stel: mfc1 $5,$f3 +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_dc_v + +declare { double, double } @dc_sf(float) #1 +; stel: .section .mips16.call.fp.dc_sf,"ax",@progbits +; stel: .ent __call_stub_dc_sf +; stel: mtc1 $4,$f12 +; stel: move $18, $31 +; stel: jal dc_sf +; stel: mfc1 $4,$f2 +; stel: mfc1 $5,$f3 +; stel: mfc1 $2,$f0 +; stel: mfc1 $3,$f1 +; stel: jr $18 +; stel: .end __call_stub_dc_sf + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll new file mode 100644 index 0000000000..eae9a2216a --- /dev/null +++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 < %s | \ +; RUN: FileCheck %s -check-prefix=LE-PIC +; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \ +; RUN: FileCheck %s -check-prefix=LE-STATIC +; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \ +; RUN: FileCheck %s -check-prefix=BE-PIC +; RUN: llc -march=mipsel < %s | FileCheck %s 
-check-prefix=CHECK-LDC1-SDC1 + +@g0 = common global double 0.000000e+00, align 8 + +; LE-PIC: test_ldc1: +; LE-PIC: lwc1 $f0, 0(${{[0-9]+}}) +; LE-PIC: lwc1 $f1, 4(${{[0-9]+}}) +; LE-STATIC: test_ldc1: +; LE-STATIC: lwc1 $f0, %lo(g0)(${{[0-9]+}}) +; LE-STATIC: lwc1 $f1, %lo(g0+4)(${{[0-9]+}}) +; BE-PIC: test_ldc1: +; BE-PIC: lwc1 $f1, 0(${{[0-9]+}}) +; BE-PIC: lwc1 $f0, 4(${{[0-9]+}}) +; CHECK-LDC1-SDC1: test_ldc1: +; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}} + +define double @test_ldc1() { +entry: + %0 = load double* @g0, align 8 + ret double %0 +} + +; LE-PIC: test_sdc1: +; LE-PIC: swc1 $f12, 0(${{[0-9]+}}) +; LE-PIC: swc1 $f13, 4(${{[0-9]+}}) +; LE-STATIC: test_sdc1: +; LE-STATIC: swc1 $f12, %lo(g0)(${{[0-9]+}}) +; LE-STATIC: swc1 $f13, %lo(g0+4)(${{[0-9]+}}) +; BE-PIC: test_sdc1: +; BE-PIC: swc1 $f13, 0(${{[0-9]+}}) +; BE-PIC: swc1 $f12, 4(${{[0-9]+}}) +; CHECK-LDC1-SDC1: test_sdc1: +; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}} + +define void @test_sdc1(double %a) { +entry: + store double %a, double* @g0, align 8 + ret void +} diff --git a/test/CodeGen/Mips/stackcoloring.ll b/test/CodeGen/Mips/stackcoloring.ll new file mode 100644 index 0000000000..76cc08679d --- /dev/null +++ b/test/CodeGen/Mips/stackcoloring.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +@g1 = external global i32* + +; CHECK: foo1: +; CHECK: lw ${{[0-9]+}}, %got(g1) +; CHECK: # %for.body +; CHECK: # %for.end + +define i32 @foo1() { +entry: + %b = alloca [16 x i32], align 4 + %0 = bitcast [16 x i32]* %b to i8* + call void @llvm.lifetime.start(i64 64, i8* %0) + %arraydecay = getelementptr inbounds [16 x i32]* %b, i32 0, i32 0 + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %v.04 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %1 = load i32** @g1, align 4 + %arrayidx = getelementptr inbounds i32* %1, i32 %i.05 + %2 = load i32* %arrayidx, align 4 + %call = call i32 @foo2(i32 %2, i32* %arraydecay) + %add = add nsw i32 
%call, %v.04 + %inc = add nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + call void @llvm.lifetime.end(i64 64, i8* %0) + ret i32 %add +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) + +declare i32 @foo2(i32, i32*) + +declare void @llvm.lifetime.end(i64, i8* nocapture) diff --git a/test/CodeGen/PowerPC/addrfuncstr.ll b/test/CodeGen/PowerPC/addrfuncstr.ll new file mode 100644 index 0000000000..60c02d498f --- /dev/null +++ b/test/CodeGen/PowerPC/addrfuncstr.ll @@ -0,0 +1,27 @@ +; RUN: llc -O0 < %s | FileCheck %s + +; Verify that a constant with an initializer that may turn into a dynamic +; relocation is not placed in .rodata, but rather in .data.rel.ro. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.x = type { i64 (i8*, i64, i64, %struct._IO_FILE*)* } +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@_ZL1y = internal constant %struct.x { i64 (i8*, i64, i64, %struct._IO_FILE*)* @fread }, align 8 + +; Function Attrs: nounwind +define %struct.x* @_Z3foov() #0 { +entry: + ret %struct.x* @_ZL1y +} + +declare i64 @fread(i8*, i64, i64, %struct._IO_FILE*) #1 + +; CHECK: .section .data.rel.ro +; CHECK: .quad fread + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll index d698ab031d..f1cbc5afa8 100644 --- a/test/CodeGen/PowerPC/crsave.ll +++ b/test/CodeGen/PowerPC/crsave.ll @@ -13,9 +13,11 @@ entry: ret i32 %1 } +; PPC32: stw 31, -4(1) +; PPC32: stwu 1, -32(1) ; PPC32: mfcr 12 -; PPC32-NEXT: stw 12, {{[0-9]+}}(31) -; PPC32: lwz 12, {{[0-9]+}}(31) +; PPC32-NEXT: stw 12, 24(31) +; PPC32: lwz 12, 24(31) ; PPC32-NEXT: mtcrf 32, 12 ; PPC64: mfcr 12 @@ -35,9 +37,11 @@ entry: ret i32 %1 } +; PPC32: stw 31, -4(1) +; PPC32: stwu 1, -32(1) ; PPC32: mfcr 12 -; PPC32-NEXT: stw 12, {{[0-9]+}}(31) -; PPC32: lwz 12, {{[0-9]+}}(31) +; PPC32-NEXT: stw 12, 24(31) +; PPC32: lwz 12, 24(31) ; PPC32-NEXT: mtcrf 32, 12 ; PPC32-NEXT: mtcrf 16, 12 ; PPC32-NEXT: mtcrf 8, 12 diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll new file mode 100644 index 0000000000..3e854c840f --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +;CHECK: V_MAX_I32_e32 + +define void @main(i32 %p0, i32 %p1) #0 { +main_body: + %0 = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1) + %1 = bitcast i32 %0 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1) + ret void +} + +; Function Attrs: readnone +declare i32 @llvm.AMDGPU.imax(i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { readnone } + +!0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll new file mode 100644 index 0000000000..e227bf8d55 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +;CHECK: V_MIN_I32_e32 + +define void 
@main(i32 %p0, i32 %p1) #0 { +main_body: + %0 = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1) + %1 = bitcast i32 %0 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1) + ret void +} + +; Function Attrs: readnone +declare i32 @llvm.AMDGPU.imin(i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { readnone } + +!0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll index ff22a69196..cdc03f8a41 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -1,16 +1,16 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s -;CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: @amdgpu_trunc +; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; SI-CHECK: @amdgpu_trunc +; SI-CHECK: V_TRUNC_F32 -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.AMDGPU.trunc( float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) - ret void +define void @amdgpu_trunc(float addrspace(1)* %out, float %x) { +entry: + %0 = call float @llvm.AMDGPU.trunc(float %x) + store float %0, float addrspace(1)* %out + ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.AMDGPU.trunc(float ) readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll new file mode 100644 index 0000000000..7699c04c36 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +;CHECK: V_MAX_U32_e32 + 
+define void @main(i32 %p0, i32 %p1) #0 { +main_body: + %0 = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1) + %1 = bitcast i32 %0 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1) + ret void +} + +; Function Attrs: readnone +declare i32 @llvm.AMDGPU.umax(i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { readnone } + +!0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll new file mode 100644 index 0000000000..a911ad9bb3 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +;CHECK: V_MIN_U32_e32 + +define void @main(i32 %p0, i32 %p1) #0 { +main_body: + %0 = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1) + %1 = bitcast i32 %0 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1) + ret void +} + +; Function Attrs: readnone +declare i32 @llvm.AMDGPU.umin(i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { readnone } + +!0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/uitofp.ll b/test/CodeGen/R600/uitofp.ll new file mode 100644 index 0000000000..6cf9e6a225 --- /dev/null +++ b/test/CodeGen/R600/uitofp.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +;CHECK: V_CVT_F32_U32_e32 + +define void @main(i32 %p) #0 { +main_body: + %0 = uitofp i32 %p to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %0, float %0, float %0, float %0) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } + +!0 = metadata !{metadata !"const", 
null, i32 1} diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll index 6e66a262a4..cf1a039af5 100644 --- a/test/CodeGen/SPARC/64cond.ll +++ b/test/CodeGen/SPARC/64cond.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=sparcv9 | FileCheck %s -; Testing 64-bit conditionals. +; RUN: llc < %s -mtriple=sparc64-pc-openbsd | FileCheck %s +; Testing 64-bit conditionals. The sparc64 triple is an alias for sparcv9. ; CHECK: cmpri ; CHECK: subcc %i1, 1 diff --git a/test/CodeGen/SystemZ/int-sub-07.ll b/test/CodeGen/SystemZ/int-sub-07.ll new file mode 100644 index 0000000000..9bf5ed9055 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-07.ll @@ -0,0 +1,131 @@ +; Test 32-bit subtraction in which the second operand is a sign-extended +; i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the SH range. +define i32 @f1(i32 %lhs, i16 *%src) { +; CHECK: f1: +; CHECK: sh %r2, 0(%r3) +; CHECK: br %r14 + %half = load i16 *%src + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the aligned SH range. +define i32 @f2(i32 %lhs, i16 *%src) { +; CHECK: f2: +; CHECK: sh %r2, 4094(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2047 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword up, which should use SHY instead of SH. +define i32 @f3(i32 %lhs, i16 *%src) { +; CHECK: f3: +; CHECK: shy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2048 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the aligned SHY range. 
+define i32 @f4(i32 %lhs, i16 *%src) { +; CHECK: f4: +; CHECK: shy %r2, 524286(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f5(i32 %lhs, i16 *%src) { +; CHECK: f5: +; CHECK: agfi %r3, 524288 +; CHECK: sh %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the negative aligned SHY range. +define i32 @f6(i32 %lhs, i16 *%src) { +; CHECK: f6: +; CHECK: shy %r2, -2(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the low end of the SHY range. +define i32 @f7(i32 %lhs, i16 *%src) { +; CHECK: f7: +; CHECK: shy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f8(i32 %lhs, i16 *%src) { +; CHECK: f8: +; CHECK: agfi %r3, -524290 +; CHECK: sh %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check that SH allows an index. 
+define i32 @f9(i32 %lhs, i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: sh %r2, 4094({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %sub1 = add i64 %src, %index + %sub2 = add i64 %sub1, 4094 + %ptr = inttoptr i64 %sub2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} + +; Check that SHY allows an index. +define i32 @f10(i32 %lhs, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: shy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %sub1 = add i64 %src, %index + %sub2 = add i64 %sub1, 4096 + %ptr = inttoptr i64 %sub2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = sub i32 %lhs, %rhs + ret i32 %res +} diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll index 61c477aa91..1b4d4625dd 100644 --- a/test/CodeGen/Thumb2/large-call.ll +++ b/test/CodeGen/Thumb2/large-call.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mcpu=cortex-a8 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios0.0.0" diff --git a/test/CodeGen/X86/x86-64-psub.ll b/test/CodeGen/X86/x86-64-psub.ll new file mode 100644 index 0000000000..7869a80b2a --- /dev/null +++ b/test/CodeGen/X86/x86-64-psub.ll @@ -0,0 +1,213 @@ +; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck %s + +; MMX packed sub opcodes were wrongly marked as commutative. +; This test checks that the operands of packed sub instructions are +; never interchanged by the "Two-Address instruction pass". 
+ +declare { i64, double } @getFirstParam() +declare { i64, double } @getSecondParam() + +define i64 @test_psubb() { +entry: + %call = tail call { i64, double } @getFirstParam() + %0 = extractvalue { i64, double } %call, 0 + %call2 = tail call { i64, double } @getSecondParam() + %1 = extractvalue { i64, double } %call2, 0 + %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0 + %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0 + %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8> + %3 = bitcast <8 x i8> %2 to x86_mmx + %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8> + %5 = bitcast <8 x i8> %4 to x86_mmx + %6 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %3, x86_mmx %5) nounwind + %7 = bitcast x86_mmx %6 to <8 x i8> + %8 = bitcast <8 x i8> %7 to <1 x i64> + %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0 + ret i64 %retval.0.extract.i15 +} + +; CHECK: test_psubb: +; CHECK: callq getFirstParam +; CHECK: callq getSecondParam +; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] +; CHECK: psubb [[PARAM2]], [[PARAM1]] +; CHECK: ret + +define i64 @test_psubw() { +entry: + %call = tail call { i64, double } @getFirstParam() + %0 = extractvalue { i64, double } %call, 0 + %call2 = tail call { i64, double } @getSecondParam() + %1 = extractvalue { i64, double } %call2, 0 + %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0 + %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0 + %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16> + %3 = bitcast <4 x i16> %2 to x86_mmx + %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16> + %5 = bitcast <4 x i16> %4 to x86_mmx + %6 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %3, x86_mmx %5) nounwind + %7 = bitcast x86_mmx %6 to <4 x i16> + %8 = bitcast <4 x i16> %7 to <1 x i64> + %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0 + ret i64 %retval.0.extract.i15 +} + +; CHECK: test_psubw: +; CHECK: callq getFirstParam +; CHECK: 
callq getSecondParam +; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] +; CHECK: psubw [[PARAM2]], [[PARAM1]] +; CHECK: ret + + +define i64 @test_psubd() { +entry: + %call = tail call { i64, double } @getFirstParam() + %0 = extractvalue { i64, double } %call, 0 + %call2 = tail call { i64, double } @getSecondParam() + %1 = extractvalue { i64, double } %call2, 0 + %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0 + %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0 + %2 = bitcast <1 x i64> %__m1.0.insert.i to <2 x i32> + %3 = bitcast <2 x i32> %2 to x86_mmx + %4 = bitcast <1 x i64> %__m2.0.insert.i to <2 x i32> + %5 = bitcast <2 x i32> %4 to x86_mmx + %6 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %3, x86_mmx %5) nounwind + %7 = bitcast x86_mmx %6 to <2 x i32> + %8 = bitcast <2 x i32> %7 to <1 x i64> + %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0 + ret i64 %retval.0.extract.i15 +} + +; CHECK: test_psubd: +; CHECK: callq getFirstParam +; CHECK: callq getSecondParam +; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] +; CHECK: psubd [[PARAM2]], [[PARAM1]] +; CHECK: ret + +define i64 @test_psubsb() { +entry: + %call = tail call { i64, double } @getFirstParam() + %0 = extractvalue { i64, double } %call, 0 + %call2 = tail call { i64, double } @getSecondParam() + %1 = extractvalue { i64, double } %call2, 0 + %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0 + %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0 + %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8> + %3 = bitcast <8 x i8> %2 to x86_mmx + %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8> + %5 = bitcast <8 x i8> %4 to x86_mmx + %6 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %3, x86_mmx %5) nounwind + %7 = bitcast x86_mmx %6 to <8 x i8> + %8 = bitcast <8 x i8> %7 to <1 x i64> + %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0 + ret i64 
%retval.0.extract.i15 +} + +; CHECK: test_psubsb: +; CHECK: callq getFirstParam +; CHECK: callq getSecondParam +; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] +; CHECK: psubsb [[PARAM2]], [[PARAM1]] +; CHECK: ret + +define i64 @test_psubswv() { +entry: + %call = tail call { i64, double } @getFirstParam() + %0 = extractvalue { i64, double } %call, 0 + %call2 = tail call { i64, double } @getSecondParam() + %1 = extractvalue { i64, double } %call2, 0 + %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0 + %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0 + %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16> + %3 = bitcast <4 x i16> %2 to x86_mmx + %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16> + %5 = bitcast <4 x i16> %4 to x86_mmx + %6 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %3, x86_mmx %5) nounwind + %7 = bitcast x86_mmx %6 to <4 x i16> + %8 = bitcast <4 x i16> %7 to <1 x i64> + %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0 + ret i64 %retval.0.extract.i15 +} + +; CHECK: test_psubswv: +; CHECK: callq getFirstParam +; CHECK: callq getSecondParam +; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] +; CHECK: psubsw [[PARAM2]], [[PARAM1]] +; CHECK: ret + +define i64 @test_psubusbv() { +entry: + %call = tail call { i64, double } @getFirstParam() + %0 = extractvalue { i64, double } %call, 0 + %call2 = tail call { i64, double } @getSecondParam() + %1 = extractvalue { i64, double } %call2, 0 + %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0 + %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0 + %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8> + %3 = bitcast <8 x i8> %2 to x86_mmx + %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8> + %5 = bitcast <8 x i8> %4 to x86_mmx + %6 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %3, x86_mmx %5) nounwind + %7 = bitcast x86_mmx %6 to <8 x i8> + %8 = bitcast <8 x i8> %7 
to <1 x i64> + %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0 + ret i64 %retval.0.extract.i15 +} + +; CHECK: test_psubusbv: +; CHECK: callq getFirstParam +; CHECK: callq getSecondParam +; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] +; CHECK: psubusb [[PARAM2]], [[PARAM1]] +; CHECK: ret + +define i64 @test_psubuswv() { +entry: + %call = tail call { i64, double } @getFirstParam() + %0 = extractvalue { i64, double } %call, 0 + %call2 = tail call { i64, double } @getSecondParam() + %1 = extractvalue { i64, double } %call2, 0 + %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0 + %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0 + %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16> + %3 = bitcast <4 x i16> %2 to x86_mmx + %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16> + %5 = bitcast <4 x i16> %4 to x86_mmx + %6 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %3, x86_mmx %5) nounwind + %7 = bitcast x86_mmx %6 to <4 x i16> + %8 = bitcast <4 x i16> %7 to <1 x i64> + %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0 + ret i64 %retval.0.extract.i15 +} + +; CHECK: test_psubuswv: +; CHECK: callq getFirstParam +; CHECK: callq getSecondParam +; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] +; CHECK: psubusw [[PARAM2]], [[PARAM1]] +; CHECK: ret + + +declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone diff --git a/test/ExecutionEngine/MCJIT/eh.ll 
b/test/ExecutionEngine/MCJIT/eh.ll index cd67dd70c5..c2135736ad 100644 --- a/test/ExecutionEngine/MCJIT/eh.ll +++ b/test/ExecutionEngine/MCJIT/eh.ll @@ -1,5 +1,5 @@ ; RUN: %lli_mcjit %s -; XFAIL: arm, cygwin, win32 +; XFAIL: arm, cygwin, win32, mingw declare i8* @__cxa_allocate_exception(i64) declare void @__cxa_throw(i8*, i8*, i8*) declare i32 @__gxx_personality_v0(...) diff --git a/test/FileCheck/check-dag-xfails.txt b/test/FileCheck/check-dag-xfails.txt new file mode 100644 index 0000000000..3f4f98e348 --- /dev/null +++ b/test/FileCheck/check-dag-xfails.txt @@ -0,0 +1,85 @@ +; RUN: not FileCheck -check-prefix=X1 -input-file %s %s +; RUN: not FileCheck -check-prefix=X2 -input-file %s %s +; RUN: not FileCheck -check-prefix=X3 -input-file %s %s +; RUN: not FileCheck -check-prefix=X4 -input-file %s %s +; RUN: not FileCheck -check-prefix=X5 -input-file %s %s +; RUN: not FileCheck -check-prefix=X6 -input-file %s %s + +__x1 +add r10, r1, r2 +add r11, r3, r4 +mul r5, r10, r12 +__x1 + +; X1: __x1 +; X1-DAG: add [[REG1:r[0-9]+]], r1, r2 +; X1-DAG: add [[REG2:r[0-9]+]], r3, r4 +; X1: mul r5, [[REG1]], [[REG2]] +; X1: __x1 + +__x2 +mul r11, r3, r4 +mul r10, r1, r2 +add r5, r11, r11 +__x2 + +; X2: __x2 +; X2-DAG: mul [[REG1:r[0-9]+]], r1, r2 +; X2-DAG: mul [[REG2:r[0-9]+]], r3, r4 +; X2: add r5, [[REG1]], [[REG2]] +; X2: __x2 + +__x3 +add r11, r3, r4 +add r12, r1, r2 +mul r5, r10, r11 +__x3 + +; X3: __x3 +; X3-DAG: add [[REG1:r[0-9]+]], r1, r2 +; X3-DAG: add [[REG2:r[0-9]+]], r3, r4 +; X3-DAG: mul r5, [[REG1]], [[REG2]] +; X3: __x3 + +__x4 +add r11, r3, r4 +add r12, r1, r2 +not +mul r5, r12, r11 +__x4 + +; X4: __x4 +; X4-DAG: add [[REG1:r[0-9]+]], r1, r2 +; X4-DAG: add [[REG2:r[0-9]+]], r3, r4 +; X4-NOT: not +; X4-DAG: mul r5, [[REG1]], [[REG2]] +; X4: __x4 + +__x5 +mul r5, r12, r11 +add r11, r3, r4 +add r12, r1, r2 +not +__x5 + +; X5: __x5 +; X5-DAG: add [[REG1:r[0-9]+]], r1, r2 +; X5-DAG: add [[REG2:r[0-9]+]], r3, r4 +; X5-NOT: not +; X5-DAG: mul r5, [[REG1]], [[REG2]] +; 
X5: __x5 + +__x6 +add r11, r3, r4 +mul r6, r12, r11 +add r12, r1, r2 +mul r5, r12, r11 +__x6 + +; X6: __x6 +; X6-DAG: add [[REG1:r[0-9]+]], r1, r2 +; X6-DAG: add [[REG2:r[0-9]+]], r3, r4 +; X6-NOT: not +; X6-DAG: mul r5, [[REG1]], [[REG2]] +; X6-DAG: mul r6, [[REG1]], [[REG2]] +; X6: __x6 diff --git a/test/FileCheck/check-dag.txt b/test/FileCheck/check-dag.txt new file mode 100644 index 0000000000..6325e06e5a --- /dev/null +++ b/test/FileCheck/check-dag.txt @@ -0,0 +1,25 @@ +; RUN: FileCheck -input-file %s %s + +add r10, r1, r2 +add r11, r3, r4 +mul r5, r10, r11 + +mul r11, r3, r4 +mul r10, r1, r2 +add r5, r10, r11 + +add r11, r3, r4 +add r10, r1, r2 +mul r5, r10, r11 + +; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2 +; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4 +; CHECK: mul r5, [[REG1]], [[REG2]] + +; CHECK-DAG: mul [[REG1:r[0-9]+]], r1, r2 +; CHECK-DAG: mul [[REG2:r[0-9]+]], r3, r4 +; CHECK: add r5, [[REG1]], [[REG2]] + +; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2 +; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4 +; CHECK-DAG: mul r5, [[REG1]], [[REG2]] diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s index 71b5b5da09..5227bdd239 100644 --- a/test/MC/ARM/basic-arm-instructions.s +++ b/test/MC/ARM/basic-arm-instructions.s @@ -1062,10 +1062,18 @@ Lforward: @ MRC/MRC2 @------------------------------------------------------------------------------ mrc p14, #0, r1, c1, c2, #4 + mrc p15, #7, apsr_nzcv, c15, c6, #6 + mrc p15, #7, pc, c15, c6, #6 mrc2 p14, #0, r1, c1, c2, #4 - -@ CHECK: mrc p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xee] -@ CHECK: mrc2 p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xfe] + mrc2 p10, #7, apsr_nzcv, c15, c0, #1 + mrc2 p10, #7, pc, c15, c0, #1 + +@ CHECK: mrc p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xee] +@ CHECK: mrc p15, #7, apsr_nzcv, c15, c6, #6 @ encoding: [0xd6,0xff,0xff,0xee] +@ CHECK: mrc p15, #7, pc, c15, c6, #6 @ encoding: [0xd6,0xff,0xff,0xee] +@ CHECK: mrc2 p14, #0, r1, c1, c2, 
#4 @ encoding: [0x92,0x1e,0x11,0xfe] +@ CHECK: mrc2 p10, #7, apsr_nzcv, c15, c0, #1 @ encoding: [0x30,0xfa,0xff,0xfe] +@ CHECK: mrc2 p10, #7, pc, c15, c0, #1 @ encoding: [0x30,0xfa,0xff,0xfe] @------------------------------------------------------------------------------ @ MRRC/MRRC2 diff --git a/test/MC/ARM/data-in-code.ll b/test/MC/ARM/data-in-code.ll index e3325b6bf6..9fccf2e9f8 100644 --- a/test/MC/ARM/data-in-code.ll +++ b/test/MC/ARM/data-in-code.ll @@ -1,7 +1,9 @@ -;; RUN: llc -O0 -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \ +;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort \ +;; RUN: -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \ ;; RUN: llvm-readobj -t | FileCheck -check-prefix=ARM %s -;; RUN: llc -O0 -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \ +;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort \ +;; RUN: -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \ ;; RUN: llvm-readobj -t | FileCheck -check-prefix=TMB %s ;; Ensure that if a jump table is generated that it has Mapping Symbols @@ -119,7 +121,7 @@ exit: ;; ARM: Symbol { ;; ARM: Name: $a -;; ARM-NEXT: Value: 0xAC +;; ARM-NEXT: Value: 0x{{[0-9A-F]+}} ;; ARM-NEXT: Size: 0 ;; ARM-NEXT: Binding: Local ;; ARM-NEXT: Type: None @@ -135,7 +137,7 @@ exit: ;; ARM: Symbol { ;; ARM: Name: $d -;; ARM-NEXT: Value: 0x30 +;; ARM-NEXT: Value: 0x{{[0-9A-F]+}} ;; ARM-NEXT: Size: 0 ;; ARM-NEXT: Binding: Local ;; ARM-NEXT: Type: None @@ -146,7 +148,7 @@ exit: ;; TMB: Symbol { ;; TMB: Name: $d.2 -;; TMB-NEXT: Value: 0x16 +;; TMB-NEXT: Value: 0x{{[0-9A-F]+}} ;; TMB-NEXT: Size: 0 ;; TMB-NEXT: Binding: Local ;; TMB-NEXT: Type: None @@ -164,7 +166,7 @@ exit: ;; TMB: Symbol { ;; TMB: Name: $t -;; TMB-NEXT: Value: 0x36 +;; TMB-NEXT: Value: 0x{{[0-9A-F]+}} ;; TMB-NEXT: Size: 0 ;; TMB-NEXT: Binding: Local ;; TMB-NEXT: Type: None diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions.txt b/test/MC/Disassembler/ARM/basic-arm-instructions.txt index 9f63e1e914..c92322e8d7 100644 --- 
a/test/MC/Disassembler/ARM/basic-arm-instructions.txt +++ b/test/MC/Disassembler/ARM/basic-arm-instructions.txt @@ -757,10 +757,14 @@ # MRC/MRC2 #------------------------------------------------------------------------------ # CHECK: mrc p14, #0, r1, c1, c2, #4 +# CHECK: mrc p15, #7, apsr_nzcv, c15, c6, #6 # CHECK: mrc2 p14, #0, r1, c1, c2, #4 +# CHECK: mrc2 p9, #7, apsr_nzcv, c15, c0, #1 0x92 0x1e 0x11 0xee +0xd6 0xff 0xff 0xee 0x92 0x1e 0x11 0xfe +0x30 0xf9 0xff 0xfe #------------------------------------------------------------------------------ # MRRC/MRRC2 diff --git a/test/MC/Disassembler/SystemZ/insns-pcrel.txt b/test/MC/Disassembler/SystemZ/insns-pcrel.txt new file mode 100644 index 0000000000..1f2d8d0094 --- /dev/null +++ b/test/MC/Disassembler/SystemZ/insns-pcrel.txt @@ -0,0 +1,933 @@ +# Test instructions that have PC-relative operands. There is no attempt +# to keep the instructions in alphabetical order, since adding new instructions +# in the middle would mean updating all later offsets. 
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu | FileCheck %s + +# 0x00000000: +# CHECK: brasl %r0, 0x0 +0xc0 0x05 0x00 0x00 0x00 0x00 + +# 0x00000006: +# CHECK: brasl %r14, 0x6 +0xc0 0xe5 0x00 0x00 0x00 0x00 + +# 0x0000000c: +# CHECK: brasl %r15, 0xc +0xc0 0xf5 0x00 0x00 0x00 0x00 + +# 0x00000012: +# CHECK: brasl %r0, 0x10 +0xc0 0x05 0xff 0xff 0xff 0xff + +# 0x00000018: +# CHECK: brasl %r14, 0xffffffff00000018 +0xc0 0xe5 0x80 0x00 0x00 0x00 + +# 0x0000001e: +# CHECK: brasl %r15, 0x10000001c +0xc0 0xf5 0x7f 0xff 0xff 0xff + +# 0x00000024: +# CHECK: bras %r0, 0x24 +0xa7 0x05 0x00 0x00 + +# 0x00000028: +# CHECK: bras %r14, 0x28 +0xa7 0xe5 0x00 0x00 + +# 0x0000002c: +# CHECK: bras %r15, 0x2c +0xa7 0xf5 0x00 0x00 + +# 0x00000030: +# CHECK: bras %r0, 0x2e +0xa7 0x05 0xff 0xff + +# 0x00000034: +# CHECK: bras %r14, 0xffffffffffff0034 +0xa7 0xe5 0x80 0x00 + +# 0x00000038: +# CHECK: bras %r15, 0x10036 +0xa7 0xf5 0x7f 0xff + +# 0x0000003c: +# CHECK: brcl 0, 0x3c +0xc0 0x04 0x00 0x00 0x00 0x00 + +# 0x00000042: +# CHECK: jgo 0x42 +0xc0 0x14 0x00 0x00 0x00 0x00 + +# 0x00000048: +# CHECK: jgh 0x48 +0xc0 0x24 0x00 0x00 0x00 0x00 + +# 0x0000004e: +# CHECK: jgnle 0x4e +0xc0 0x34 0x00 0x00 0x00 0x00 + +# 0x00000054: +# CHECK: jgl 0x54 +0xc0 0x44 0x00 0x00 0x00 0x00 + +# 0x0000005a: +# CHECK: jgnhe 0x5a +0xc0 0x54 0x00 0x00 0x00 0x00 + +# 0x00000060: +# CHECK: jglh 0x60 +0xc0 0x64 0x00 0x00 0x00 0x00 + +# 0x00000066: +# CHECK: jgne 0x66 +0xc0 0x74 0x00 0x00 0x00 0x00 + +# 0x0000006c: +# CHECK: jge 0x6c +0xc0 0x84 0x00 0x00 0x00 0x00 + +# 0x00000072: +# CHECK: jgnlh 0x72 +0xc0 0x94 0x00 0x00 0x00 0x00 + +# 0x00000078: +# CHECK: jghe 0x78 +0xc0 0xa4 0x00 0x00 0x00 0x00 + +# 0x0000007e: +# CHECK: jgnl 0x7e +0xc0 0xb4 0x00 0x00 0x00 0x00 + +# 0x00000084: +# CHECK: jgle 0x84 +0xc0 0xc4 0x00 0x00 0x00 0x00 + +# 0x0000008a: +# CHECK: jgnh 0x8a +0xc0 0xd4 0x00 0x00 0x00 0x00 + +# 0x00000090: +# CHECK: jgno 0x90 +0xc0 0xe4 0x00 0x00 0x00 0x00 + +# 0x00000096: +# CHECK: jg 0x96 +0xc0 
0xf4 0x00 0x00 0x00 0x00 + +# 0x0000009c: +# CHECK: brcl 0, 0x9a +0xc0 0x04 0xff 0xff 0xff 0xff + +# 0x000000a2: +# CHECK: brcl 0, 0xffffffff000000a2 +0xc0 0x04 0x80 0x00 0x00 0x00 + +# 0x000000a8: +# CHECK: brcl 0, 0x1000000a6 +0xc0 0x04 0x7f 0xff 0xff 0xff + +# 0x000000ae: +# CHECK: jg 0xac +0xc0 0xf4 0xff 0xff 0xff 0xff + +# 0x000000b4: +# CHECK: jg 0xffffffff000000b4 +0xc0 0xf4 0x80 0x00 0x00 0x00 + +# 0x000000ba: +# CHECK: jg 0x1000000b8 +0xc0 0xf4 0x7f 0xff 0xff 0xff + +# 0x000000c0: +# CHECK: brc 0, 0xc0 +0xa7 0x04 0x00 0x00 + +# 0x000000c4: +# CHECK: jo 0xc4 +0xa7 0x14 0x00 0x00 + +# 0x000000c8: +# CHECK: jh 0xc8 +0xa7 0x24 0x00 0x00 + +# 0x000000cc: +# CHECK: jnle 0xcc +0xa7 0x34 0x00 0x00 + +# 0x000000d0: +# CHECK: jl 0xd0 +0xa7 0x44 0x00 0x00 + +# 0x000000d4: +# CHECK: jnhe 0xd4 +0xa7 0x54 0x00 0x00 + +# 0x000000d8: +# CHECK: jlh 0xd8 +0xa7 0x64 0x00 0x00 + +# 0x000000dc: +# CHECK: jne 0xdc +0xa7 0x74 0x00 0x00 + +# 0x000000e0: +# CHECK: je 0xe0 +0xa7 0x84 0x00 0x00 + +# 0x000000e4: +# CHECK: jnlh 0xe4 +0xa7 0x94 0x00 0x00 + +# 0x000000e8: +# CHECK: jhe 0xe8 +0xa7 0xa4 0x00 0x00 + +# 0x000000ec: +# CHECK: jnl 0xec +0xa7 0xb4 0x00 0x00 + +# 0x000000f0: +# CHECK: jle 0xf0 +0xa7 0xc4 0x00 0x00 + +# 0x000000f4: +# CHECK: jnh 0xf4 +0xa7 0xd4 0x00 0x00 + +# 0x000000f8: +# CHECK: jno 0xf8 +0xa7 0xe4 0x00 0x00 + +# 0x000000fc: +# CHECK: j 0xfc +0xa7 0xf4 0x00 0x00 + +# 0x00000100: +# CHECK: brc 0, 0xfe +0xa7 0x04 0xff 0xff + +# 0x00000104: +# CHECK: brc 0, 0xffffffffffff0104 +0xa7 0x04 0x80 0x00 + +# 0x00000108: +# CHECK: brc 0, 0x10106 +0xa7 0x04 0x7f 0xff + +# 0x0000010c: +# CHECK: j 0x10a +0xa7 0xf4 0xff 0xff + +# 0x00000110: +# CHECK: j 0xffffffffffff0110 +0xa7 0xf4 0x80 0x00 + +# 0x00000114: +# CHECK: j 0x10112 +0xa7 0xf4 0x7f 0xff + +# 0x00000118: +# CHECK: cgfrl %r0, 0x118 +0xc6 0x0c 0x00 0x00 0x00 0x00 + +# 0x0000011e: +# CHECK: cgfrl %r15, 0x11e +0xc6 0xfc 0x00 0x00 0x00 0x00 + +# 0x00000124: +# CHECK: cgfrl %r0, 0x122 +0xc6 0x0c 0xff 0xff 0xff 0xff + 
+# 0x0000012a: +# CHECK: cgfrl %r15, 0x128 +0xc6 0xfc 0xff 0xff 0xff 0xff + +# 0x00000130: +# CHECK: cgfrl %r0, 0xffffffff00000130 +0xc6 0x0c 0x80 0x00 0x00 0x00 + +# 0x00000136: +# CHECK: cgfrl %r15, 0xffffffff00000136 +0xc6 0xfc 0x80 0x00 0x00 0x00 + +# 0x0000013c: +# CHECK: cgfrl %r0, 0x10000013a +0xc6 0x0c 0x7f 0xff 0xff 0xff + +# 0x00000142: +# CHECK: cgfrl %r15, 0x100000140 +0xc6 0xfc 0x7f 0xff 0xff 0xff + +# 0x00000148: +# CHECK: cghrl %r0, 0x148 +0xc6 0x04 0x00 0x00 0x00 0x00 + +# 0x0000014e: +# CHECK: cghrl %r15, 0x14e +0xc6 0xf4 0x00 0x00 0x00 0x00 + +# 0x00000154: +# CHECK: cghrl %r0, 0x152 +0xc6 0x04 0xff 0xff 0xff 0xff + +# 0x0000015a: +# CHECK: cghrl %r15, 0x158 +0xc6 0xf4 0xff 0xff 0xff 0xff + +# 0x00000160: +# CHECK: cghrl %r0, 0xffffffff00000160 +0xc6 0x04 0x80 0x00 0x00 0x00 + +# 0x00000166: +# CHECK: cghrl %r15, 0xffffffff00000166 +0xc6 0xf4 0x80 0x00 0x00 0x00 + +# 0x0000016c: +# CHECK: cghrl %r0, 0x10000016a +0xc6 0x04 0x7f 0xff 0xff 0xff + +# 0x00000172: +# CHECK: cghrl %r15, 0x100000170 +0xc6 0xf4 0x7f 0xff 0xff 0xff + +# 0x00000178: +# CHECK: cgrl %r0, 0x178 +0xc6 0x08 0x00 0x00 0x00 0x00 + +# 0x0000017e: +# CHECK: cgrl %r15, 0x17e +0xc6 0xf8 0x00 0x00 0x00 0x00 + +# 0x00000184: +# CHECK: cgrl %r0, 0x182 +0xc6 0x08 0xff 0xff 0xff 0xff + +# 0x0000018a: +# CHECK: cgrl %r15, 0x188 +0xc6 0xf8 0xff 0xff 0xff 0xff + +# 0x00000190: +# CHECK: cgrl %r0, 0xffffffff00000190 +0xc6 0x08 0x80 0x00 0x00 0x00 + +# 0x00000196: +# CHECK: cgrl %r15, 0xffffffff00000196 +0xc6 0xf8 0x80 0x00 0x00 0x00 + +# 0x0000019c: +# CHECK: cgrl %r0, 0x10000019a +0xc6 0x08 0x7f 0xff 0xff 0xff + +# 0x000001a2: +# CHECK: cgrl %r15, 0x1000001a0 +0xc6 0xf8 0x7f 0xff 0xff 0xff + +# 0x000001a8: +# CHECK: chrl %r0, 0x1a8 +0xc6 0x05 0x00 0x00 0x00 0x00 + +# 0x000001ae: +# CHECK: chrl %r15, 0x1ae +0xc6 0xf5 0x00 0x00 0x00 0x00 + +# 0x000001b4: +# CHECK: chrl %r0, 0x1b2 +0xc6 0x05 0xff 0xff 0xff 0xff + +# 0x000001ba: +# CHECK: chrl %r15, 0x1b8 +0xc6 0xf5 0xff 0xff 0xff 0xff + +# 
0x000001c0: +# CHECK: chrl %r0, 0xffffffff000001c0 +0xc6 0x05 0x80 0x00 0x00 0x00 + +# 0x000001c6: +# CHECK: chrl %r15, 0xffffffff000001c6 +0xc6 0xf5 0x80 0x00 0x00 0x00 + +# 0x000001cc: +# CHECK: chrl %r0, 0x1000001ca +0xc6 0x05 0x7f 0xff 0xff 0xff + +# 0x000001d2: +# CHECK: chrl %r15, 0x1000001d0 +0xc6 0xf5 0x7f 0xff 0xff 0xff + +# 0x000001d8: +# CHECK: clgfrl %r0, 0x1d8 +0xc6 0x0e 0x00 0x00 0x00 0x00 + +# 0x000001de: +# CHECK: clgfrl %r15, 0x1de +0xc6 0xfe 0x00 0x00 0x00 0x00 + +# 0x000001e4: +# CHECK: clgfrl %r0, 0x1e2 +0xc6 0x0e 0xff 0xff 0xff 0xff + +# 0x000001ea: +# CHECK: clgfrl %r15, 0x1e8 +0xc6 0xfe 0xff 0xff 0xff 0xff + +# 0x000001f0: +# CHECK: clgfrl %r0, 0xffffffff000001f0 +0xc6 0x0e 0x80 0x00 0x00 0x00 + +# 0x000001f6: +# CHECK: clgfrl %r15, 0xffffffff000001f6 +0xc6 0xfe 0x80 0x00 0x00 0x00 + +# 0x000001fc: +# CHECK: clgfrl %r0, 0x1000001fa +0xc6 0x0e 0x7f 0xff 0xff 0xff + +# 0x00000202: +# CHECK: clgfrl %r15, 0x100000200 +0xc6 0xfe 0x7f 0xff 0xff 0xff + +# 0x00000208: +# CHECK: clghrl %r0, 0x208 +0xc6 0x06 0x00 0x00 0x00 0x00 + +# 0x0000020e: +# CHECK: clghrl %r15, 0x20e +0xc6 0xf6 0x00 0x00 0x00 0x00 + +# 0x00000214: +# CHECK: clghrl %r0, 0x212 +0xc6 0x06 0xff 0xff 0xff 0xff + +# 0x0000021a: +# CHECK: clghrl %r15, 0x218 +0xc6 0xf6 0xff 0xff 0xff 0xff + +# 0x00000220: +# CHECK: clghrl %r0, 0xffffffff00000220 +0xc6 0x06 0x80 0x00 0x00 0x00 + +# 0x00000226: +# CHECK: clghrl %r15, 0xffffffff00000226 +0xc6 0xf6 0x80 0x00 0x00 0x00 + +# 0x0000022c: +# CHECK: clghrl %r0, 0x10000022a +0xc6 0x06 0x7f 0xff 0xff 0xff + +# 0x00000232: +# CHECK: clghrl %r15, 0x100000230 +0xc6 0xf6 0x7f 0xff 0xff 0xff + +# 0x00000238: +# CHECK: clgrl %r0, 0x238 +0xc6 0x0a 0x00 0x00 0x00 0x00 + +# 0x0000023e: +# CHECK: clgrl %r15, 0x23e +0xc6 0xfa 0x00 0x00 0x00 0x00 + +# 0x00000244: +# CHECK: clgrl %r0, 0x242 +0xc6 0x0a 0xff 0xff 0xff 0xff + +# 0x0000024a: +# CHECK: clgrl %r15, 0x248 +0xc6 0xfa 0xff 0xff 0xff 0xff + +# 0x00000250: +# CHECK: clgrl %r0, 0xffffffff00000250 +0xc6 
0x0a 0x80 0x00 0x00 0x00 + +# 0x00000256: +# CHECK: clgrl %r15, 0xffffffff00000256 +0xc6 0xfa 0x80 0x00 0x00 0x00 + +# 0x0000025c: +# CHECK: clgrl %r0, 0x10000025a +0xc6 0x0a 0x7f 0xff 0xff 0xff + +# 0x00000262: +# CHECK: clgrl %r15, 0x100000260 +0xc6 0xfa 0x7f 0xff 0xff 0xff + +# 0x00000268: +# CHECK: clhrl %r0, 0x268 +0xc6 0x07 0x00 0x00 0x00 0x00 + +# 0x0000026e: +# CHECK: clhrl %r15, 0x26e +0xc6 0xf7 0x00 0x00 0x00 0x00 + +# 0x00000274: +# CHECK: clhrl %r0, 0x272 +0xc6 0x07 0xff 0xff 0xff 0xff + +# 0x0000027a: +# CHECK: clhrl %r15, 0x278 +0xc6 0xf7 0xff 0xff 0xff 0xff + +# 0x00000280: +# CHECK: clhrl %r0, 0xffffffff00000280 +0xc6 0x07 0x80 0x00 0x00 0x00 + +# 0x00000286: +# CHECK: clhrl %r15, 0xffffffff00000286 +0xc6 0xf7 0x80 0x00 0x00 0x00 + +# 0x0000028c: +# CHECK: clhrl %r0, 0x10000028a +0xc6 0x07 0x7f 0xff 0xff 0xff + +# 0x00000292: +# CHECK: clhrl %r15, 0x100000290 +0xc6 0xf7 0x7f 0xff 0xff 0xff + +# 0x00000298: +# CHECK: clrl %r0, 0x298 +0xc6 0x0f 0x00 0x00 0x00 0x00 + +# 0x0000029e: +# CHECK: clrl %r15, 0x29e +0xc6 0xff 0x00 0x00 0x00 0x00 + +# 0x000002a4: +# CHECK: clrl %r0, 0x2a2 +0xc6 0x0f 0xff 0xff 0xff 0xff + +# 0x000002aa: +# CHECK: clrl %r15, 0x2a8 +0xc6 0xff 0xff 0xff 0xff 0xff + +# 0x000002b0: +# CHECK: clrl %r0, 0xffffffff000002b0 +0xc6 0x0f 0x80 0x00 0x00 0x00 + +# 0x000002b6: +# CHECK: clrl %r15, 0xffffffff000002b6 +0xc6 0xff 0x80 0x00 0x00 0x00 + +# 0x000002bc: +# CHECK: clrl %r0, 0x1000002ba +0xc6 0x0f 0x7f 0xff 0xff 0xff + +# 0x000002c2: +# CHECK: clrl %r15, 0x1000002c0 +0xc6 0xff 0x7f 0xff 0xff 0xff + +# 0x000002c8: +# CHECK: crl %r0, 0x2c8 +0xc6 0x0d 0x00 0x00 0x00 0x00 + +# 0x000002ce: +# CHECK: crl %r15, 0x2ce +0xc6 0xfd 0x00 0x00 0x00 0x00 + +# 0x000002d4: +# CHECK: crl %r0, 0x2d2 +0xc6 0x0d 0xff 0xff 0xff 0xff + +# 0x000002da: +# CHECK: crl %r15, 0x2d8 +0xc6 0xfd 0xff 0xff 0xff 0xff + +# 0x000002e0: +# CHECK: crl %r0, 0xffffffff000002e0 +0xc6 0x0d 0x80 0x00 0x00 0x00 + +# 0x000002e6: +# CHECK: crl %r15, 0xffffffff000002e6 +0xc6 0xfd 
0x80 0x00 0x00 0x00 + +# 0x000002ec: +# CHECK: crl %r0, 0x1000002ea +0xc6 0x0d 0x7f 0xff 0xff 0xff + +# 0x000002f2: +# CHECK: crl %r15, 0x1000002f0 +0xc6 0xfd 0x7f 0xff 0xff 0xff + +# 0x000002f8: +# CHECK: larl %r0, 0x2f8 +0xc0 0x00 0x00 0x00 0x00 0x00 + +# 0x000002fe: +# CHECK: larl %r15, 0x2fe +0xc0 0xf0 0x00 0x00 0x00 0x00 + +# 0x00000304: +# CHECK: larl %r0, 0x302 +0xc0 0x00 0xff 0xff 0xff 0xff + +# 0x0000030a: +# CHECK: larl %r15, 0x308 +0xc0 0xf0 0xff 0xff 0xff 0xff + +# 0x00000310: +# CHECK: larl %r0, 0xffffffff00000310 +0xc0 0x00 0x80 0x00 0x00 0x00 + +# 0x00000316: +# CHECK: larl %r15, 0xffffffff00000316 +0xc0 0xf0 0x80 0x00 0x00 0x00 + +# 0x0000031c: +# CHECK: larl %r0, 0x10000031a +0xc0 0x00 0x7f 0xff 0xff 0xff + +# 0x00000322: +# CHECK: larl %r15, 0x100000320 +0xc0 0xf0 0x7f 0xff 0xff 0xff + +# 0x00000328: +# CHECK: lgfrl %r0, 0x328 +0xc4 0x0c 0x00 0x00 0x00 0x00 + +# 0x0000032e: +# CHECK: lgfrl %r15, 0x32e +0xc4 0xfc 0x00 0x00 0x00 0x00 + +# 0x00000334: +# CHECK: lgfrl %r0, 0x332 +0xc4 0x0c 0xff 0xff 0xff 0xff + +# 0x0000033a: +# CHECK: lgfrl %r15, 0x338 +0xc4 0xfc 0xff 0xff 0xff 0xff + +# 0x00000340: +# CHECK: lgfrl %r0, 0xffffffff00000340 +0xc4 0x0c 0x80 0x00 0x00 0x00 + +# 0x00000346: +# CHECK: lgfrl %r15, 0xffffffff00000346 +0xc4 0xfc 0x80 0x00 0x00 0x00 + +# 0x0000034c: +# CHECK: lgfrl %r0, 0x10000034a +0xc4 0x0c 0x7f 0xff 0xff 0xff + +# 0x00000352: +# CHECK: lgfrl %r15, 0x100000350 +0xc4 0xfc 0x7f 0xff 0xff 0xff + +# 0x00000358: +# CHECK: lghrl %r0, 0x358 +0xc4 0x04 0x00 0x00 0x00 0x00 + +# 0x0000035e: +# CHECK: lghrl %r15, 0x35e +0xc4 0xf4 0x00 0x00 0x00 0x00 + +# 0x00000364: +# CHECK: lghrl %r0, 0x362 +0xc4 0x04 0xff 0xff 0xff 0xff + +# 0x0000036a: +# CHECK: lghrl %r15, 0x368 +0xc4 0xf4 0xff 0xff 0xff 0xff + +# 0x00000370: +# CHECK: lghrl %r0, 0xffffffff00000370 +0xc4 0x04 0x80 0x00 0x00 0x00 + +# 0x00000376: +# CHECK: lghrl %r15, 0xffffffff00000376 +0xc4 0xf4 0x80 0x00 0x00 0x00 + +# 0x0000037c: +# CHECK: lghrl %r0, 0x10000037a +0xc4 0x04 0x7f 
0xff 0xff 0xff + +# 0x00000382: +# CHECK: lghrl %r15, 0x100000380 +0xc4 0xf4 0x7f 0xff 0xff 0xff + +# 0x00000388: +# CHECK: lgrl %r0, 0x388 +0xc4 0x08 0x00 0x00 0x00 0x00 + +# 0x0000038e: +# CHECK: lgrl %r15, 0x38e +0xc4 0xf8 0x00 0x00 0x00 0x00 + +# 0x00000394: +# CHECK: lgrl %r0, 0x392 +0xc4 0x08 0xff 0xff 0xff 0xff + +# 0x0000039a: +# CHECK: lgrl %r15, 0x398 +0xc4 0xf8 0xff 0xff 0xff 0xff + +# 0x000003a0: +# CHECK: lgrl %r0, 0xffffffff000003a0 +0xc4 0x08 0x80 0x00 0x00 0x00 + +# 0x000003a6: +# CHECK: lgrl %r15, 0xffffffff000003a6 +0xc4 0xf8 0x80 0x00 0x00 0x00 + +# 0x000003ac: +# CHECK: lgrl %r0, 0x1000003aa +0xc4 0x08 0x7f 0xff 0xff 0xff + +# 0x000003b2: +# CHECK: lgrl %r15, 0x1000003b0 +0xc4 0xf8 0x7f 0xff 0xff 0xff + +# 0x000003b8: +# CHECK: lhrl %r0, 0x3b8 +0xc4 0x05 0x00 0x00 0x00 0x00 + +# 0x000003be: +# CHECK: lhrl %r15, 0x3be +0xc4 0xf5 0x00 0x00 0x00 0x00 + +# 0x000003c4: +# CHECK: lhrl %r0, 0x3c2 +0xc4 0x05 0xff 0xff 0xff 0xff + +# 0x000003ca: +# CHECK: lhrl %r15, 0x3c8 +0xc4 0xf5 0xff 0xff 0xff 0xff + +# 0x000003d0: +# CHECK: lhrl %r0, 0xffffffff000003d0 +0xc4 0x05 0x80 0x00 0x00 0x00 + +# 0x000003d6: +# CHECK: lhrl %r15, 0xffffffff000003d6 +0xc4 0xf5 0x80 0x00 0x00 0x00 + +# 0x000003dc: +# CHECK: lhrl %r0, 0x1000003da +0xc4 0x05 0x7f 0xff 0xff 0xff + +# 0x000003e2: +# CHECK: lhrl %r15, 0x1000003e0 +0xc4 0xf5 0x7f 0xff 0xff 0xff + +# 0x000003e8: +# CHECK: llgfrl %r0, 0x3e8 +0xc4 0x0e 0x00 0x00 0x00 0x00 + +# 0x000003ee: +# CHECK: llgfrl %r15, 0x3ee +0xc4 0xfe 0x00 0x00 0x00 0x00 + +# 0x000003f4: +# CHECK: llgfrl %r0, 0x3f2 +0xc4 0x0e 0xff 0xff 0xff 0xff + +# 0x000003fa: +# CHECK: llgfrl %r15, 0x3f8 +0xc4 0xfe 0xff 0xff 0xff 0xff + +# 0x00000400: +# CHECK: llgfrl %r0, 0xffffffff00000400 +0xc4 0x0e 0x80 0x00 0x00 0x00 + +# 0x00000406: +# CHECK: llgfrl %r15, 0xffffffff00000406 +0xc4 0xfe 0x80 0x00 0x00 0x00 + +# 0x0000040c: +# CHECK: llgfrl %r0, 0x10000040a +0xc4 0x0e 0x7f 0xff 0xff 0xff + +# 0x00000412: +# CHECK: llgfrl %r15, 0x100000410 +0xc4 0xfe 0x7f 
0xff 0xff 0xff + +# 0x00000418: +# CHECK: llghrl %r0, 0x418 +0xc4 0x06 0x00 0x00 0x00 0x00 + +# 0x0000041e: +# CHECK: llghrl %r15, 0x41e +0xc4 0xf6 0x00 0x00 0x00 0x00 + +# 0x00000424: +# CHECK: llghrl %r0, 0x422 +0xc4 0x06 0xff 0xff 0xff 0xff + +# 0x0000042a: +# CHECK: llghrl %r15, 0x428 +0xc4 0xf6 0xff 0xff 0xff 0xff + +# 0x00000430: +# CHECK: llghrl %r0, 0xffffffff00000430 +0xc4 0x06 0x80 0x00 0x00 0x00 + +# 0x00000436: +# CHECK: llghrl %r15, 0xffffffff00000436 +0xc4 0xf6 0x80 0x00 0x00 0x00 + +# 0x0000043c: +# CHECK: llghrl %r0, 0x10000043a +0xc4 0x06 0x7f 0xff 0xff 0xff + +# 0x00000442: +# CHECK: llghrl %r15, 0x100000440 +0xc4 0xf6 0x7f 0xff 0xff 0xff + +# 0x00000448: +# CHECK: llhrl %r0, 0x448 +0xc4 0x02 0x00 0x00 0x00 0x00 + +# 0x0000044e: +# CHECK: llhrl %r15, 0x44e +0xc4 0xf2 0x00 0x00 0x00 0x00 + +# 0x00000454: +# CHECK: llhrl %r0, 0x452 +0xc4 0x02 0xff 0xff 0xff 0xff + +# 0x0000045a: +# CHECK: llhrl %r15, 0x458 +0xc4 0xf2 0xff 0xff 0xff 0xff + +# 0x00000460: +# CHECK: llhrl %r0, 0xffffffff00000460 +0xc4 0x02 0x80 0x00 0x00 0x00 + +# 0x00000466: +# CHECK: llhrl %r15, 0xffffffff00000466 +0xc4 0xf2 0x80 0x00 0x00 0x00 + +# 0x0000046c: +# CHECK: llhrl %r0, 0x10000046a +0xc4 0x02 0x7f 0xff 0xff 0xff + +# 0x00000472: +# CHECK: llhrl %r15, 0x100000470 +0xc4 0xf2 0x7f 0xff 0xff 0xff + +# 0x00000478: +# CHECK: lrl %r0, 0x478 +0xc4 0x0d 0x00 0x00 0x00 0x00 + +# 0x0000047e: +# CHECK: lrl %r15, 0x47e +0xc4 0xfd 0x00 0x00 0x00 0x00 + +# 0x00000484: +# CHECK: lrl %r0, 0x482 +0xc4 0x0d 0xff 0xff 0xff 0xff + +# 0x0000048a: +# CHECK: lrl %r15, 0x488 +0xc4 0xfd 0xff 0xff 0xff 0xff + +# 0x00000490: +# CHECK: lrl %r0, 0xffffffff00000490 +0xc4 0x0d 0x80 0x00 0x00 0x00 + +# 0x00000496: +# CHECK: lrl %r15, 0xffffffff00000496 +0xc4 0xfd 0x80 0x00 0x00 0x00 + +# 0x0000049c: +# CHECK: lrl %r0, 0x10000049a +0xc4 0x0d 0x7f 0xff 0xff 0xff + +# 0x000004a2: +# CHECK: lrl %r15, 0x1000004a0 +0xc4 0xfd 0x7f 0xff 0xff 0xff + +# 0x000004a8: +# CHECK: stgrl %r0, 0x4a8 +0xc4 0x0b 0x00 0x00 
0x00 0x00 + +# 0x000004ae: +# CHECK: stgrl %r15, 0x4ae +0xc4 0xfb 0x00 0x00 0x00 0x00 + +# 0x000004b4: +# CHECK: stgrl %r0, 0x4b2 +0xc4 0x0b 0xff 0xff 0xff 0xff + +# 0x000004ba: +# CHECK: stgrl %r15, 0x4b8 +0xc4 0xfb 0xff 0xff 0xff 0xff + +# 0x000004c0: +# CHECK: stgrl %r0, 0xffffffff000004c0 +0xc4 0x0b 0x80 0x00 0x00 0x00 + +# 0x000004c6: +# CHECK: stgrl %r15, 0xffffffff000004c6 +0xc4 0xfb 0x80 0x00 0x00 0x00 + +# 0x000004cc: +# CHECK: stgrl %r0, 0x1000004ca +0xc4 0x0b 0x7f 0xff 0xff 0xff + +# 0x000004d2: +# CHECK: stgrl %r15, 0x1000004d0 +0xc4 0xfb 0x7f 0xff 0xff 0xff + +# 0x000004d8: +# CHECK: sthrl %r0, 0x4d8 +0xc4 0x07 0x00 0x00 0x00 0x00 + +# 0x000004de: +# CHECK: sthrl %r15, 0x4de +0xc4 0xf7 0x00 0x00 0x00 0x00 + +# 0x000004e4: +# CHECK: sthrl %r0, 0x4e2 +0xc4 0x07 0xff 0xff 0xff 0xff + +# 0x000004ea: +# CHECK: sthrl %r15, 0x4e8 +0xc4 0xf7 0xff 0xff 0xff 0xff + +# 0x000004f0: +# CHECK: sthrl %r0, 0xffffffff000004f0 +0xc4 0x07 0x80 0x00 0x00 0x00 + +# 0x000004f6: +# CHECK: sthrl %r15, 0xffffffff000004f6 +0xc4 0xf7 0x80 0x00 0x00 0x00 + +# 0x000004fc: +# CHECK: sthrl %r0, 0x1000004fa +0xc4 0x07 0x7f 0xff 0xff 0xff + +# 0x00000502: +# CHECK: sthrl %r15, 0x100000500 +0xc4 0xf7 0x7f 0xff 0xff 0xff + +# 0x00000508: +# CHECK: strl %r0, 0x508 +0xc4 0x0f 0x00 0x00 0x00 0x00 + +# 0x0000050e: +# CHECK: strl %r15, 0x50e +0xc4 0xff 0x00 0x00 0x00 0x00 + +# 0x00000514: +# CHECK: strl %r0, 0x512 +0xc4 0x0f 0xff 0xff 0xff 0xff + +# 0x0000051a: +# CHECK: strl %r15, 0x518 +0xc4 0xff 0xff 0xff 0xff 0xff + +# 0x00000520: +# CHECK: strl %r0, 0xffffffff00000520 +0xc4 0x0f 0x80 0x00 0x00 0x00 + +# 0x00000526: +# CHECK: strl %r15, 0xffffffff00000526 +0xc4 0xff 0x80 0x00 0x00 0x00 + +# 0x0000052c: +# CHECK: strl %r0, 0x10000052a +0xc4 0x0f 0x7f 0xff 0xff 0xff + +# 0x00000532: +# CHECK: strl %r15, 0x100000530 +0xc4 0xff 0x7f 0xff 0xff 0xff + diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt new file mode 100644 index 0000000000..56236f7037 
--- /dev/null +++ b/test/MC/Disassembler/SystemZ/insns.txt @@ -0,0 +1,6071 @@ +# Test instructions that don't have PC-relative operands. +# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu | FileCheck %s + +# CHECK: adbr %f0, %f0 +0xb3 0x1a 0x00 0x00 + +# CHECK: adbr %f0, %f15 +0xb3 0x1a 0x00 0x0f + +# CHECK: adbr %f7, %f8 +0xb3 0x1a 0x00 0x78 + +# CHECK: adbr %f15, %f0 +0xb3 0x1a 0x00 0xf0 + +# CHECK: adb %f0, 0 +0xed 0x00 0x00 0x00 0x00 0x1a + +# CHECK: adb %f0, 4095 +0xed 0x00 0x0f 0xff 0x00 0x1a + +# CHECK: adb %f0, 0(%r1) +0xed 0x00 0x10 0x00 0x00 0x1a + +# CHECK: adb %f0, 0(%r15) +0xed 0x00 0xf0 0x00 0x00 0x1a + +# CHECK: adb %f0, 4095(%r1,%r15) +0xed 0x01 0xff 0xff 0x00 0x1a + +# CHECK: adb %f0, 4095(%r15,%r1) +0xed 0x0f 0x1f 0xff 0x00 0x1a + +# CHECK: adb %f15, 0 +0xed 0xf0 0x00 0x00 0x00 0x1a + +# CHECK: aebr %f0, %f0 +0xb3 0x0a 0x00 0x00 + +# CHECK: aebr %f0, %f15 +0xb3 0x0a 0x00 0x0f + +# CHECK: aebr %f7, %f8 +0xb3 0x0a 0x00 0x78 + +# CHECK: aebr %f15, %f0 +0xb3 0x0a 0x00 0xf0 + +# CHECK: aeb %f0, 0 +0xed 0x00 0x00 0x00 0x00 0x0a + +# CHECK: aeb %f0, 4095 +0xed 0x00 0x0f 0xff 0x00 0x0a + +# CHECK: aeb %f0, 0(%r1) +0xed 0x00 0x10 0x00 0x00 0x0a + +# CHECK: aeb %f0, 0(%r15) +0xed 0x00 0xf0 0x00 0x00 0x0a + +# CHECK: aeb %f0, 4095(%r1,%r15) +0xed 0x01 0xff 0xff 0x00 0x0a + +# CHECK: aeb %f0, 4095(%r15,%r1) +0xed 0x0f 0x1f 0xff 0x00 0x0a + +# CHECK: aeb %f15, 0 +0xed 0xf0 0x00 0x00 0x00 0x0a + +# CHECK: afi %r0, -2147483648 +0xc2 0x09 0x80 0x00 0x00 0x00 + +# CHECK: afi %r0, -1 +0xc2 0x09 0xff 0xff 0xff 0xff + +# CHECK: afi %r0, 0 +0xc2 0x09 0x00 0x00 0x00 0x00 + +# CHECK: afi %r0, 1 +0xc2 0x09 0x00 0x00 0x00 0x01 + +# CHECK: afi %r0, 2147483647 +0xc2 0x09 0x7f 0xff 0xff 0xff + +# CHECK: afi %r15, 0 +0xc2 0xf9 0x00 0x00 0x00 0x00 + +# CHECK: agfi %r0, -2147483648 +0xc2 0x08 0x80 0x00 0x00 0x00 + +# CHECK: agfi %r0, -1 +0xc2 0x08 0xff 0xff 0xff 0xff + +# CHECK: agfi %r0, 0 +0xc2 0x08 0x00 0x00 0x00 0x00 + +# CHECK: agfi %r0, 1 +0xc2 0x08 0x00 0x00 0x00 
0x01 + +# CHECK: agfi %r0, 2147483647 +0xc2 0x08 0x7f 0xff 0xff 0xff + +# CHECK: agfi %r15, 0 +0xc2 0xf8 0x00 0x00 0x00 0x00 + +# CHECK: agfr %r0, %r0 +0xb9 0x18 0x00 0x00 + +# CHECK: agfr %r0, %r15 +0xb9 0x18 0x00 0x0f + +# CHECK: agfr %r15, %r0 +0xb9 0x18 0x00 0xf0 + +# CHECK: agfr %r7, %r8 +0xb9 0x18 0x00 0x78 + +# CHECK: agf %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0x18 + +# CHECK: agf %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0x18 + +# CHECK: agf %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0x18 + +# CHECK: agf %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0x18 + +# CHECK: agf %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0x18 + +# CHECK: agf %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0x18 + +# CHECK: agf %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0x18 + +# CHECK: agf %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0x18 + +# CHECK: agf %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0x18 + +# CHECK: agf %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0x18 + +# CHECK: aghi %r0, -32768 +0xa7 0x0b 0x80 0x00 + +# CHECK: aghi %r0, -1 +0xa7 0x0b 0xff 0xff + +# CHECK: aghi %r0, 0 +0xa7 0x0b 0x00 0x00 + +# CHECK: aghi %r0, 1 +0xa7 0x0b 0x00 0x01 + +# CHECK: aghi %r0, 32767 +0xa7 0x0b 0x7f 0xff + +# CHECK: aghi %r15, 0 +0xa7 0xfb 0x00 0x00 + +# CHECK: agr %r0, %r0 +0xb9 0x08 0x00 0x00 + +# CHECK: agr %r0, %r15 +0xb9 0x08 0x00 0x0f + +# CHECK: agr %r15, %r0 +0xb9 0x08 0x00 0xf0 + +# CHECK: agr %r7, %r8 +0xb9 0x08 0x00 0x78 + +# CHECK: agsi -524288, 0 +0xeb 0x00 0x00 0x00 0x80 0x7a + +# CHECK: agsi -1, 0 +0xeb 0x00 0x0f 0xff 0xff 0x7a + +# CHECK: agsi 0, 0 +0xeb 0x00 0x00 0x00 0x00 0x7a + +# CHECK: agsi 1, 0 +0xeb 0x00 0x00 0x01 0x00 0x7a + +# CHECK: agsi 524287, 0 +0xeb 0x00 0x0f 0xff 0x7f 0x7a + +# CHECK: agsi 0, -128 +0xeb 0x80 0x00 0x00 0x00 0x7a + +# CHECK: agsi 0, -1 +0xeb 0xff 0x00 0x00 0x00 0x7a + +# CHECK: agsi 0, 1 +0xeb 0x01 0x00 0x00 0x00 0x7a + +# CHECK: agsi 0, 127 +0xeb 0x7f 0x00 0x00 0x00 0x7a + +# CHECK: agsi 0(%r1), 42 +0xeb 0x2a 0x10 0x00 0x00 0x7a + +# CHECK: agsi 0(%r15), 42 +0xeb 0x2a 0xf0 0x00 0x00 0x7a + +# 
CHECK: agsi 524287(%r1), |