diff options
Diffstat (limited to 'lib')
112 files changed, 3023 insertions, 831 deletions
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index c0009cb989..674ce3aea7 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -89,7 +89,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); TD = getAnalysisIfAvailable<DataLayout>(); DT = getAnalysisIfAvailable<DominatorTree>(); - if (PredCache == 0) + if (!PredCache) PredCache.reset(new PredIteratorCache()); return false; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index eb744d243b..7ad4f57f75 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -636,14 +636,13 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); MachineModuleInfo &MMI = MF->getMMI(); - const std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions(); bool FoundOne = false; (void)FoundOne; - for (std::vector<MachineMove>::const_iterator I = Moves.begin(), - E = Moves.end(); - I != E; ++I) { + for (std::vector<MCCFIInstruction>::iterator I = Instructions.begin(), + E = Instructions.end(); I != E; ++I) { if (I->getLabel() == Label) { - EmitCFIFrameMove(*I); + emitCFIInstruction(*I); FoundOne = true; } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 31e42d47cf..e6d67e8822 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -169,28 +169,21 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // Dwarf Lowering Routines //===----------------------------------------------------------------------===// -/// EmitCFIFrameMove - Emit a frame instruction. 
-void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - // Reg + Offset - OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true), - Src.getOffset()); - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); +void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { + switch (Inst.getOperation()) { + default: + llvm_unreachable("Unexpected instruction"); + case MCCFIInstruction::OpDefCfaOffset: + OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfa: + OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfaRegister: + OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister()); + break; + case MCCFIInstruction::OpOffset: + OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + break; } } diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 673867ada1..cc0cb56e8b 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -114,8 +114,8 @@ DIE::~DIE() { /// Climb up the parent chain to get the compile unit DIE to which this DIE /// belongs. 
-DIE *DIE::getCompileUnit() const { - DIE *p = getParent(); +DIE *DIE::getCompileUnit() { + DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit) return p; diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 3c06001686..550d873128 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -153,7 +153,7 @@ namespace llvm { DIE *getParent() const { return Parent; } /// Climb up the parent chain to get the compile unit DIE this DIE belongs /// to. - DIE *getCompileUnit() const; + DIE *getCompileUnit(); void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } @@ -325,7 +325,9 @@ namespace llvm { class DIEEntry : public DIEValue { DIE *const Entry; public: - explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { + assert(E && "Cannot construct a DIEEntry with a null DIE"); + } DIE *getEntry() const { return Entry; } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 74b1b13367..49a85d81b4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -23,7 +23,6 @@ namespace llvm { template <typename T> class SmallVectorImpl; struct LandingPadInfo; class MachineModuleInfo; -class MachineMove; class MachineInstr; class MachineFunction; class MCAsmInfo; diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 1a09837834..7ce5cc6f67 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -62,14 +62,8 @@ static bool getVerboseAsm() { llvm_unreachable("Invalid verbose asm state"); } -LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, - StringRef CPU, StringRef FS, - TargetOptions Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : TargetMachine(T, Triple, CPU, FS, Options) { 
- CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); - AsmInfo = T.createMCAsmInfo(Triple); +void LLVMTargetMachine::initAsmInfo() { + AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. @@ -79,6 +73,15 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } +LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, + StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, Triple, CPU, FS, Options) { + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); +} + void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 8af9d053b1..74cf9f50df 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -268,6 +268,39 @@ MachineModuleInfo::MachineModuleInfo() MachineModuleInfo::~MachineModuleInfo() { } +static MCCFIInstruction convertMoveToCFI(const MCRegisterInfo &MRI, + MCSymbol *Label, + const MachineLocation &Dst, + const MachineLocation &Src) { + // If advancing cfa. 
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() == MachineLocation::VirtualFP) + return MCCFIInstruction::createDefCfaOffset(Label, Src.getOffset()); + // Reg + Offset + return MCCFIInstruction::createDefCfa( + Label, MRI.getDwarfRegNum(Src.getReg(), true), -Src.getOffset()); + } + + if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + return MCCFIInstruction::createDefCfaRegister( + Label, MRI.getDwarfRegNum(Dst.getReg(), true)); + } + + assert(!Dst.isReg() && "Machine move not supported yet."); + return MCCFIInstruction::createOffset( + Label, MRI.getDwarfRegNum(Src.getReg(), true), Dst.getOffset()); +} + + +void MachineModuleInfo::addFrameMove(MCSymbol *Label, + const MachineLocation &Dst, + const MachineLocation &Src) { + MCCFIInstruction I = + convertMoveToCFI(Context.getRegisterInfo(), Label, Dst, Src); + FrameInstructions.push_back(I); +} + bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = 0; @@ -303,7 +336,7 @@ bool MachineModuleInfo::doFinalization(Module &M) { /// void MachineModuleInfo::EndFunction() { // Clean up frame info. - FrameMoves.clear(); + FrameInstructions.clear(); // Clean up exception info. LandingPads.clear(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 9eed1fc62a..49748289da 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -713,7 +713,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; BC.Exit = BI.LiveOut ? 
SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.ChangesValue = BI.FirstDef; + BC.ChangesValue = BI.FirstDef.isValid(); if (!Intf.hasInterference()) continue; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c54dffbb13..a8621a89a8 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9254,19 +9254,33 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { for (unsigned I = 0; I != NumConcats; ++I) { // Make sure we're dealing with a copy. unsigned Begin = I * NumElemsPerConcat; - if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) - return SDValue(); + bool AllUndef = true, NoUndef = true; + for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(J) >= 0) + AllUndef = false; + else + NoUndef = false; + } - for (unsigned J = 1; J != NumElemsPerConcat; ++J) { - if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + if (NoUndef) { + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) return SDValue(); - } - unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; - if (FirstElt < N0.getNumOperands()) - Ops.push_back(N0.getOperand(FirstElt)); - else - Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + for (unsigned J = 1; J != NumElemsPerConcat; ++J) + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + + } else if (AllUndef) { + Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); + } else { // Mixed with general masks and undefs, can't do optimization. 
+ return SDValue(); + } } return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 3903743878..23984e9986 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -222,7 +222,9 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { PersonalityFn = LPads[0]->getPersonalityFn(); Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); - Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); + Builder.CreateStore(Builder.CreateBitCast(PersonalityFn, + Builder.getInt8PtrTy()), + PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index a789a2596d..90b93aaa72 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/DebugInfo.h" #include "llvm/IR/Function.h" @@ -528,6 +529,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (!V) continue; + const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V); + if (PSV && PSV->isConstant(MFI)) + continue; + // Climb up and find the original alloca. 
V = GetUnderlyingObject(V); // If we did not find one, or if the one that we found is not in our diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index e43ba4f1dd..0191636307 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -47,7 +48,7 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)( ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MCJMM, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, @@ -455,10 +456,12 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) return 0; + assert(!(JMM && MCJMM)); + // If the user specified a memory manager but didn't specify which engine to // create, we assume they only want the JIT, and we fail if they only want // the interpreter. - if (JMM) { + if (JMM || MCJMM) { if (WhichEngine & EngineKind::JIT) WhichEngine = EngineKind::JIT; else { @@ -467,6 +470,14 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return 0; } } + + if (MCJMM && ! UseMCJIT) { + if (ErrorStr) + *ErrorStr = + "Cannot create a legacy JIT with a runtime dyld memory " + "manager."; + return 0; + } // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. @@ -480,7 +491,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (UseMCJIT && ExecutionEngine::MCJITCtor) { ExecutionEngine *EE = - ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, + ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? 
MCJMM : JMM, AllocateGVsWithCode, TheTM.take()); if (EE) return EE; } else if (ExecutionEngine::JITCtor) { diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 38aa5474a3..ced567205a 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -39,7 +39,7 @@ extern "C" void LLVMLinkInMCJIT() { ExecutionEngine *MCJIT::createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -47,14 +47,14 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - return new MCJIT(M, TM, JMM ? JMM : new SectionMemoryManager(), GVsWithCode); + return new MCJIT(M, TM, MemMgr ? MemMgr : new SectionMemoryManager(), + GVsWithCode); } MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), Ctx(0), - MemMgr(MM ? MM : new SectionMemoryManager()), Dyld(MemMgr), - IsLoaded(false), M(m), ObjCache(0) { + : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), + IsLoaded(false), M(m), ObjCache(0) { setDataLayout(TM->getDataLayout()); } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 8c4bf6e1db..7f247e2dee 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -98,7 +98,7 @@ public: static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM); diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 89a3c0578c..81d7efa774 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -112,21 +112,20 @@ bool Value::hasNUsesOrMore(unsigned N) const { /// isUsedInBasicBlock - Return true if this value is used in the specified /// basic block. 
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const { - // Start by scanning over the instructions looking for a use before we start - // the expensive use iteration. - unsigned MaxBlockSize = 3; - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (std::find(I->op_begin(), I->op_end(), this) != I->op_end()) + // This can be computed either by scanning the instructions in BB, or by + // scanning the use list of this Value. Both lists can be very long, but + // usually one is quite short. + // + // Scan both lists simultaneously until one is exhausted. This limits the + // search to the shorter list. + BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + const_use_iterator UI = use_begin(), UE = use_end(); + for (; BI != BE && UI != UE; ++BI, ++UI) { + // Scan basic block: Check if this Value is used by the instruction at BI. + if (std::find(BI->op_begin(), BI->op_end(), this) != BI->op_end()) return true; - if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator - break; - } - - if (MaxBlockSize != 0) // We scanned the entire block and found no use. - return false; - - for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { - const Instruction *User = dyn_cast<Instruction>(*I); + // Scan use list: Check if the use at UI is in BB. + const Instruction *User = dyn_cast<Instruction>(*UI); if (User && User->getParent() == BB) return true; } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 3d995484e7..4f66156f6d 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -759,9 +759,6 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - // FIXME: no tests cover this. Is adjustFixupOffset dead code? 
- TargetObjectWriter->adjustFixupOffset(Fixup, RelocOffset); - if (!hasRelocationAddend()) Addend = 0; diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 4766b37476..d3c019246c 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -42,8 +42,12 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); assert(TheTarget && "Unable to create target!"); + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); + if (!MRI) + return 0; + // Get the assembler info needed to setup the MCContext. - const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple); + const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple); if (!MAI) return 0; @@ -51,10 +55,6 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, if (!MII) return 0; - const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); - if (!MRI) - return 0; - // Package up features to be passed to target/subtarget std::string FeaturesStr; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 7640a63ee3..efe0c46db8 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -873,17 +873,6 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, streamer.EmitValue(v, size); } -static const MachineLocation TranslateMachineLocation( - const MCRegisterInfo &MRI, - const MachineLocation &Loc) { - unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? - MachineLocation::VirtualFP : - unsigned(MRI.getDwarfRegNum(Loc.getReg(), true)); - const MachineLocation &NewLoc = Loc.isReg() ? 
- MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); - return NewLoc; -} - namespace { class FrameEmitterImpl { int CFAOffset; @@ -1316,32 +1305,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Initial Instructions const MCAsmInfo &MAI = context.getAsmInfo(); - const std::vector<MachineMove> &Moves = MAI.getInitialFrameState(); - std::vector<MCCFIInstruction> Instructions; - - for (int i = 0, n = Moves.size(); i != n; ++i) { - MCSymbol *Label = Moves[i].getLabel(); - const MachineLocation &Dst = - TranslateMachineLocation(MRI, Moves[i].getDestination()); - const MachineLocation &Src = - TranslateMachineLocation(MRI, Moves[i].getSource()); - - if (Dst.isReg()) { - assert(Dst.getReg() == MachineLocation::VirtualFP); - assert(!Src.isReg()); - MCCFIInstruction Inst = - MCCFIInstruction::createDefCfa(Label, Src.getReg(), -Src.getOffset()); - Instructions.push_back(Inst); - } else { - assert(Src.isReg()); - unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset(); - MCCFIInstruction Inst = - MCCFIInstruction::createOffset(Label, Reg, Offset); - Instructions.push_back(Inst); - } - } - + const std::vector<MCCFIInstruction> &Instructions = + MAI.getInitialFrameState(); EmitCFIInstructions(streamer, Instructions, NULL); // Padding diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 4cac84d666..ec7397d748 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -39,13 +39,23 @@ const MCSymbol *MCELFObjectTargetWriter::undefinedExplicitRelSym(const MCValue & return &Symbol.AliasedSymbol(); } -void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup, - uint64_t &RelocOffset) { +// ELF doesn't require relocations to be in any order. We sort by the r_offset, +// just to match gnu as for easier comparison. The use type and index is an +// arbitrary way of making the sort deterministic. 
+static int cmpRel(const void *AP, const void *BP) { + const ELFRelocationEntry &A = *(const ELFRelocationEntry *)AP; + const ELFRelocationEntry &B = *(const ELFRelocationEntry *)BP; + if (A.r_offset != B.r_offset) + return B.r_offset - A.r_offset; + if (B.Type != A.Type) + return A.Type - B.Type; + if (B.Index != A.Index) + return B.Index - A.Index; + llvm_unreachable("ELFRelocs might be unstable!"); } void MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs) { - // Sort by the r_offset, just like gnu as does. - array_pod_sort(Relocs.begin(), Relocs.end()); + array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index edefdb4c36..f7c71e97e3 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -545,7 +545,7 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) { return false; } -/// Process the specified .incbin file by seaching for it in the include paths +/// Process the specified .incbin file by searching for it in the include paths /// then just emitting the byte contents of the file to the streamer. This /// returns true on failure. 
bool AsmParser::ProcessIncbinFile(const std::string &Filename) { diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index af14c72145..654af081f9 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -339,7 +339,7 @@ static void printRelocationTargetName(const MachOObjectFile *O, StringRef S; bool isExtern = O->getPlainRelocationExternal(RE); - uint64_t Val = O->getAnyRelocationAddress(RE); + uint64_t Val = O->getPlainRelocationSymbolNum(RE); if (isExtern) { symbol_iterator SI = O->begin_symbols(); @@ -347,7 +347,8 @@ static void printRelocationTargetName(const MachOObjectFile *O, SI->getName(S); } else { section_iterator SI = O->begin_sections(); - advanceTo(SI, Val); + // Adjust for the fact that sections are 1-indexed. + advanceTo(SI, Val - 1); SI->getName(S); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 6182e34150..57e60dac45 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -872,7 +872,21 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; exponent += rhs.exponent; + // Assume the operands involved in the multiplication are single-precision + // FP, and the two multiplicants are: + // *this = a23 . a22 ... a0 * 2^e1 + // rhs = b23 . b22 ... b0 * 2^e2 + // the result of multiplication is: + // *this = c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are two significant bits at the left-hand side of the + // radix point. Move the radix point toward left by one bit, and adjust + // exponent accordingly. + exponent += 1; + if (addend) { + // The intermediate result of the multiplication has "2 * precision" + // signicant bit; adjust the addend to be consistent with mul result. 
+ // Significand savedSignificand = significand; const fltSemantics *savedSemantics = semantics; fltSemantics extendedSemantics; @@ -880,8 +894,9 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) unsigned int extendedPrecision; /* Normalize our MSB. */ - extendedPrecision = precision + precision - 1; + extendedPrecision = 2 * precision; if (omsb != extendedPrecision) { + assert(extendedPrecision > omsb); APInt::tcShiftLeft(fullSignificand, newPartsCount, extendedPrecision - omsb); exponent -= extendedPrecision - omsb; @@ -912,8 +927,18 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; } - exponent -= (precision - 1); + // Convert the result having "2 * precision" significant-bits back to the one + // having "precision" significant-bits. First, move the radix point from + // poision "2*precision - 1" to "precision - 1". The exponent need to be + // adjusted by "2*precision - 1" - "precision - 1" = "precision". + exponent -= precision; + // In case MSB resides at the left-hand side of radix point, shift the + // mantissa right by some amount to make sure the MSB reside right before + // the radix point (i.e. "MSB . rest-significant-bits"). + // + // Note that the result is not normalized when "omsb < precision". So, the + // caller needs to call APFloat::normalize() if normalized value is expected. 
if (omsb > precision) { unsigned int bits, significantParts; lostFraction lf; diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index fac3cad5cc..4f650b42cc 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -65,7 +65,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf); } - if (NewBuf == 0) return ~0U; + if (!NewBuf) return ~0U; return AddNewSourceBuffer(NewBuf.take(), IncludeLoc); } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index c9729b5412..9e497a0f63 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -251,7 +251,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("hexagon", Triple::hexagon) .Case("s390x", Triple::systemz) .Case("sparc", Triple::sparc) - .Case("sparcv9", Triple::sparcv9) + .Cases("sparcv9", "sparc64", Triple::sparcv9) .Case("tce", Triple::tce) .Case("xcore", Triple::xcore) .Case("nvptx", Triple::nvptx) diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 72a8af621d..cdd475c17f 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -33,6 +33,7 @@ #endif extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +extern "C" void __clear_cache(void *, void*); namespace { diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h index 5c1da0d617..4cdac788a0 100644 --- a/lib/Support/Windows/Windows.h +++ b/lib/Support/Windows/Windows.h @@ -25,6 +25,7 @@ #define WIN32_LEAN_AND_MEAN #include "llvm/Config/config.h" // Get build system configuration settings +#include "llvm/Support/Compiler.h" #include <windows.h> #include <wincrypt.h> #include <shlobj.h> @@ -75,7 +76,7 @@ public: } // True if Handle is valid. - operator bool() const { + LLVM_EXPLICIT operator bool() const { return HandleTraits::IsValid(Handle) ? 
true : false; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index df599d599d..f1695e2ce2 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -38,6 +38,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 3435217bb2..eeec608820 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -57,13 +57,14 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createAArch64MCAsmInfo(StringRef TT) { +static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { Triple TheTriple(TT); MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(); - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(AArch64::XSP, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b0d34a76b0..4de5b4f41c 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -94,6 +94,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::SP); Reserved.set(ARM::PC); Reserved.set(ARM::FPSCR); + Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index fd77732364..432e3eefb1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ 
b/lib/Target/ARM/ARMISelLowering.cpp @@ -2147,7 +2147,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Chain = Copy->getOperand(0); + TCChain = Copy->getOperand(0); } else { return false; } @@ -5257,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } +static EVT getExtensionTo64Bits(const EVT &OrigVT) { + if (OrigVT.getSizeInBits() >= 64) + return OrigVT; + + assert(OrigVT.isSimple() && "Expecting a simple value type"); + + MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + return MVT::v2i32; + case MVT::v4i8: + return MVT::v4i16; + } +} + /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. @@ -5272,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. - MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; - EVT NewVT; - switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Orig Vector Type"); - case MVT::v2i8: - case MVT::v2i16: - NewVT = MVT::v2i32; - break; - case MVT::v4i8: - NewVT = MVT::v4i16; - break; - } + EVT NewVT = getExtensionTo64Bits(OrigTy); + return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); } @@ -5293,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, /// reach a total size of 64 bits. We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. 
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { - SDValue NonExtendingLoad = - DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); + + // The load already has the right type. + if (ExtendedTy == LD->getMemoryVT()) + return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); - unsigned ExtOp = 0; - switch (LD->getExtensionType()) { - default: llvm_unreachable("Unexpected LoadExtType"); - case ISD::EXTLOAD: - case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; - } - MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; - MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; - return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, - MemType, ExtType, ExtOp); + + // We need to create a zextload/sextload. We cannot just create a load + // followed by a zext/zext node because LowerMUL is also run during normal + // operation legalization where we can't create illegal types. 
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, + LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1bd174e341..89f92a589d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4636,11 +4636,11 @@ def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", - (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), @@ -4650,7 +4650,7 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops, list<dag> pattern> : ABXI<0b1110, oops, iops, NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> { - let Inst{31-28} = 0b1111; + let Inst{31-24} = 0b11111110; let Inst{20} = direction; let Inst{4} = 1; @@ -4679,11 +4679,11 @@ def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", - (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; def : 
ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b0f576bc2b..85743d8d5a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -157,12 +157,15 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. // We model fpscr with two registers: FPSCR models the control bits and will be -// reserved. FPSCR_NZCV models the flag bits and will be unreserved. -def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; -def SPSR : ARMReg<2, "spsr">; -def FPSCR : ARMReg<3, "fpscr">; -def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { +// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV +// models the APSR when it's accessed by some special instructions. In such cases +// it has the same encoding as PC. +def CPSR : ARMReg<0, "cpsr">; +def APSR : ARMReg<1, "apsr">; +def APSR_NZCV : ARMReg<15, "apsr_nzcv">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { let Aliases = [FPSCR]; } def ITSTATE : ARMReg<4, "itstate">; @@ -207,6 +210,16 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { }]; } +// GPRs without the PC but with APSR. Some instructions allow accessing the +// APSR, while actually encoding PC in the register field. This is usefull +// for assembly and disassembly only. +def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add GPR, APSR_NZCV)> { + let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + }]; +} + // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the // implied SP argument list. 
// FIXME: It would be better to not use this at all and refactor the diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 8653c462f0..9ff0d61481 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -162,10 +162,23 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (!isThumb() || hasThumb2()) PostRAScheduler = true; - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - if (!StrictAlign && hasV6Ops() && isTargetDarwin()) - AllowsUnalignedMem = true; + if (!StrictAlign) { + // Assume pre-ARMv6 doesn't support unaligned accesses. + // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. It is therefore safe to assume that ARMv7+ + // targets support unaligned accesses. + // + // The above behavior is consistent with GCC. + if (hasV7Ops() || (hasV6Ops() && isTargetDarwin())) + AllowsUnalignedMem = true; + } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. 
uint64_t Bits = getFeatureBits(); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 42c7d2c437..17c52c94a0 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -85,6 +85,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -117,6 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + initAsmInfo(); } namespace { diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ac937f3534..d2896377cc 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -156,6 +156,9 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -920,6 +923,21 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, return S; } +static DecodeStatus +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 15) + { + Inst.addOperand(MCOperand::CreateReg(ARM::APSR_NZCV)); + return MCDisassembler::Success; + } + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, 
Decoder)); + return S; +} + static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 57239f8011..b858fff546 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -159,7 +159,7 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createARMMCAsmInfo(StringRef TT) { +static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 99d7a3a963..57044b27d6 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_target(HexagonCodeGen HexagonTargetObjectFile.cpp HexagonVLIWPacketizer.cpp HexagonNewValueJump.cpp + HexagonCopyToCombine.cpp ) add_subdirectory(TargetInfo) diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 8e19c61f40..b88637ad57 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -44,6 +44,8 @@ namespace llvm { FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonNewValueJump(); diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp new file mode 100644 index 0000000000..dd63523291 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -0,0 +1,676 @@ +//===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass replaces transfer instructions by combine instructions. +// We walk along a basic block and look for two combinable instructions and try +// to move them together. If we can move them next to each other we do so and +// replace them with a combine instruction. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-copy-combine" + +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" + +using namespace llvm; + +static +cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines", + cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Disable merging into combines")); +static +cl::opt<unsigned> +MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store", + cl::Hidden, cl::init(4), + cl::desc("Maximum distance between a tfr feeding a store we " + "consider the store still to be newifiable")); + +namespace llvm { + void initializeHexagonCopyToCombinePass(PassRegistry&); +} + + +namespace { + +class HexagonCopyToCombine : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const TargetRegisterInfo *TRI; + bool 
ShouldCombineAggressively; + + DenseSet<MachineInstr *> PotentiallyNewifiableTFR; +public: + static char ID; + + HexagonCopyToCombine() : MachineFunctionPass(ID) { + initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "Hexagon Copy-To-Combine Pass"; + } + + virtual bool runOnMachineFunction(MachineFunction &Fn); + +private: + MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1); + + void findPotentialNewifiableTFRs(MachineBasicBlock &); + + void combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, bool DoInsertAtI1); + + bool isSafeToMoveTogether(MachineInstr *I1, MachineInstr *I2, + unsigned I1DestReg, unsigned I2DestReg, + bool &DoInsertAtI1); + + void emitCombineRR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineRI(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineIR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineII(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); +}; + +} // End anonymous namespace. + +char HexagonCopyToCombine::ID = 0; + +INITIALIZE_PASS(HexagonCopyToCombine, "hexagon-copy-combine", + "Hexagon Copy-To-Combine Pass", false, false) + +static bool isCombinableInstType(MachineInstr *MI, + const HexagonInstrInfo *TII, + bool ShouldCombineAggressively) { + switch(MI->getOpcode()) { + case Hexagon::TFR: { + // A COPY instruction can be combined if its arguments are IntRegs (32bit). 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg()); + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + return Hexagon::IntRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg); + } + + case Hexagon::TFRI: { + // A transfer-immediate can be combined if its argument is a signed 8bit + // value. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + unsigned DestReg = MI->getOperand(0).getReg(); + + // Only combine constant extended TFRI if we are in aggressive mode. + return Hexagon::IntRegsRegClass.contains(DestReg) && + (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm())); + } + + case Hexagon::TFRI_V4: { + if (!ShouldCombineAggressively) + return false; + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isGlobal()); + + // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a + // workaround for an ABI bug that prevents GOT relocations on combine + // instructions + if (MI->getOperand(1).getTargetFlags() != HexagonII::MO_NO_FLAG) + return false; + + unsigned DestReg = MI->getOperand(0).getReg(); + return Hexagon::IntRegsRegClass.contains(DestReg); + } + + default: + break; + } + + return false; +} + +static bool isGreaterThan8BitTFRI(MachineInstr *I) { + return I->getOpcode() == Hexagon::TFRI && + !isInt<8>(I->getOperand(1).getImm()); +} +static bool isGreaterThan6BitTFRI(MachineInstr *I) { + return I->getOpcode() == Hexagon::TFRI && + !isUInt<6>(I->getOperand(1).getImm()); +} + +/// areCombinableOperations - Returns true if the two instruction can be merge +/// into a combine (ignoring register constraints). 
+static bool areCombinableOperations(const TargetRegisterInfo *TRI, + MachineInstr *HighRegInst, + MachineInstr *LowRegInst) { + assert((HighRegInst->getOpcode() == Hexagon::TFR || + HighRegInst->getOpcode() == Hexagon::TFRI || + HighRegInst->getOpcode() == Hexagon::TFRI_V4) && + (LowRegInst->getOpcode() == Hexagon::TFR || + LowRegInst->getOpcode() == Hexagon::TFRI || + LowRegInst->getOpcode() == Hexagon::TFRI_V4) && + "Assume individual instructions are of a combinable type"); + + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>(TRI); + + // V4 added some combine variations (mixed immediate and register source + // operands), if we are on < V4 we can only combine 2 register-to-register + // moves and 2 immediate-to-register moves. We also don't have + // constant-extenders. + if (!QRI->Subtarget.hasV4TOps()) + return HighRegInst->getOpcode() == LowRegInst->getOpcode() && + !isGreaterThan8BitTFRI(HighRegInst) && + !isGreaterThan6BitTFRI(LowRegInst); + + // There is no combine of two constant extended values. + if ((HighRegInst->getOpcode() == Hexagon::TFRI_V4 || + isGreaterThan8BitTFRI(HighRegInst)) && + (LowRegInst->getOpcode() == Hexagon::TFRI_V4 || + isGreaterThan6BitTFRI(LowRegInst))) + return false; + + return true; +} + +static bool isEvenReg(unsigned Reg) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + Hexagon::IntRegsRegClass.contains(Reg)); + return (Reg - Hexagon::R0) % 2 == 0; +} + +static void removeKillInfo(MachineInstr *MI, unsigned RegNotKilled) { + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill()) + continue; + Op.setIsKill(false); + } +} + +/// isUnsafeToMoveAccross - Returns true if it is unsafe to move a copy +/// instruction from \p UseReg to \p DestReg over the instruction \p I. 
+bool isUnsafeToMoveAccross(MachineInstr *I, unsigned UseReg, unsigned DestReg, + const TargetRegisterInfo *TRI) { + return (UseReg && (I->modifiesRegister(UseReg, TRI))) || + I->modifiesRegister(DestReg, TRI) || + I->readsRegister(DestReg, TRI) || + I->hasUnmodeledSideEffects() || + I->isInlineAsm() || I->isDebugValue(); +} + +/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such +/// that the two instructions can be paired in a combine. +bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, + MachineInstr *I2, + unsigned I1DestReg, + unsigned I2DestReg, + bool &DoInsertAtI1) { + + bool IsImmUseReg = I2->getOperand(1).isImm() || I2->getOperand(1).isGlobal(); + unsigned I2UseReg = IsImmUseReg ? 0 : I2->getOperand(1).getReg(); + + // It is not safe to move I1 and I2 into one combine if I2 has a true + // dependence on I1. + if (I2UseReg && I1->modifiesRegister(I2UseReg, TRI)) + return false; + + bool isSafe = true; + + // First try to move I2 towards I1. + { + // A reverse_iterator instantiated like below starts before I2, and I1 + // respectively. + // Look at instructions I in between I2 and (excluding) I1. + MachineBasicBlock::reverse_iterator I(I2), + End = --(MachineBasicBlock::reverse_iterator(I1)); + // At 03 we got better results (dhrystone!) by being more conservative. + if (!ShouldCombineAggressively) + End = MachineBasicBlock::reverse_iterator(I1); + // If I2 kills its operand and we move I2 over an instruction that also + // uses I2's use reg we need to modify that (first) instruction to now kill + // this reg. + unsigned KilledOperand = 0; + if (I2->killsRegister(I2UseReg)) + KilledOperand = I2UseReg; + MachineInstr *KillingInstr = 0; + + for (; I != End; ++I) { + // If the intervening instruction I: + // * modifies I2's use reg + // * modifies I2's def reg + // * reads I2's def reg + // * or has unmodelled side effects + // we can't move I2 across it. 
+ if (isUnsafeToMoveAccross(&*I, I2UseReg, I2DestReg, TRI)) { + isSafe = false; + break; + } + + // Update first use of the killed operand. + if (!KillingInstr && KilledOperand && + I->readsRegister(KilledOperand, TRI)) + KillingInstr = &*I; + } + if (isSafe) { + // Update the intermediate instruction to with the kill flag. + if (KillingInstr) { + bool Added = KillingInstr->addRegisterKilled(KilledOperand, TRI, true); + (void)Added; // supress compiler warning + assert(Added && "Must successfully update kill flag"); + removeKillInfo(I2, KilledOperand); + } + DoInsertAtI1 = true; + return true; + } + } + + // Try to move I1 towards I2. + { + // Look at instructions I in between I1 and (excluding) I2. + MachineBasicBlock::iterator I(I1), End(I2); + // At O3 we got better results (dhrystone) by being more conservative here. + if (!ShouldCombineAggressively) + End = llvm::next(MachineBasicBlock::iterator(I2)); + IsImmUseReg = I1->getOperand(1).isImm() || I1->getOperand(1).isGlobal(); + unsigned I1UseReg = IsImmUseReg ? 0 : I1->getOperand(1).getReg(); + // Track killed operands. If we move accross an instruction that kills our + // operand, we need to update the kill information on the moved I1. It kills + // the operand now. + MachineInstr *KillingInstr = 0; + unsigned KilledOperand = 0; + + while(++I != End) { + // If the intervening instruction I: + // * modifies I1's use reg + // * modifies I1's def reg + // * reads I1's def reg + // * or has unmodelled side effects + // We introduce this special case because llvm has no api to remove a + // kill flag for a register (a removeRegisterKilled() analogous to + // addRegisterKilled) that handles aliased register correctly. + // * or has a killed aliased register use of I1's use reg + // %D4<def> = TFRI64 16 + // %R6<def> = TFR %R9 + // %R8<def> = KILL %R8, %D4<imp-use,kill> + // If we want to move R6 = across the KILL instruction we would have + // to remove the %D4<imp-use,kill> operand. 
For now, we are + // conservative and disallow the move. + // we can't move I1 across it. + if (isUnsafeToMoveAccross(I, I1UseReg, I1DestReg, TRI) || + // Check for an aliased register kill. Bail out if we see one. + (!I->killsRegister(I1UseReg) && I->killsRegister(I1UseReg, TRI))) + return false; + + // Check for an exact kill (registers match). + if (I1UseReg && I->killsRegister(I1UseReg)) { + assert(KillingInstr == 0 && "Should only see one killing instruction"); + KilledOperand = I1UseReg; + KillingInstr = &*I; + } + } + if (KillingInstr) { + removeKillInfo(KillingInstr, KilledOperand); + // Update I1 to set the kill flag. This flag will later be picked up by + // the new COMBINE instruction. + bool Added = I1->addRegisterKilled(KilledOperand, TRI); + (void)Added; // supress compiler warning + assert(Added && "Must successfully update kill flag"); + } + DoInsertAtI1 = false; + } + + return true; +} + +/// findPotentialNewifiableTFRs - Finds tranfers that feed stores that could be +/// newified. (A use of a 64 bit register define can not be newified) +void +HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { + DenseMap<unsigned, MachineInstr *> LastDef; + for (MachineBasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + MachineInstr *MI = I; + // Mark TFRs that feed a potential new value store as such. + if(TII->mayBeNewStore(MI)) { + // Look for uses of TFR instructions. + for (unsigned OpdIdx = 0, OpdE = MI->getNumOperands(); OpdIdx != OpdE; + ++OpdIdx) { + MachineOperand &Op = MI->getOperand(OpdIdx); + + // Skip over anything except register uses. + if (!Op.isReg() || !Op.isUse() || !Op.getReg()) + continue; + + // Look for the defining instruction. + unsigned Reg = Op.getReg(); + MachineInstr *DefInst = LastDef[Reg]; + if (!DefInst) + continue; + if (!isCombinableInstType(DefInst, TII, ShouldCombineAggressively)) + continue; + + // Only close newifiable stores should influence the decision. 
+ MachineBasicBlock::iterator It(DefInst); + unsigned NumInstsToDef = 0; + while (&*It++ != MI) + ++NumInstsToDef; + + if (NumInstsToDef > MaxNumOfInstsBetweenNewValueStoreAndTFR) + continue; + + PotentiallyNewifiableTFR.insert(DefInst); + } + // Skip to next instruction. + continue; + } + + // Put instructions that last defined integer or double registers into the + // map. + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || !Op.isDef() || !Op.getReg()) + continue; + unsigned Reg = Op.getReg(); + if (Hexagon::DoubleRegsRegClass.contains(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + LastDef[*SubRegs] = MI; + } + } else if (Hexagon::IntRegsRegClass.contains(Reg)) + LastDef[Reg] = MI; + } + } +} + +bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { + + if (IsCombinesDisabled) return false; + + bool HasChanged = false; + + // Get target info. + TRI = MF.getTarget().getRegisterInfo(); + TII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo()); + + // Combine aggressively (for code size) + ShouldCombineAggressively = + MF.getTarget().getOptLevel() <= CodeGenOpt::Default; + + // Traverse basic blocks. + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; + ++BI) { + PotentiallyNewifiableTFR.clear(); + findPotentialNewifiableTFRs(*BI); + + // Traverse instructions in basic block. + for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end(); + MI != End;) { + MachineInstr *I1 = MI++; + // Don't combine a TFR whose user could be newified (instructions that + // define double registers can not be newified - Programmer's Ref Manual + // 5.4.2 New-value stores). + if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I1)) + continue; + + // Ignore instructions that are not combinable. 
+ if (!isCombinableInstType(I1, TII, ShouldCombineAggressively)) + continue; + + // Find a second instruction that can be merged into a combine + // instruction. + bool DoInsertAtI1 = false; + MachineInstr *I2 = findPairable(I1, DoInsertAtI1); + if (I2) { + HasChanged = true; + combine(I1, I2, MI, DoInsertAtI1); + } + } + } + + return HasChanged; +} + +/// findPairable - Returns an instruction that can be merged with \p I1 into a +/// COMBINE instruction or 0 if no such instruction can be found. Returns true +/// in \p DoInsertAtI1 if the combine must be inserted at instruction \p I1 +/// false if the combine must be inserted at the returned instruction. +MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1, + bool &DoInsertAtI1) { + MachineBasicBlock::iterator I2 = llvm::next(MachineBasicBlock::iterator(I1)); + unsigned I1DestReg = I1->getOperand(0).getReg(); + + for (MachineBasicBlock::iterator End = I1->getParent()->end(); I2 != End; + ++I2) { + // Bail out early if we see a second definition of I1DestReg. + if (I2->modifiesRegister(I1DestReg, TRI)) + break; + + // Ignore non-combinable instructions. + if (!isCombinableInstType(I2, TII, ShouldCombineAggressively)) + continue; + + // Don't combine a TFR whose user could be newified. + if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I2)) + continue; + + unsigned I2DestReg = I2->getOperand(0).getReg(); + + // Check that registers are adjacent and that the first destination register + // is even. + bool IsI1LowReg = (I2DestReg - I1DestReg) == 1; + bool IsI2LowReg = (I1DestReg - I2DestReg) == 1; + unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg; + if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex)) + continue; + + // Check that the two instructions are combinable. V4 allows more + // instructions to be merged into a combine. + // The order matters because in a TFRI we might can encode a int8 as the + // hi reg operand but only a uint6 as the low reg operand. 
+ if ((IsI2LowReg && !areCombinableOperations(TRI, I1, I2)) || + (IsI1LowReg && !areCombinableOperations(TRI, I2, I1))) + break; + + if (isSafeToMoveTogether(I1, I2, I1DestReg, I2DestReg, + DoInsertAtI1)) + return I2; + + // Not safe. Stop searching. + break; + } + return 0; +} + +void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, + bool DoInsertAtI1) { + // We are going to delete I2. If MI points to I2 advance it to the next + // instruction. + if ((MachineInstr *)MI == I2) ++MI; + + // Figure out whether I1 or I2 goes into the lowreg part. + unsigned I1DestReg = I1->getOperand(0).getReg(); + unsigned I2DestReg = I2->getOperand(0).getReg(); + bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; + unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg; + + // Get the double word register. + unsigned DoubleRegDest = + TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg, + &Hexagon::DoubleRegsRegClass); + assert(DoubleRegDest != 0 && "Expect a valid register"); + + + // Setup source operands. + MachineOperand &LoOperand = IsI1Loreg ? I1->getOperand(1) : + I2->getOperand(1); + MachineOperand &HiOperand = IsI1Loreg ? I2->getOperand(1) : + I1->getOperand(1); + + // Figure out which source is a register and which a constant. + bool IsHiReg = HiOperand.isReg(); + bool IsLoReg = LoOperand.isReg(); + + MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ? I1 : I2); + // Emit combine. 
+ if (IsHiReg && IsLoReg) + emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsHiReg) + emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsLoReg) + emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else + emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand); + + I1->eraseFromParent(); + I2->eraseFromParent(); +} + +void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle globals. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle constant extended immediates. + if (!isInt<8>(HiOperand.getImm())) { + assert(isInt<8>(LoOperand.getImm())); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + if (!isUInt<6>(LoOperand.getImm())) { + assert(isInt<8>(HiOperand.getImm())); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine #HiImm, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoReg = LoOperand.getReg(); + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Insert new combine instruction. + // DoubleRegDest = combine #HiImm, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addReg(LoReg, LoRegKillFlag); +} + +void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine HiReg, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned LoReg = LoOperand.getReg(); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Insert new combine instruction. + // DoubleRegDest = combine HiReg, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rr), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addReg(LoReg, LoRegKillFlag); +} + +FunctionPass *llvm::createHexagonCopyToCombine() { + return new HexagonCopyToCombine(); +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index d1e32c65ad..c96aaca8f8 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -384,6 +384,12 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), // ALU32/PERM + //===----------------------------------------------------------------------===// +let neverHasSideEffects = 1 in +def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst), + (ins s8Imm:$src1, s8Imm:$src2), + "$dst = combine(#$src1, #$src2)", + []>; + // Mux. 
def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, DoubleRegs:$src2, diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 022a7f6136..fee83fb811 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3188,6 +3188,93 @@ def STriw_offset_ext_V4 : STInst<(outs), (add IntRegs:$src1, u6_2ImmPred:$src2))]>, Requires<[HasV4T]>; +def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))), + (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>, + Requires<[HasV4T]>; + +def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))), + (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>, + Requires<[HasV4T]>; + + +// i8 -> i64 loads +// We need a complexity of 120 here to overide preceeding handling of +// zextloadi8. +let Predicates = [HasV4T], AddedComplexity = 120 in { +def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDrib_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; + +def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 FoldGlobalAddr:$addr)))>; + +def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; +} +// i16 -> i64 loads +// We need a complexity of 120 here to overide preceeding handling of +// zextloadi16. 
+let AddedComplexity = 120 in { +def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDrih_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; +} +// i32->i64 loads +// We need a complexity of 120 here to overide preceeding handling of +// zextloadi32. +let AddedComplexity = 120 in { +def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; +} // Indexed store double word - global address. 
// memw(Rs+#u6:2)=#S8 diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 05e696865f..f7c4513213 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -631,6 +631,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { .addMBB(jmpTarget); assert(NewMI && "New Value Jump Instruction Not created!"); + (void)NewMI; if (cmpInstr->getOperand(0).isReg() && cmpInstr->getOperand(0).isKill()) cmpInstr->getOperand(0).setIsKill(false); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index dc44b34cff..676dff2a4a 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -79,6 +79,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), InstrItins(&Subtarget.getInstrItineraryData()) { setMCUseCFI(false); + initAsmInfo(); } // addPassesForOptimizations - Allow the backend (target) to add Target @@ -161,6 +162,7 @@ bool HexagonPassConfig::addPreSched2() { HexagonTargetObjectFile &TLOF = (HexagonTargetObjectFile&)(getTargetLowering()->getObjFileLowering()); + addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); if (!TLOF.IsSmallDataEnabled()) { @@ -168,9 +170,6 @@ bool HexagonPassConfig::addPreSched2() { printAndVerify("After hexagon split const32/64 pass"); } return true; - if (getOptLevel() != CodeGenOpt::None) - addPass(&IfConverterID); - return false; } bool HexagonPassConfig::addPreEmitPass() { diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c508d124b3..59b4fabe01 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -837,16 +837,38 @@ bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI, } +/// Gets the 
predicate register of a predicated instruction. +unsigned getPredicatedRegister(MachineInstr *MI, const HexagonInstrInfo *QII) { + /// We use the following rule: The first predicate register that is a use is + /// the predicate register of a predicated instruction. + + assert(QII->isPredicated(MI) && "Must be predicated instruction"); + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + MachineOperand &Op = *OI; + if (Op.isReg() && Op.getReg() && Op.isUse() && + Hexagon::PredRegsRegClass.contains(Op.getReg())) + return Op.getReg(); + } + + llvm_unreachable("Unknown instruction operand layout"); + + return 0; +} + // Given two predicated instructions, this function detects whether // the predicates are complements bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - // Currently can only reason about conditional transfers - if (!QII->isConditionalTransfer(MI1) || !QII->isConditionalTransfer(MI2)) { + + // If we don't know the predicate sense of the instructions bail out early, we + // need it later. + if (getPredicateSense(MI1, QII) == PK_Unknown || + getPredicateSense(MI2, QII) == PK_Unknown) return false; - } // Scheduling unit for candidate SUnit* SU = MIToSUnit[MI1]; @@ -885,9 +907,9 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, // there already exist anti dep on the same pred in // the packet. if (PacketSU->Succs[i].getSUnit() == SU && + PacketSU->Succs[i].getKind() == SDep::Data && Hexagon::PredRegsRegClass.contains( PacketSU->Succs[i].getReg()) && - PacketSU->Succs[i].getKind() == SDep::Data && // Here I know that *VIN is predicate setting instruction // with true data dep to candidate on the register // we care about - c) in the above example. 
@@ -908,7 +930,11 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, // that the predicate sense is different // We also need to differentiate .old vs. .new: // !p0 is not complimentary to p0.new - return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) && + unsigned PReg1 = getPredicatedRegister(MI1, QII); + unsigned PReg2 = getPredicatedRegister(MI2, QII); + return ((PReg1 == PReg2) && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) && (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 273bc22b8e..2f93a5299c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -54,13 +54,14 @@ static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT, return X; } -static MCAsmInfo *createHexagonMCAsmInfo(StringRef TT) { +static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new HexagonMCAsmInfo(TT); // VirtualFP = (R30 + #0). 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(Hexagon::R30, 0); - MAI->addInitialFrameState(0, Dst, Src); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( + 0, Hexagon::R30, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index bcdd32fed9..c75895575d 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -43,6 +43,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { + initAsmInfo(); } namespace { diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index ec76dba491..5bc0668f35 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -53,7 +53,7 @@ static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createMCAsmInfo(StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { default: diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 164e351df9..6710a09707 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -36,7 +36,9 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, // FIXME: Check DataLayout string. DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { } + FrameLowering(Subtarget) { + initAsmInfo(); +} namespace { /// MSP430 Code Generator Pass Configuration Options. 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 26694ffdac..837fabee76 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -93,12 +93,12 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createMipsMCAsmInfo(StringRef TT) { +static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI = new MipsMCAsmInfo(TT); - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(Mips::SP, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned SP = MRI.getDwarfRegNum(Mips::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, SP, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 4d1e61bb99..cc7324f26e 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -18,6 +18,36 @@ #include "llvm/Support/raw_ostream.h" #include <string> +static void inlineAsmOut + (LLVMContext &C, StringRef AsmString, BasicBlock *BB ) { + std::vector<llvm::Type *> AsmArgTypes; + std::vector<llvm::Value*> AsmArgs; + llvm::FunctionType *AsmFTy = + llvm::FunctionType::get(Type::getVoidTy(C), + AsmArgTypes, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(AsmFTy, AsmString, "", true, + /* IsAlignStack */ false, + llvm::InlineAsm::AD_ATT); + CallInst::Create(IA, AsmArgs, "", BB); +} + +namespace { + +class InlineAsmHelper { + LLVMContext &C; + BasicBlock *BB; +public: + InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) : + C(C_), BB(BB_) { + } + + void Out(StringRef AsmString) { + inlineAsmOut(C, AsmString, BB); + } + +}; +} // // Return types that matter for hard float are: // float, double, complex float, and complex double @@ -52,6 +82,243 @@ static FPReturnVariant whichFPReturnVariant(Type *T) { } // 
+// Parameter type that matter are float, (float, float), (float, double), +// double, (double, double), (double, float) +// +enum FPParamVariant { + FSig, FFSig, FDSig, + DSig, DDSig, DFSig, NoSig +}; + +// which floating point parameter signature variant we are dealing with +// +typedef Type::TypeID TypeID; +const Type::TypeID FloatTyID = Type::FloatTyID; +const Type::TypeID DoubleTyID = Type::DoubleTyID; + +static FPParamVariant whichFPParamVariantNeeded(Function &F) { + switch (F.arg_size()) { + case 0: + return NoSig; + case 1:{ + TypeID ArgTypeID = F.getFunctionType()->getParamType(0)->getTypeID(); + switch (ArgTypeID) { + case FloatTyID: + return FSig; + case DoubleTyID: + return DSig; + default: + return NoSig; + } + } + default: { + TypeID ArgTypeID0 = F.getFunctionType()->getParamType(0)->getTypeID(); + TypeID ArgTypeID1 = F.getFunctionType()->getParamType(1)->getTypeID(); + switch(ArgTypeID0) { + case FloatTyID: { + switch (ArgTypeID1) { + case FloatTyID: + return FFSig; + case DoubleTyID: + return FDSig; + default: + return FSig; + } + } + case DoubleTyID: { + switch (ArgTypeID1) { + case FloatTyID: + return DFSig; + case DoubleTyID: + return DDSig; + default: + return DSig; + } + } + default: + return NoSig; + } + } + } + llvm_unreachable("can't get here"); +} + +// Figure out if we need float point based on the function parameters. 
+// We need to move variables in and/or out of floating point +// registers because of the ABI +// +static bool needsFPStubFromParams(Function &F) { + if (F.arg_size() >=1) { + Type *ArgType = F.getFunctionType()->getParamType(0); + switch (ArgType->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + break; + } + } + return false; +} + +static bool needsFPReturnHelper(Function &F) { + Type* RetType = F.getReturnType(); + return whichFPReturnVariant(RetType) != NoFPRet; +} + +static bool needsFPHelperFromSig(Function &F) { + return needsFPStubFromParams(F) || needsFPReturnHelper(F); +} + +// +// We swap between FP and Integer registers to allow Mips16 and Mips32 to +// interoperate +// + +static void swapFPIntParams + (FPParamVariant PV, Module *M, InlineAsmHelper &IAH, + bool LE, bool ToFP) { + //LLVMContext &Context = M->getContext(); + std::string MI = ToFP? "mtc1 ": "mfc1 "; + switch (PV) { + case FSig: + IAH.Out(MI + "$$4,$$f12"); + break; + case FFSig: + IAH.Out(MI +"$$4,$$f12"); + IAH.Out(MI + "$$5,$$f14"); + break; + case FDSig: + IAH.Out(MI + "$$4,$$f12"); + if (LE) { + IAH.Out(MI + "$$6,$$f14"); + IAH.Out(MI + "$$7,$$f15"); + } else { + IAH.Out(MI + "$$7,$$f14"); + IAH.Out(MI + "$$6,$$f15"); + } + break; + case DSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + } + break; + case DDSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + IAH.Out(MI + "$$6,$$f14"); + IAH.Out(MI + "$$7,$$f15"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + IAH.Out(MI + "$$7,$$f14"); + IAH.Out(MI + "$$6,$$f15"); + } + break; + case DFSig: + if (LE) { + IAH.Out(MI + "$$4,$$f12"); + IAH.Out(MI + "$$5,$$f13"); + } else { + IAH.Out(MI + "$$5,$$f12"); + IAH.Out(MI + "$$4,$$f13"); + } + IAH.Out(MI + "$$6,$$f14"); + break; + case NoSig: + return; + } +} +// +// Make sure that we know we already need a stub 
for this function. +// Having called needsFPHelperFromSig +// +static void assureFPCallStub(Function &F, Module *M, + const MipsSubtarget &Subtarget){ + // for now we only need them for static relocation + if (Subtarget.getRelocationModel() == Reloc::PIC_) + return; + LLVMContext &Context = M->getContext(); + bool LE = Subtarget.isLittle(); + std::string Name = F.getName(); + std::string SectionName = ".mips16.call.fp." + Name; + std::string StubName = "__call_stub_" + Name; + // + // see if we already have the stub + // + Function *FStub = M->getFunction(StubName); + if (FStub && !FStub->isDeclaration()) return; + FStub = Function::Create(F.getFunctionType(), + Function::InternalLinkage, StubName, M); + FStub->addFnAttr("mips16_fp_stub"); + FStub->addFnAttr(llvm::Attribute::Naked); + FStub->addFnAttr(llvm::Attribute::NoUnwind); + FStub->addFnAttr("nomips16"); + FStub->setSection(SectionName); + BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub); + InlineAsmHelper IAH(Context, BB); + FPReturnVariant RV = whichFPReturnVariant(FStub->getReturnType()); + FPParamVariant PV = whichFPParamVariantNeeded(F); + swapFPIntParams(PV, M, IAH, LE, true); + if (RV != NoFPRet) { + IAH.Out("move $$18, $$31"); + IAH.Out("jal " + Name); + } else { + IAH.Out("lui $$25,%hi(" + Name + ")"); + IAH.Out("addiu $$25,$$25,%lo(" + Name + ")" ); + } + switch (RV) { + case FRet: + IAH.Out("mfc1 $$2,$$f0"); + break; + case DRet: + if (LE) { + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f1"); + } else { + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 $$2,$$f1"); + } + break; + case CFRet: + if (LE) { + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f2"); + } else { + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 $$3,$$f2"); + } + break; + case CDRet: + if (LE) { + IAH.Out("mfc1 $$4,$$f2"); + IAH.Out("mfc1 $$5,$$f3"); + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f1"); + + } else { + IAH.Out("mfc1 $$5,$$f2"); + IAH.Out("mfc1 $$4,$$f3"); + IAH.Out("mfc1 $$3,$$f0"); + IAH.Out("mfc1 
$$2,$$f1"); + } + break; + case NoFPRet: + break; + } + if (RV != NoFPRet) + IAH.Out("jr $$18"); + else + IAH.Out("jr $$25"); + new UnreachableInst(Context, BB); +} + +// // Returns of float, double and complex need to be handled with a helper // function. The "AndCal" part is coming in a later patch. // @@ -96,6 +363,16 @@ static bool fixupFPReturnAndCall Attribute::ReadNone); Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, NULL)); CallInst::Create(F, Params, "", &Inst ); + } else if (const CallInst *CI = dyn_cast<CallInst>(I)) { + // pic mode calls are handled by already defined + // helper functions + if (Subtarget.getRelocationModel() != Reloc::PIC_ ) { + Function *F_ = CI->getCalledFunction(); + if (F_ && needsFPHelperFromSig(*F_)) { + assureFPCallStub(*F_, M, Subtarget); + Modified=true; + } + } } } return Modified; diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 17dd2c0796..ab9e62703b 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -145,7 +145,7 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { /// GetOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. 
-unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { +unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16; @@ -380,7 +380,7 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg, return Reg; } -unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { +unsigned Mips16InstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 || Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 || Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 || diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index a77a9043bb..a3bd31e94f 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -64,7 +64,7 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const; // Adjust SP by FrameSize bytes. 
Save RA, S0, S1 void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, @@ -102,7 +102,7 @@ public: (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const; void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 6b23057c9c..5fa79cb159 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -154,6 +154,7 @@ class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; + let mayLoad = 1; } class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, @@ -161,6 +162,7 @@ class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; + let mayStore = 1; } class MADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, @@ -314,8 +316,12 @@ let Predicates = [NotN64, HasMips64, HasStdEnc], } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem, load>, LW_FM<0x35>; - def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem, store>, LW_FM<0x3d>; + let isPseudo = 1, isCodeGenOnly = 1 in { + def PseudoLDC1 : LW_FT<"", AFGR64, IILoad, mem, load>; + def PseudoSDC1 : SW_FT<"", AFGR64, IIStore, mem, store>; + } + def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem>, LW_FM<0x35>; + def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem>, LW_FM<0x3d>; } // Indexed loads and stores. 
@@ -523,7 +529,7 @@ let AddedComplexity = 40 in { } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def : LoadRegImmPat<LDC1, f64, load>; - def : StoreRegImmPat<SDC1, f64>; + def : LoadRegImmPat<PseudoLDC1, f64, load>; + def : StoreRegImmPat<PseudoSDC1, f64>; } } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index ad92d41209..3144daebd7 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -77,7 +77,7 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB, SmallVectorImpl<MachineOperand> &Cond) const { - assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); + assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); // for both int and fp branches, the last explicit operand is the @@ -167,7 +167,7 @@ RemoveBranch(MachineBasicBlock &MBB) const // Up to 2 branches are removed. // Note that indirect branches are not removed. for(removed = 0; I != REnd && removed < 2; ++I, ++removed) - if (!GetAnalyzableBrOpc(I->getOpcode())) + if (!getAnalyzableBrOpc(I->getOpcode())) break; MBB.erase(I.base(), FirstBr.base()); @@ -182,7 +182,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm())); + Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm())); return false; } @@ -210,7 +210,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, BranchInstrs.push_back(LastInst); // Not an analyzable branch (e.g., indirect jump). - if (!GetAnalyzableBrOpc(LastOpc)) + if (!getAnalyzableBrOpc(LastOpc)) return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; // Get the second to last instruction in the block. 
@@ -219,7 +219,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, if (++I != REnd) { SecondLastInst = &*I; - SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode()); + SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode()); // Not an analyzable branch (must be an indirect jump). if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) @@ -282,3 +282,16 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } } } + +MachineInstrBuilder +MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, + MachineBasicBlock::iterator I) const { + MachineInstrBuilder MIB; + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); + + for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) + MIB.addOperand(I->getOperand(J)); + + MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end()); + return MIB; +} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 8c05d97bea..0f075ec6d0 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -17,6 +17,7 @@ #include "Mips.h" #include "MipsAnalyzeImmediate.h" #include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -81,7 +82,7 @@ public: /// virtual const MipsRegisterInfo &getRegisterInfo() const = 0; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const = 0; /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; @@ -116,6 +117,11 @@ public: const TargetRegisterInfo *TRI, int64_t Offset) const = 0; + /// Create an instruction which has the same operands and memory operands + /// as MI but has a new opcode. 
+ MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, + MachineBasicBlock::iterator I) const; + protected: bool isZeroImm(const MachineOperand &op) const; @@ -123,7 +129,7 @@ protected: unsigned Flag) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const = 0; void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3d319373fe..5ada1df267 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1095,7 +1095,8 @@ def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>; - +def : InstAlias<"addiu $rs, $imm", + (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rs, simm16:$imm), 0>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index bf5ad37031..daabf3d25a 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -217,7 +217,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { // MachineBasicBlock operand MBBOpnd. 
void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL, MachineBasicBlock *MBBOpnd) { - unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode()); + unsigned NewOpc = TII->getOppositeBranchOpc(Br->getOpcode()); const MCInstrDesc &NewDesc = TII->get(NewOpc); MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index a0768e51c0..12ed1bc186 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -18,11 +18,17 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; +static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), + cl::desc("Expand double precision loads and " + "stores to their single precision " + "counterparts.")); + MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, tm.getRelocationModel() == Reloc::PIC_ ? 
Mips::B : Mips::J), @@ -245,17 +251,23 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { default: return false; case Mips::RetRA: - ExpandRetRA(MBB, MI, Mips::RET); + expandRetRA(MBB, MI, Mips::RET); break; case Mips::BuildPairF64: - ExpandBuildPairF64(MBB, MI); + expandBuildPairF64(MBB, MI); break; case Mips::ExtractElementF64: - ExpandExtractElementF64(MBB, MI); + expandExtractElementF64(MBB, MI); + break; + case Mips::PseudoLDC1: + expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1); + break; + case Mips::PseudoSDC1: + expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1); break; case Mips::MIPSeh_return32: case Mips::MIPSeh_return64: - ExpandEhReturn(MBB, MI); + expandEhReturn(MBB, MI); break; } @@ -263,9 +275,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return true; } -/// GetOppositeBranchOpc - Return the inverse of the specified +/// getOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. 
-unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const { +unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); case Mips::BEQ: return Mips::BNE; @@ -346,7 +358,7 @@ MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, return Reg; } -unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { +unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || @@ -356,13 +368,13 @@ unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { Opc : 0; } -void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const { BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); } -void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); @@ -377,7 +389,7 @@ void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); } -void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, +void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); @@ -393,7 +405,57 @@ void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, .addReg(HiReg); } -void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB, +/// Add 4 to the displacement of operand MO. 
+static void fixDisp(MachineOperand &MO) { + switch (MO.getType()) { + default: + llvm_unreachable("Unhandled operand type."); + case MachineOperand::MO_Immediate: + MO.setImm(MO.getImm() + 4); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_ExternalSymbol: + MO.setOffset(MO.getOffset() + 4); + break; + } +} + +void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned OpcD, unsigned OpcS) const { + // If NoDPLoadStore is false, just change the opcode. + if (!NoDPLoadStore) { + genInstrWithNewOpc(OpcD, I); + return; + } + + // Expand a double precision FP load or store to two single precision + // instructions. + + const TargetRegisterInfo &TRI = getRegisterInfo(); + const MachineOperand &ValReg = I->getOperand(0); + unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpeven); + unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpodd); + + if (!TM.getSubtarget<MipsSubtarget>().isLittle()) + std::swap(LoReg, HiReg); + + // Create an instruction which loads from or stores to the lower memory + // address. + MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I); + MIB->getOperand(0).setReg(LoReg); + + // Create an instruction which loads from or stores to the higher memory + // address. + MIB = genInstrWithNewOpc(OpcS, I); + MIB->getOperand(0).setReg(HiReg); + fixDisp(MIB->getOperand(2)); +} + +void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { // This pseudo instruction is generated as part of the lowering of // ISD::EH_RETURN. 
We convert it to a stack increment by OffsetReg, and diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 0bf7876f0f..416fff8a60 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -65,7 +65,7 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + virtual unsigned getOppositeBranchOpc(unsigned Opc) const; /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, @@ -79,15 +79,18 @@ public: unsigned *NewImm) const; private: - virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + virtual unsigned getAnalyzableBrOpc(unsigned Opc) const; - void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; - void ExpandExtractElementF64(MachineBasicBlock &MBB, + void expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void ExpandBuildPairF64(MachineBasicBlock &MBB, + void expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void ExpandEhReturn(MachineBasicBlock &MBB, + void expandDPLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned OpcD, + unsigned OpcS) const; + void expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; }; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index a876f1c7f0..89407351a0 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -72,6 +72,7 @@ MipsTargetMachine(const Target &T, StringRef TT, FrameLowering(MipsFrameLowering::create(*this, Subtarget)), TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { + initAsmInfo(); } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 
67ca6b58e5..5f35edf219 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -72,7 +72,9 @@ NVPTXTargetMachine::NVPTXTargetMachine( Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering( - *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {} + *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { + initAsmInfo(); +} void NVPTXTargetMachine32::anchor() {} diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 71803cdac9..e5c5204708 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCTargetObjectFile.cpp PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp ) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index ec2657403e..b1ac4a6f27 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -22,7 +22,7 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: llvm_unreachable("Unknown fixup kind!"); @@ -50,6 +50,29 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { } } +static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_1: + return 1; + case FK_Data_2: + case PPC::fixup_ppc_ha16: + case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_lo16_ds: + return 2; + case FK_Data_4: + case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_br24: + return 4; + case FK_Data_8: + return 8; + case PPC::fixup_ppc_tlsreg: + case PPC::fixup_ppc_nofixup: + return 0; + } 
+} + namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { public: @@ -77,9 +100,9 @@ public: // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_lo16", 16, 16, 0 }, - { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo16_ds", 16, 14, 0 }, + { "fixup_ppc_lo16", 0, 16, 0 }, + { "fixup_ppc_ha16", 0, 16, 0 }, + { "fixup_ppc_lo16_ds", 0, 14, 0 }, { "fixup_ppc_tlsreg", 0, 0, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; @@ -98,12 +121,13 @@ public: if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); // For each byte of the fragment that the fixup touches, mask in the bits // from the fixup value. The Value has been "split up" into the appropriate // bitfields above. - for (unsigned i = 0; i != 4; ++i) - Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff); } bool mayNeedRelaxation(const MCInst &Inst) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 7a84723ed5..2508cc2f37 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -33,26 +33,9 @@ namespace { virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; - virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); - - virtual void sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs); - }; - - class PPCELFRelocationEntry : public ELFRelocationEntry { - public: - PPCELFRelocationEntry(const ELFRelocationEntry &RE); - bool operator<(const PPCELFRelocationEntry &RE) const { - return (RE.r_offset < r_offset || - 
(RE.r_offset == r_offset && RE.Type > Type)); - } }; } -PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE) - : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol, - RE.r_addend, *RE.Fixup) {} - PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC, @@ -240,47 +223,6 @@ const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Targe return NULL; } -void PPCELFObjectWriter:: -adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { - switch ((unsigned)Fixup.getKind()) { - case PPC::fixup_ppc_ha16: - case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_lo16_ds: - RelocOffset += 2; - break; - default: - break; - } -} - -// The standard sorter only sorts on the r_offset field, but PowerPC can -// have multiple relocations at the same offset. Sort secondarily on the -// relocation type to avoid nondeterminism. -void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs) { - - // Copy to a temporary vector of relocation entries having a different - // sort function. - std::vector<PPCELFRelocationEntry> TmpRelocs; - - for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin(); - R != Relocs.end(); ++R) { - TmpRelocs.push_back(PPCELFRelocationEntry(*R)); - } - - // Sort in place by ascending r_offset and descending r_type. - array_pod_sort(TmpRelocs.begin(), TmpRelocs.end()); - - // Copy back to the original vector. 
- unsigned I = 0; - for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin(); - R != TmpRelocs.end(); ++R, ++I) { - Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type, - R->Symbol, R->r_addend, *R->Fixup); - } -} - - MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 2223cd623c..3f04a4ec0a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -142,7 +142,7 @@ unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo, if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_ha16)); return 0; } @@ -153,7 +153,7 @@ unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo, if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16)); return 0; } @@ -170,7 +170,7 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -188,7 +188,7 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; // Add a fixup for the displacement field. 
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_lo16_ds)); return RegBits; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index a01fa44a9a..2da30f9038 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -58,7 +58,7 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createPPCMCAsmInfo(StringRef TT) { +static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; @@ -69,9 +69,10 @@ static MCAsmInfo *createPPCMCAsmInfo(StringRef TT) { MAI = new PPCLinuxMCAsmInfo(isPPC64); // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1; + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index cd70aeed87..1f0c3c4b5d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1168,6 +1168,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, FuncInfo->addMustSaveCR(Reg); } else { CRSpilled = true; + FuncInfo->setSpillsCR(); // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3819bc8f15..eee2bb87de 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16,6 +16,7 @@ #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" +#include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -64,6 +65,9 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); + if (TM.getSubtargetImpl()->isSVR4ABI()) + return new PPC64LinuxTargetObjectFile(); + return new TargetLoweringObjectFileELF(); } @@ -662,6 +666,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; + case PPCISD::SC: return "PPCISD::SC"; } } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b219de38d5..2a1cc121da 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -175,61 +175,61 @@ namespace llvm { /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym@got@tprel@ha. + /// base to sym\@got\@tprel\@ha. ADDIS_GOT_TPREL_HA, /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym@got@tprel@l. This completes the addition that + /// and offset sym\@got\@tprel\@l. This completes the addition that /// finds the offset of "sym" relative to the thread pointer. 
LD_GOT_TPREL_L, /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS /// model, produces an ADD instruction that adds the contents of /// G8RReg to the thread pointer. Symbol contains a relocation - /// sym@tls which is to be replaced by the thread pointer and + /// sym\@tls which is to be replaced by the thread pointer and /// identifies to the linker that the instruction is part of a /// TLS sequence. ADD_TLS, /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym@got@tlsgd@ha. + /// register to sym\@got\@tlsgd\@ha. ADDIS_TLSGD_HA, /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@tlsgd@l. + /// sym\@got\@tlsgd\@l. ADDI_TLSGD_L, /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym@tlsgd). + /// model, produces a call to __tls_get_addr(sym\@tlsgd). GET_TLS_ADDR, /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym@got@tlsld@ha. + /// register to sym\@got\@tlsld\@ha. ADDIS_TLSLD_HA, /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@tlsld@l. + /// sym\@got\@tlsld\@l. ADDI_TLSLD_L, /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym@tlsld). + /// model, produces a call to __tls_get_addr(sym\@tlsld). GET_TLSLD_ADDR, /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the /// local-dynamic TLS model, produces an ADDIS8 instruction - /// that adds X3 to sym@dtprel@ha. The Chain operand is needed + /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed /// to tie this in place following a copy to %X3 from the result /// of a GET_TLSLD_ADDR. 
ADDIS_DTPREL_HA, /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@dtprel@l. + /// sym\@got\@dtprel\@l. ADDI_DTPREL_L, /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded @@ -238,6 +238,10 @@ namespace llvm { /// optimizations due to constant folding. VADD_SPLAT, + /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned + /// operand identifies the operating system entry point. + SC, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or @@ -266,16 +270,16 @@ namespace llvm { /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, /// produces an ADDIS8 instruction that adds the TOC base register to - /// sym@toc@ha. + /// sym\@toc\@ha. ADDIS_TOC_HA, /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model, /// produces a LD instruction with base register G8RReg and offset - /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. LD_TOC_L, /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces - /// an ADDI8 instruction that adds G8RReg to sym@toc@l. + /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l. /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. ADDI_TOC_L }; @@ -450,7 +454,7 @@ namespace llvm { /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. 
virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index b6f4e85215..a24405851c 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -145,6 +145,19 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, let Inst{31} = lk; } +// 1.7.3 SC-Form +class SCForm<bits<6> opcode, bits<1> xo, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<7> LEV; + + let Pattern = pattern; + + let Inst{20-26} = LEV; + let Inst{30} = xo; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 4763069f25..9c39b34ab0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -162,6 +162,10 @@ def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; +def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -987,6 +991,12 @@ let isBranch = 1, isTerminator = 1 in { "#EH_SjLj_Setup\t$dst", []>; } +// System call. +let PPC970_Unit = 7 in { + def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), + "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>; +} + // DCB* instructions. 
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 14dc794195..0b099edff4 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -48,6 +48,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) setMCUseCFI(false); + initAsmInfo(); } void PPC32TargetMachine::anchor() { } diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp new file mode 100644 index 0000000000..90e4f15452 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -0,0 +1,57 @@ +//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPCTargetObjectFile.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +void +PPC64LinuxTargetObjectFile:: +Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + +const MCSection * PPC64LinuxTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + const MCSection *DefaultSection = + TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); + + if (DefaultSection != ReadOnlySection) + return DefaultSection; + + // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI + // when we have a constant that contains global relocations. 
This is + // necessary because of this ABI's handling of pointers to functions in + // a shared library. The address of a function is actually the address + // of a function descriptor, which resides in the .opd section. Generated + // code uses the descriptor directly rather than going via the GOT as some + // other ABIs do, which means that initialized function pointers must + // reference the descriptor. The linker must convert copy relocs of + // pointers to functions in shared libraries into dynamic relocations, + // because of an ordering problem with initialization of copy relocs and + // PLT entries. The dynamic relocation will be initialized by the dynamic + // linker, so we must use DataRelROSection instead of ReadOnlySection. + // For more information, see the description of ELIMINATE_COPY_RELOCS in + // GNU ld. + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + + if (GVar && GVar->isConstant() && + (GVar->getInitializer()->getRelocationInfo() == + Constant::GlobalRelocations)) + return DataRelROSection; + + return DefaultSection; +} diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h new file mode 100644 index 0000000000..9203e23574 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -0,0 +1,32 @@ +//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H +#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + + /// PPC64LinuxTargetObjectFile - This implementation is used for + /// 64-bit PowerPC Linux. 
+ class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 31fbf32d0c..7175ec941a 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -70,6 +70,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, InstrInfo = new SIInstrInfo(*this); TLInfo = new SITargetLowering(*this); } + initAsmInfo(); } AMDGPUTargetMachine::~AMDGPUTargetMachine() { diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp index 178795936a..126514b976 100644 --- a/lib/Target/R600/AMDILDeviceInfo.cpp +++ b/lib/Target/R600/AMDILDeviceInfo.cpp @@ -81,7 +81,8 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName, return new AMDGPUNIDevice(ptr); } else if (deviceName == "SI" || deviceName == "tahiti" || deviceName == "pitcairn" || - deviceName == "verde" || deviceName == "oland") { + deviceName == "verde" || deviceName == "oland" || + deviceName == "hainan") { return new AMDGPUSIDevice(ptr); } else { #if DEBUG diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 45d009c2a0..6f66aa898a 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -78,7 +78,7 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { return createSIMCCodeEmitter(MCII, MRI, STI, Ctx); } else { - return createR600MCCodeEmitter(MCII, MRI, STI, Ctx); + return createR600MCCodeEmitter(MCII, MRI); } } diff --git 
a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index 09d0d5b61c..95c572c21b 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -32,9 +32,7 @@ class raw_ostream; extern Target TheAMDGPUTarget; MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); + const MCRegisterInfo &MRI); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 271a974734..3404844435 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -35,14 +35,11 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; - const MCSubtargetInfo &STI; - MCContext &Ctx; public: - R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, - const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri) + : MCII(mcii), MRI(mri) { } /// \brief Encode the instruction and write it to the OS. 
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -98,10 +95,8 @@ enum TextureTypes { }; MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new R600MCCodeEmitter(MCII, MRI, STI, Ctx); + const MCRegisterInfo &MRI) { + return new R600MCCodeEmitter(MCII, MRI); } void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index 5ee1c0d8ae..0cbe919d81 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -45,3 +45,4 @@ def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"oland", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"hainan", SI_Itin, [Feature64BitPtr, FeatureFP64]>; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 60bceb708f..3e7a24aecf 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -37,6 +37,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index c7725a1459..7f2159f79e 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -250,46 +251,6 @@ public: bool isS32Imm() const { 
return isImm(-(1LL << 31), (1LL << 31) - 1); } }; -// Maps of asm register numbers to LLVM register numbers, with 0 indicating -// an invalid register. We don't use register class directly because that -// specifies the allocation order. -static const unsigned GR32Regs[] = { - SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, - SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, - SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, - SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W -}; -static const unsigned GR64Regs[] = { - SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, - SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, - SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, - SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D -}; -static const unsigned GR128Regs[] = { - SystemZ::R0Q, 0, SystemZ::R2Q, 0, - SystemZ::R4Q, 0, SystemZ::R6Q, 0, - SystemZ::R8Q, 0, SystemZ::R10Q, 0, - SystemZ::R12Q, 0, SystemZ::R14Q, 0 -}; -static const unsigned FP32Regs[] = { - SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, - SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, - SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, - SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S -}; -static const unsigned FP64Regs[] = { - SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, - SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, - SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, - SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D -}; -static const unsigned FP128Regs[] = { - SystemZ::F0Q, SystemZ::F1Q, 0, 0, - SystemZ::F4Q, SystemZ::F5Q, 0, 0, - SystemZ::F8Q, SystemZ::F9Q, 0, 0, - SystemZ::F12Q, SystemZ::F13Q, 0, 0 -}; - class SystemZAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "SystemZGenAsmMatcher.inc" @@ -349,25 +310,28 @@ public: // Used by the TableGen code to parse particular operand types. 
OperandMatchResultTy parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg); + return parseRegister(Operands, 'r', SystemZMC::GR32Regs, + SystemZOperand::GR32Reg); } OperandMatchResultTy parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg); + return parseRegister(Operands, 'r', SystemZMC::GR64Regs, + SystemZOperand::GR64Reg); } OperandMatchResultTy parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg); + return parseRegister(Operands, 'r', SystemZMC::GR128Regs, + SystemZOperand::GR128Reg); } OperandMatchResultTy parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg, - true); + return parseRegister(Operands, 'r', SystemZMC::GR32Regs, + SystemZOperand::ADDR32Reg, true); } OperandMatchResultTy parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg, - true); + return parseRegister(Operands, 'r', SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, true); } OperandMatchResultTy parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -375,30 +339,47 @@ public: } OperandMatchResultTy parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg); + return parseRegister(Operands, 'f', SystemZMC::FP32Regs, + SystemZOperand::FP32Reg); } OperandMatchResultTy parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg); + return parseRegister(Operands, 'f', SystemZMC::FP64Regs, + SystemZOperand::FP64Reg); } OperandMatchResultTy parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP128Regs, 
SystemZOperand::FP128Reg); + return parseRegister(Operands, 'f', SystemZMC::FP128Regs, + SystemZOperand::FP128Reg); } OperandMatchResultTy parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false); + return parseAddress(Operands, SystemZMC::GR32Regs, + SystemZOperand::ADDR32Reg, false); } OperandMatchResultTy parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false); + return parseAddress(Operands, SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, false); } OperandMatchResultTy parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true); + return parseAddress(Operands, SystemZMC::GR64Regs, + SystemZOperand::ADDR64Reg, true); } OperandMatchResultTy parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + OperandMatchResultTy + parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + int64_t MinVal, int64_t MaxVal); + OperandMatchResultTy + parsePCRel16(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1); + } + OperandMatchResultTy + parsePCRel32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1); + } }; } @@ -502,7 +483,8 @@ SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, // Parse the first register. 
Register Reg; - OperandMatchResultTy Result = parseRegister(Reg, 'r', GR64Regs, true); + OperandMatchResultTy Result = parseRegister(Reg, 'r', SystemZMC::GR64Regs, + true); if (Result != MatchOperand_Success) return Result; @@ -517,7 +499,7 @@ SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, } Index = Reg.Number; - Result = parseRegister(Reg, 'r', GR64Regs, true); + Result = parseRegister(Reg, 'r', SystemZMC::GR64Regs, true); if (Result != MatchOperand_Success) return Result; } @@ -546,9 +528,9 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, if (parseRegister(Reg)) return Error(Reg.StartLoc, "register expected"); if (Reg.Prefix == 'r' && Reg.Number < 16) - RegNo = GR64Regs[Reg.Number]; + RegNo = SystemZMC::GR64Regs[Reg.Number]; else if (Reg.Prefix == 'f' && Reg.Number < 16) - RegNo = FP64Regs[Reg.Number]; + RegNo = SystemZMC::FP64Regs[Reg.Number]; else return Error(Reg.StartLoc, "invalid register"); StartLoc = Reg.StartLoc; @@ -683,6 +665,37 @@ parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } +SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: +parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + int64_t MinVal, int64_t MaxVal) { + MCContext &Ctx = getContext(); + MCStreamer &Out = getStreamer(); + const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return MatchOperand_NoMatch; + + // For consistency with the GNU assembler, treat immediates as offsets + // from ".". + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + if ((Value & 1) || Value < MinVal || Value > MaxVal) { + Error(StartLoc, "offset out of range"); + return MatchOperand_ParseFail; + } + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + Ctx); + Expr = Value == 0 ? 
Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + return MatchOperand_Success; +} + // Force static initialization. extern "C" void LLVMInitializeSystemZAsmParser() { RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget); diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 67b17fcc59..757d5a8898 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -4,6 +4,7 @@ tablegen(LLVM SystemZGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM SystemZGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM SystemZGenCallingConv.inc -gen-callingconv) tablegen(LLVM SystemZGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) @@ -27,6 +28,7 @@ add_llvm_target(SystemZCodeGen add_dependencies(LLVMSystemZCodeGen intrinsics_gen) add_subdirectory(AsmParser) +add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/SystemZ/Disassembler/CMakeLists.txt b/lib/Target/SystemZ/Disassembler/CMakeLists.txt new file mode 100644 index 0000000000..5bc1859816 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMSystemZDisassembler + SystemZDisassembler.cpp + ) + +add_dependencies(LLVMSystemZDisassembler SystemZCommonTableGen) diff --git a/lib/Target/SystemZ/Disassembler/LLVMBuild.txt b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000000..c3081f5447 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===-- ./lib/Target/SystemZ/Disassembler/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SystemZDisassembler +parent = SystemZ +required_libraries = MC Support SystemZDesc SystemZInfo +add_to_library_groups = SystemZ diff --git a/lib/Target/SystemZ/Disassembler/Makefile b/lib/Target/SystemZ/Disassembler/Makefile new file mode 100644 index 0000000000..efc4cc8e9c --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===-- lib/Target/SystemZ/Disassembler/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMSystemZDisassembler + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp new file mode 100644 index 0000000000..9a9de78224 --- /dev/null +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -0,0 +1,301 @@ +//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class SystemZDisassembler : public MCDisassembler { +public: + SystemZDisassembler(const MCSubtargetInfo &STI) + : MCDisassembler(STI) {} + virtual ~SystemZDisassembler() {} + + // Override MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const LLVM_OVERRIDE; +}; +} // end anonymous namespace + +static MCDisassembler *createSystemZDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new SystemZDisassembler(STI); +} + +extern "C" void LLVMInitializeSystemZDisassembler() { + // Register the disassembler. 
+ TargetRegistry::RegisterMCDisassembler(TheSystemZTarget, + createSystemZDisassembler); +} + +static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned *Regs, + bool isAddress = false) { + assert(RegNo < 16 && "Invalid register"); + if (!isAddress || RegNo) { + RegNo = Regs[RegNo]; + if (RegNo == 0) + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::CreateReg(RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs); +} + +static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); +} + +static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs); +} + +static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, true); +} + +static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs); +} + +static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs); +} + +static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs); +} + +template<unsigned N> +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(Imm)); + 
return MCDisassembler::Success; +} + +template<unsigned N> +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<6>(Inst, Imm); +} + +static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<32>(Inst, Imm); +} + +static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<32>(Inst, Imm); +} + +template<unsigned N> +static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address) { + assert(isUInt<N>(Imm) && "Invalid PC-relative offset"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm) * 2 + Address)); + return MCDisassembler::Success; +} 
+ +static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<16>(Inst, Imm, Address); +} + +static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<32>(Inst, Imm, Address); +} + +static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 12; + uint64_t Disp = Field & 0xfff; + assert(Base < 16 && "Invalid BDAddr12"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 20; + uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff); + assert(Base < 16 && "Invalid BDAddr20"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Index < 16 && "Invalid BDXAddr12"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 24; + uint64_t Base = (Field >> 20) & 0xf; + uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12); + assert(Index < 16 && "Invalid BDXAddr20"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 
0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +#include "SystemZGenDisassemblerTables.inc" + +DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + // Get the first two bytes of the instruction. + uint8_t Bytes[6]; + Size = 0; + if (Region.readBytes(Address, 2, Bytes, 0) == -1) + return MCDisassembler::Fail; + + // The top 2 bits of the first byte specify the size. 
+ const uint8_t *Table; + if (Bytes[0] < 0x40) { + Size = 2; + Table = DecoderTable16; + } else if (Bytes[0] < 0xc0) { + Size = 4; + Table = DecoderTable32; + } else { + Size = 6; + Table = DecoderTable48; + } + + // Read any remaining bytes. + if (Size > 2 && Region.readBytes(Address + 2, Size - 2, Bytes + 2, 0) == -1) + return MCDisassembler::Fail; + + // Construct the instruction. + uint64_t Inst = 0; + for (uint64_t I = 0; I < Size; ++I) + Inst = (Inst << 8) | Bytes[I]; + + return decodeInstruction(Table, MI, Inst, Address, this, STI); +} diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index d73cf49808..369802b2b8 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -114,10 +114,26 @@ void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, O << "%a" << (unsigned int)Value; } +void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "0x"; + O.write_hex(MO.getImm()); + } else + O << *MO.getExpr(); +} + void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - printOperand(MI, OpNum, O); - O << "@PLT"; + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "0x"; + O.write_hex(MO.getImm()); + } else { + O << *MO.getExpr(); + O << "@PLT"; + } } void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index b82e79d93c..f77282efcb 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -56,6 +56,7 @@ private: void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); 
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt index aba0de27ac..95e657f7bd 100644 --- a/lib/Target/SystemZ/LLVMBuild.txt +++ b/lib/Target/SystemZ/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -24,6 +24,7 @@ name = SystemZ parent = Target has_asmparser = 1 has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index ea2250f546..7721b1ffab 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -45,30 +45,43 @@ private: // Called by the TableGen code to get the binary encoding of operand // MO in MI. Fixups is the list of fixups against MI. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const; + // Called by the TableGen code to get the binary encoding of an address. + // The index, if any, is encoded first, followed by the base, + // followed by the displacement. In a 20-bit displacement, + // the low 12 bits are encoded before the high 8 bits. 
+ uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at // Offset bytes from the start of MI. Add the fixup to Fixups // and return the in-place addend, which since we're a RELA target // is always 0. - unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, unsigned Kind, int64_t Offset) const; - unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); } - unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); } - unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPLT16DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2); } - unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPLT32DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2); } @@ -95,34 +108,73 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, } } -unsigned SystemZMCCodeEmitter:: +uint64_t SystemZMCCodeEmitter:: 
getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); + return static_cast<uint64_t>(MO.getImm()); llvm_unreachable("Unexpected operand type!"); } -unsigned -SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum, +uint64_t SystemZMCCodeEmitter:: +getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + assert(isUInt<4>(Base) && isUInt<12>(Disp)); + return (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + assert(isUInt<4>(Base) && isInt<20>(Disp)); + return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12); +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups); + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index)); + return (Index << 16) | (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), 
Fixups); + assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index)); + return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8) + | ((Disp & 0xff000) >> 12); +} + +uint64_t +SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, unsigned Kind, int64_t Offset) const { const MCOperand &MO = MI.getOperand(OpNum); - // For compatibility with the GNU assembler, treat constant operands as - // unadjusted PC-relative offsets. + const MCExpr *Expr; if (MO.isImm()) - return MO.getImm() / 2; - - const MCExpr *Expr = MO.getExpr(); - if (Offset) { - // The operand value is relative to the start of MI, but the fixup - // is relative to the operand field itself, which is Offset bytes - // into MI. Add Offset to the relocation value to cancel out - // this difference. - const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + Expr = MCConstantExpr::Create(MO.getImm() + Offset, Ctx); + else { + Expr = MO.getExpr(); + if (Offset) { + // The operand value is relative to the start of MI, but the fixup + // is relative to the operand field itself, which is Offset bytes + // into MI. Add Offset to the relocation value to cancel out + // this difference. 
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } } Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); return 0; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 6844f92ec9..3653192d85 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -27,11 +27,55 @@ using namespace llvm; -static MCAsmInfo *createSystemZMCAsmInfo(StringRef TT) { +const unsigned SystemZMC::GR32Regs[16] = { + SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, + SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, + SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, + SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W +}; + +const unsigned SystemZMC::GR64Regs[16] = { + SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, + SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, + SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, + SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D +}; + +const unsigned SystemZMC::GR128Regs[16] = { + SystemZ::R0Q, 0, SystemZ::R2Q, 0, + SystemZ::R4Q, 0, SystemZ::R6Q, 0, + SystemZ::R8Q, 0, SystemZ::R10Q, 0, + SystemZ::R12Q, 0, SystemZ::R14Q, 0 +}; + +const unsigned SystemZMC::FP32Regs[16] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S +}; + +const unsigned SystemZMC::FP64Regs[16] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D +}; + +const unsigned SystemZMC::FP128Regs[16] = { + SystemZ::F0Q, 
SystemZ::F1Q, 0, 0, + SystemZ::F4Q, SystemZ::F5Q, 0, 0, + SystemZ::F8Q, SystemZ::F9Q, 0, 0, + SystemZ::F12Q, SystemZ::F13Q, 0, 0 +}; + +static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); - MachineLocation FPDst(MachineLocation::VirtualFP); - MachineLocation FPSrc(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP); - MAI->addInitialFrameState(0, FPDst, FPSrc); + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(SystemZ::R15D, true), + SystemZMC::CFAOffsetFromInitialSP); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 229912f161..1f70047db6 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -34,6 +34,16 @@ namespace SystemZMC { // The offset of the DWARF CFA from the incoming stack pointer. const int64_t CFAOffsetFromInitialSP = CallFrameSize; + + // Maps of asm register numbers to LLVM register numbers, with 0 indicating + // an invalid register. We don't use the register classes directly because + // they specify the allocation order. 
+ extern const unsigned GR32Regs[16]; + extern const unsigned GR64Regs[16]; + extern const unsigned GR128Regs[16]; + extern const unsigned FP32Regs[16]; + extern const unsigned FP64Regs[16]; + extern const unsigned FP128Regs[16]; } MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index c992584af9..445725bd1e 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -16,13 +16,14 @@ BUILT_SOURCES = SystemZGenRegisterInfo.inc \ SystemZGenAsmWriter.inc \ SystemZGenAsmMatcher.inc \ SystemZGenCodeEmitter.inc \ + SystemZGenDisassemblerTables.inc \ SystemZGenInstrInfo.inc \ SystemZGenDAGISel.inc \ SystemZGenSubtargetInfo.inc \ SystemZGenCallingConv.inc \ SystemZGenMCCodeEmitter.inc -DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index d1f56a4916..8f5a5476b4 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -29,17 +29,44 @@ to load 103. This seems to be a general target-independent problem. -- -The tuning of the choice between Load Address (LA) and addition in +The tuning of the choice between LOAD ADDRESS (LA) and addition in SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on performance measurements. -- +We don't support tail calls at present. + +-- + +We don't support prefetching yet. + +-- + There is no scheduling support. -- -We don't use the Branch on Count or Branch on Index families of instruction. +We don't use the BRANCH ON COUNT or BRANCH ON INDEX families of instruction. + +-- + +We might want to use BRANCH ON CONDITION for conditional indirect calls +and conditional returns. + +-- + +We don't use the combined COMPARE AND BRANCH instructions. Using them +would require a change to the way we handle out-of-range branches. 
+At the moment, we start with 32-bit forms like BRCL and shorten them +to forms like BRC where possible, but COMPARE AND BRANCH does not have +a 32-bit form. + +-- + +We should probably model just CC, not the PSW as a whole. Strictly +speaking, every instruction changes the PSW since the PSW contains the +current instruction address. -- @@ -54,7 +81,30 @@ equality after an integer comparison, etc. -- -We don't optimize string and block memory operations. +We don't use the LOAD AND TEST or TEST DATA CLASS instructions. + +-- + +We could use the generic floating-point forms of LOAD COMPLEMENT, +LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the +condition codes. For example, we could use LCDFR instead of LCDBR. + +-- + +We don't optimize block memory operations. + +It's definitely worth using things like MVC, CLC, NC, XC and OC with +constant lengths. MVCIN may be worthwhile too. + +We should probably implement things like memcpy using MVC with EXECUTE. +Likewise memcmp and CLC. MVCLE and CLCLE could be useful too. + +-- + +We don't optimize string operations. + +MVST, CLST, SRST and CUSE could be useful here. Some of the TRANSLATE +family might be too, although they are probably more difficult to exploit. -- @@ -63,9 +113,33 @@ conventions require f128s to be returned by invisible reference. -- +ADD LOGICAL WITH SIGNED IMMEDIATE could be useful when we need to +produce a carry. SUBTRACT LOGICAL IMMEDIATE could be useful when we +need to produce a borrow. (Note that there are no memory forms of +ADD LOGICAL WITH CARRY and SUBTRACT LOGICAL WITH BORROW, so the high +part of 128-bit memory operations would probably need to be done +via a register.) + +-- + +We don't use the halfword forms of LOAD REVERSED and STORE REVERSED +(LRVH and STRVH). + +-- + +We could take advantage of the various ... UNDER MASK instructions, +such as ICM and STCM. + +-- + +We could make more use of the ROTATE AND ... SELECTED BITS instructions. 
+At the moment we only use RISBG, and only then for subword atomic operations. + +-- + DAGCombiner can detect integer absolute, but there's not yet an associated -ISD opcode. We could add one and implement it using Load Positive. -Negated absolutes could use Load Negative. +ISD opcode. We could add one and implement it using LOAD POSITIVE. +Negated absolutes could use LOAD NEGATIVE. -- @@ -142,5 +216,15 @@ See CodeGen/SystemZ/alloca-01.ll for an example. -- Atomic loads and stores use the default compare-and-swap based implementation. -This is probably much too conservative in practice, and the overhead is -especially bad for 8- and 16-bit accesses. +This is much too conservative in practice, since the architecture guarantees +that 1-, 2-, 4- and 8-byte loads and stores to aligned addresses are +inherently atomic. + +-- + +If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG. + +-- + +We might want to model all access registers and use them to spill +32-bit values. diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 7c9f0e668b..104af6e99d 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -40,24 +40,22 @@ def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; // fcopysign with an FP32 result. let isCodeGenOnly = 1 in { - def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; - def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; + def CPSDRss : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; } -// The sign of an FP128 is in the high register. Give the CPSDRsd -// operands in R1, R2, R3 order. +// The sign of an FP128 is in the high register. def : Pat<(fcopysign FP32:$src1, FP128:$src2), - (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>; + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>; // fcopysign with an FP64 result. 
let isCodeGenOnly = 1 in - def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; -def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; + def CPSDRds : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; -// The sign of an FP128 is in the high register. Give the CPSDRdd -// operands in R1, R2, R3 order. +// The sign of an FP128 is in the high register. def : Pat<(fcopysign FP64:$src1, FP128:$src2), - (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>; + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>; // fcopysign with an FP128 result. Use "upper" as the high half and leave // the low half as-is. @@ -65,13 +63,12 @@ class CopySign128<RegisterOperand cls, dag upper> : Pat<(fcopysign FP128:$src1, cls:$src2), (INSERT_SUBREG FP128:$src1, upper, subreg_high)>; -// Give the CPSDR* operands in R1, R2, R3 order. -def : CopySign128<FP32, (CPSDRds FP32:$src2, - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; -def : CopySign128<FP64, (CPSDRdd FP64:$src2, - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; -def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; +def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_high), + FP32:$src2)>; +def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), + FP64:$src2)>; +def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), + (EXTRACT_SUBREG FP128:$src2, subreg_high))>; //===----------------------------------------------------------------------===// // Load instructions @@ -155,13 +152,13 @@ let Defs = [PSW] in { } // fp_to_sint always rounds towards zero, which is modifier value 5. 
-def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR FP32:$src, 5)>; -def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR FP64:$src, 5)>; -def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>; +def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; +def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; +def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; -def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR FP32:$src, 5)>; -def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR FP64:$src, 5)>; -def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>; +def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; +def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; +def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; //===----------------------------------------------------------------------===// // Unary arithmetic @@ -210,9 +207,9 @@ let Defs = [PSW] in { // frint rounds according to the current mode (modifier 0) and detects // inexact conditions. 
-def : Pat<(frint FP32:$src), (FIEBR FP32:$src, 0)>; -def : Pat<(frint FP64:$src), (FIDBR FP64:$src, 0)>; -def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>; +def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; //===----------------------------------------------------------------------===// // Binary arithmetic diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index b32b7eb0fc..bf5aa8dbeb 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -82,25 +82,24 @@ def getDisp20Opcode : InstrMapping { // // Formats are specified using operand field declarations of the form: // -// bits<4> Rn : register input or output for operand n -// bits<m> In : immediate value of width m for operand n -// bits<4> Bn : base register for address operand n -// bits<m> Dn : displacement value of width m for address operand n -// bits<4> Xn : index register for address operand n -// bits<4> Mn : mode value for operand n +// bits<4> Rn : register input or output for operand n +// bits<m> In : immediate value of width m for operand n +// bits<4> BDn : address operand n, which has a base and a displacement +// bits<m> XBDn : address operand n, which has an index, a base and a +// displacement +// bits<4> Xn : index register for address operand n +// bits<4> Mn : mode value for operand n // -// The operand numbers ("n" in the list above) follow the architecture manual, -// but the fields are always declared in assembly order, so there are some -// cases where operand "2" comes after operand "3". For address operands, -// the base register field is declared first, followed by the displacement, -// followed by the index (if any). This matches the bdaddr* and bdxaddr* -// orders. +// The operand numbers ("n" in the list above) follow the architecture manual. 
+// Assembly operands sometimes have a different order; in particular, R3 +// is often written between operands 1 and 2. // //===----------------------------------------------------------------------===// class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<16> I2; @@ -114,6 +113,7 @@ class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -133,6 +133,7 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<32> I2; @@ -146,6 +147,7 @@ class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<2, outs, ins, asmstr, pattern> { field bits<16> Inst; + field bits<16> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -158,6 +160,7 @@ class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R3; @@ -173,6 +176,7 @@ class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -186,6 +190,7 @@ class InstRRE<bits<16> op, dag outs, dag ins, string
asmstr, list<dag> pattern> class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -201,17 +206,14 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{31-24} = op; let Inst{23-20} = R1; - let Inst{19-16} = X2; - let Inst{15-12} = B2; - let Inst{11-0} = D2; + let Inst{19-0} = XBD2; let HasIndex = 1; } @@ -219,17 +221,14 @@ class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2; + let Inst{35-16} = XBD2; let Inst{15-8} = 0; let Inst{7-0} = op{7-0}; @@ -239,18 +238,15 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R3; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2; + let Inst{35-16} = XBD2; let Inst{15-12} = R1; let Inst{11-8} = 0; let Inst{7-0} = op{7-0}; @@ -261,18 +257,14 @@ class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXY<bits<16> op, dag 
outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<20> D2; - bits<4> X2; + bits<28> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2{11-0}; - let Inst{15-8} = D2{19-12}; + let Inst{35-8} = XBD2; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -282,34 +274,31 @@ class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<12> D2; + bits<16> BD2; let Inst{31-24} = op; let Inst{23-20} = R1; let Inst{19-16} = R3; - let Inst{15-12} = B2; - let Inst{11-0} = D2; + let Inst{15-0} = BD2; } class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<20> D2; + bits<24> BD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; let Inst{35-32} = R3; - let Inst{31-28} = B2; - let Inst{27-16} = D2{11-0}; - let Inst{15-8} = D2{19-12}; + let Inst{31-8} = BD2; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -318,44 +307,40 @@ class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; - bits<4> B1; - bits<12> D1; + bits<16> BD1; bits<8> I2; let Inst{31-24} = op; let Inst{23-16} = I2; - let Inst{15-12} = B1; - let Inst{11-0} = D1; + let Inst{15-0} = BD1; } class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { 
field bits<48> Inst; + field bits<48> SoftFail = 0; - bits<4> B1; - bits<12> D1; + bits<16> BD1; bits<16> I2; let Inst{47-32} = op; - let Inst{31-28} = B1; - let Inst{27-16} = D1; + let Inst{31-16} = BD1; let Inst{15-0} = I2; } class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; - bits<4> B1; - bits<20> D1; + bits<24> BD1; bits<8> I2; let Inst{47-40} = op{15-8}; let Inst{39-32} = I2; - let Inst{31-28} = B1; - let Inst{27-16} = D1{11-0}; - let Inst{15-8} = D1{19-12}; + let Inst{31-8} = BD1; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -432,23 +417,23 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, dag src> - : InstRRE<opcode, (outs cls:$dst), (ins), - mnemonic#"\t$dst", - [(set cls:$dst, src)]> { + : InstRRE<opcode, (outs cls:$R1), (ins), + mnemonic#"\t$R1", + [(set cls:$R1, src)]> { let R2 = 0; } class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr), - mnemonic#"\t$dst1, $dst2, $addr", []> { + : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { let mayLoad = 1; } class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, pcrel32:$addr)]> { + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, pcrel32:$I2)]> { let mayStore = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
// However, BDXs have two extra operands and are therefore 6 units more @@ -458,17 +443,17 @@ class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs), (ins cls:$src, mode:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, mode:$addr)]> { + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { let mayStore = 1; } class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, mode:$addr)]> { + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { let mayStore = 1; } @@ -483,32 +468,32 @@ multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, } class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr), - mnemonic#"\t$from, $to, $addr", []> { + : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { let mayStore = 1; } class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, mode:$addr)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mode:$BD1)]> { let mayStore = 1; } class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator 
imm:$src, mode:$addr)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mode:$BD1)]> { let mayStore = 1; } class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, Immediate imm> - : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, bdaddr12only:$addr)]> { + : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, bdaddr12only:$BD1)]> { let mayStore = 1; } @@ -524,38 +509,38 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src), - mnemonic#"\t$dst, $src", - [(set cls1:$dst, (operator cls2:$src))]>; + : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]>; class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src), - mnemonic#"\t$dst, $src", - [(set cls1:$dst, (operator cls2:$src))]>; + : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]>; class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode), - mnemonic#"\t$dst, $mode, $src", []>; + : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2), + mnemonic#"\t$R1, $R3, $R2", []>; class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs cls:$dst), (ins imm:$src), - mnemonic#"\t$dst, $src", - [(set cls:$dst, (operator imm:$src))]>; + : InstRI<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set 
cls:$R1, (operator imm:$I2))]>; class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs cls:$dst), (ins imm:$src), - mnemonic#"\t$dst, $src", - [(set cls:$dst, (operator imm:$src))]>; + : InstRIL<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator pcrel32:$addr))]> { + : InstRIL<opcode, (outs cls:$R1), (ins pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator pcrel32:$I2))]> { let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more @@ -565,25 +550,25 @@ class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs cls:$dst), (ins mode:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator mode:$addr))]> { + : InstRX<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { let mayLoad = 1; } class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator bdxaddr12only:$addr))]> { + : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> { let mayLoad = 1; } class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs cls:$dst), (ins 
mode:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator mode:$addr))]> { + : InstRXY<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { let mayLoad = 1; } @@ -599,83 +584,76 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$dst, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$dst, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } -// Here the assembly and dag operands are in natural order, -// but the first input operand maps to R3 and the second to R2. -// This is used for "CPSDR R1, R3, R2", which is equivalent to -// R1 = copysign (R3, R2). -// -// Direct uses of the instruction must pass operands in encoding order -- -// R1, R2, R3 -- so they must pass the source operands in reverse order. 
-class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1), - mnemonic#"\t$dst, $src1, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>; +class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2), + mnemonic#"\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]>; class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRIL<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { - let Constraints = "$src1 = $dst"; - let 
DisableEncoding = "$src1"; + : InstRX<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, - (load bdxaddr12only:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, + (load bdxaddr12only:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } @@ -693,18 +671,18 @@ multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store 
(operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } @@ -722,49 +700,49 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode> - : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, mode:$src2))]> { + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2), + mnemonic#"\t$R1, $BD2", + [(set cls:$R1, (operator cls:$R1src, mode:$BD2))]> { let R3 = 0; - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode> - : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src1, $src2", - [(set cls:$dst, (operator cls:$src1, mode:$src2))]>; + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator cls:$R3, mode:$BD2))]>; class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls1:$src1, cls2:$src2)]>; + : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]>; class CompareRRE<string 
mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls1:$src1, cls2:$src2)]>; + : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]>; class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, imm:$src2)]>; + : InstRI<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]>; class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, imm:$src2)]>; + : InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]>; class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load pcrel32:$src2))]> { + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, (load pcrel32:$I2))]> { let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
// However, BDXs have two extra operands and are therefore 6 units more @@ -775,26 +753,26 @@ class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load mode:$src2))]> { + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { let mayLoad = 1; } class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load bdxaddr12only:$src2))]> { + : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load bdxaddr12only:$XBD2))]> { let mayLoad = 1; } class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load mode:$src2))]> { + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { let mayLoad = 1; } @@ -814,26 +792,26 @@ multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load mode:$addr), imm:$src)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { let 
mayLoad = 1; } class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm> - : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load bdaddr12only:$addr), imm:$src)]> { + : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load bdaddr12only:$BD1), imm:$I2)]> { let mayLoad = 1; } class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load mode:$addr), imm:$src)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { let mayLoad = 1; } @@ -851,43 +829,43 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3), - mnemonic#"\t$dst, $src2, $src3", - [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2), + mnemonic#"\t$R1, $R3, $R2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRXF<opcode, (outs cls:$dst), - (ins cls:$src1, cls:$src2, bdxaddr12only:$src3), - mnemonic#"\t$dst, $src2, $src3", - [(set cls:$dst, (operator cls:$src1, cls:$src2, - (load bdxaddr12only:$src3)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXF<opcode, (outs cls:$R1), + (ins 
cls:$R1src, cls:$R3, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $R3, $XBD2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, + (load bdxaddr12only:$XBD2)))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; } class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr12only> - : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr), - mnemonic#"\t$dst, $new, $ptr", - [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> { - let Constraints = "$old = $dst"; - let DisableEncoding = "$old"; + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; let mayStore = 1; } class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr20only> - : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr), - mnemonic#"\t$dst, $new, $ptr", - [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> { - let Constraints = "$old = $dst"; - let DisableEncoding = "$old"; + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; let mayStore = 1; } @@ -904,12 +882,12 @@ multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRIEf<opcode, (outs cls1:$dst), - (ins cls1:$src1, cls2:$src2, - uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3), - mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : 
InstRIEf<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, + uimm8zx6:$I3, uimm8zx6:$I4, uimm8zx6:$I5), + mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 7ffa382d36..903fb740a4 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -42,20 +42,19 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, // Unconditional branches. R1 is the condition-code mask (all 1s). let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { let isIndirectBranch = 1 in - def BR : InstRR<0x07, (outs), (ins ADDR64:$dst), - "br\t$dst", [(brind ADDR64:$dst)]>; + def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), + "br\t$R2", [(brind ADDR64:$R2)]>; // An assembler extended mnemonic for BRC. Use a separate instruction for // the asm parser, so that we don't relax Js to external symbols into JGs. let isCodeGenOnly = 1 in - def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; - let isAsmParserOnly = 1 in - def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>; + def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>; // An assembler extended mnemonic for BRCL. (The extension is "G" // rather than "L" because "JL" is "Jump if Less".) - def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst), - "jg\t$dst", [(br bb:$dst)]>; + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + "jg\t$I2", [(br bb:$I2)]>; } // Conditional branches. It's easier for LLVM to handle these branches @@ -64,42 +63,39 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { // JE and JLH when writing out the assembly though. 
multiclass CondBranches<Operand imm, string short, string long> { let isBranch = 1, isTerminator = 1, Uses = [PSW] in { - def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>; - def L : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>; + def "" : InstRI<0xA74, (outs), (ins imm:$R1, brtarget16:$I2), short, []>; + def L : InstRIL<0xC04, (outs), (ins imm:$R1, brtarget32:$I2), long, []>; } } let isCodeGenOnly = 1 in - defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">; -let isAsmParserOnly = 1 in - defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">; + defm BRC : CondBranches<cond4, "j$R1\t$I2", "jg$R1\t$I2">; +defm AsmBRC : CondBranches<uimm8zx4, "brc\t$R1, $I2", "brcl\t$R1, $I2">; def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>; // Define AsmParser mnemonics for each condition code. multiclass CondExtendedMnemonic<bits<4> Cond, string name> { let R1 = Cond in { - def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst), - "j"##name##"\t$dst", []>; - def L : InstRIL<0xC04, (outs), (ins brtarget32:$dst), - "jg"##name##"\t$dst", []>; + def "" : InstRI<0xA74, (outs), (ins brtarget16:$I2), + "j"##name##"\t$I2", []>; + def L : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + "jg"##name##"\t$I2", []>; } } -let isAsmParserOnly = 1 in { - defm AsmJO : CondExtendedMnemonic<1, "o">; - defm AsmJH : CondExtendedMnemonic<2, "h">; - defm AsmJNLE : CondExtendedMnemonic<3, "nle">; - defm AsmJL : CondExtendedMnemonic<4, "l">; - defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; - defm AsmJLH : CondExtendedMnemonic<6, "lh">; - defm AsmJNE : CondExtendedMnemonic<7, "ne">; - defm AsmJE : CondExtendedMnemonic<8, "e">; - defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; - defm AsmJHE : CondExtendedMnemonic<10, "he">; - defm AsmJNL : CondExtendedMnemonic<11, "nl">; - defm AsmJLE : CondExtendedMnemonic<12, "le">; - defm AsmJNH : CondExtendedMnemonic<13, "nh">; - defm AsmJNO : 
CondExtendedMnemonic<14, "no">; -} +defm AsmJO : CondExtendedMnemonic<1, "o">; +defm AsmJH : CondExtendedMnemonic<2, "h">; +defm AsmJNLE : CondExtendedMnemonic<3, "nle">; +defm AsmJL : CondExtendedMnemonic<4, "l">; +defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; +defm AsmJLH : CondExtendedMnemonic<6, "lh">; +defm AsmJNE : CondExtendedMnemonic<7, "ne">; +defm AsmJE : CondExtendedMnemonic<8, "e">; +defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; +defm AsmJHE : CondExtendedMnemonic<10, "he">; +defm AsmJNL : CondExtendedMnemonic<11, "nl">; +defm AsmJLE : CondExtendedMnemonic<12, "le">; +defm AsmJNH : CondExtendedMnemonic<13, "nh">; +defm AsmJNO : CondExtendedMnemonic<14, "no">; def Select32 : SelectWrapper<GR32>; def Select64 : SelectWrapper<GR64>; @@ -112,24 +108,22 @@ def Select64 : SelectWrapper<GR64>; let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D], R1 = 14, isCodeGenOnly = 1 in { - def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops), - "bras\t%r14, $dst", []>; - def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops), - "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>; - def BASR : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops), - "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>; + def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops), + "bras\t%r14, $I2", []>; + def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$I2, variable_ops), + "brasl\t%r14, $I2", [(z_call pcrel32call:$I2)]>; + def BASR : InstRR<0x0D, (outs), (ins ADDR64:$R2, variable_ops), + "basr\t%r14, $R2", [(z_call ADDR64:$R2)]>; } // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. 
-let isAsmParserOnly = 1 in { - def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst), - "bras\t$save, $dst", []>; - def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst), - "brasl\t$save, $dst", []>; - def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst), - "basr\t$save, $dst", []>; -} +def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), + "bras\t$R1, $I2", []>; +def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), + "brasl\t$R1, $I2", []>; +def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), + "basr\t$R1, $R2", []>; //===----------------------------------------------------------------------===// // Move instructions @@ -337,21 +331,21 @@ def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>; // Load BDX-style addresses. let neverHasSideEffects = 1, Function = "la" in { let PairType = "12" in - def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src), - "la\t$dst, $src", - [(set GR64:$dst, laaddr12pair:$src)]>; + def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2), + "la\t$R1, $XBD2", + [(set GR64:$R1, laaddr12pair:$XBD2)]>; let PairType = "20" in - def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src), - "lay\t$dst, $src", - [(set GR64:$dst, laaddr20pair:$src)]>; + def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2), + "lay\t$R1, $XBD2", + [(set GR64:$R1, laaddr20pair:$XBD2)]>; } // Load a PC-relative address. There's no version of this instruction // with a 16-bit offset, so there's no relaxation. 
let neverHasSideEffects = 1 in { - def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src), - "larl\t$dst, $src", - [(set GR64:$dst, pcrel32:$src)]>; + def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2), + "larl\t$R1, $I2", + [(set GR64:$R1, pcrel32:$I2)]>; } //===----------------------------------------------------------------------===// @@ -484,6 +478,7 @@ let Defs = [PSW] in { def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>; // Subtraction of memory. + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16>; defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>; def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>; def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>; @@ -903,9 +898,9 @@ let Defs = [PSW] in { // Read a 32-bit access register into a GR32. As with all GR32 operations, // the upper 32 bits of the enclosing GR64 remain unchanged, which is useful // when a 64-bit address is stored in a pair of access registers. -def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src), - "ear\t$dst, $src", - [(set GR32:$dst, (z_extract_access access_reg:$src))]>; +def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2), + "ear\t$R1, $R2", + [(set GR32:$R1, (z_extract_access access_reg:$R2))]>; // Find leftmost one, AKA count leading zeros. 
The instruction actually // returns a pair of GR64s, the first giving the number of leading zeros diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 0abc3f7517..66d9c5fceb 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -24,14 +24,30 @@ class ImmediateAsmOperand<string name> class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> : PatLeaf<(vt imm), pred, xform>, Operand<vt> { let PrintMethod = "print"##asmop##"Operand"; + let DecoderMethod = "decode"##asmop##"Operand"; let ParserMatchClass = !cast<AsmOperandClass>(asmop); } +// Constructs an asm operand for a PC-relative address. SIZE says how +// many bits there are. +class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> { + let PredicateMethod = "isImm"; + let ParserMethod = "parsePCRel"##size; +} + +// Constructs an operand for a PC-relative address with address type VT. +// ASMOP is the associated asm operand. +class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelOperand"; + let ParserMatchClass = asmop; +} + // Constructs both a DAG pattern and instruction operand for a PC-relative -// address with address size VT. SELF is the name of the operand. -class PCRelAddress<ValueType vt, string self> +// address with address size VT. SELF is the name of the operand and +// ASMOP is the associated asm operand. +class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop> : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>, - Operand<vt> { + PCRelOperand<vt, asmop> { let MIOperandInfo = (ops !cast<Operand>(self)); } @@ -45,11 +61,14 @@ class AddressAsmOperand<string format, string bitsize, string dispsize> } // Constructs both a DAG pattern and instruction operand for an addressing mode. -// The mode is selected by custom code in selectTYPE...SUFFIX(). 
The address -// registers have BITSIZE bits and displacements have DISPSIZE bits. NUMOPS is -// the number of operands that make up an address and OPERANDS lists the types -// of those operands using (ops ...). FORMAT is the type of addressing mode, -// which needs to match the names used in AddressAsmOperand. +// The mode is selected by custom code in select<TYPE><DISPSIZE><SUFFIX>(), +// encoded by custom code in get<FORMAT><DISPSIZE>Encoding() and decoded +// by custom code in decode<TYPE><BITSIZE>Disp<DISPSIZE>Operand(). +// The address registers have BITSIZE bits and displacements have +// DISPSIZE bits. NUMOPS is the number of operands that make up an +// address and OPERANDS lists the types of those operands using (ops ...). +// FORMAT is the type of addressing mode, which needs to match the names +// used in AddressAsmOperand. class AddressingMode<string type, string bitsize, string dispsize, string suffix, int numops, string format, dag operands> : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, @@ -57,6 +76,8 @@ class AddressingMode<string type, string bitsize, string dispsize, [add, sub, or, frameindex, z_adjdynalloc]>, Operand<!cast<ValueType>("i"##bitsize)> { let PrintMethod = "print"##format##"Operand"; + let EncoderMethod = "get"##format##dispsize##"Encoding"; + let DecoderMethod = "decode"##format##bitsize##"Disp"##dispsize##"Operand"; let MIOperandInfo = operands; let ParserMatchClass = !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize); @@ -334,30 +355,39 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; // Symbolic address operands //===----------------------------------------------------------------------===// +// PC-relative asm operands. +def PCRel16 : PCRelAsmOperand<"16">; +def PCRel32 : PCRelAsmOperand<"32">; + // PC-relative offsets of a basic block. The offset is sign-extended // and multiplied by 2. 
-def brtarget16 : Operand<OtherVT> { +def brtarget16 : PCRelOperand<OtherVT, PCRel16> { let EncoderMethod = "getPC16DBLEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; } -def brtarget32 : Operand<OtherVT> { +def brtarget32 : PCRelOperand<OtherVT, PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } // A PC-relative offset of a global value. The offset is sign-extended // and multiplied by 2. -def pcrel32 : PCRelAddress<i64, "pcrel32"> { +def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } // A PC-relative offset of a global value when the value is used as a // call target. The offset is sign-extended and multiplied by 2. -def pcrel16call : PCRelAddress<i64, "pcrel16call"> { +def pcrel16call : PCRelAddress<i64, "pcrel16call", PCRel16> { let PrintMethod = "printCallOperand"; let EncoderMethod = "getPLT16DBLEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; } -def pcrel32call : PCRelAddress<i64, "pcrel32call"> { +def pcrel32call : PCRelAddress<i64, "pcrel32call", PCRel32> { let PrintMethod = "printCallOperand"; let EncoderMethod = "getPLT32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 8c4c456ef5..17450ee53e 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -33,6 +33,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, "-f32:32-f64:64-f128:64-a0:8:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this, Subtarget) { + initAsmInfo(); } namespace { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 019a670083..263eb5ed9c 100644 --- 
a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1196,6 +1196,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, } } assert (Found && "Unable to rewrite ImmDisp."); + (void)Found; } else { // We have a symbolic and an immediate displacement, but no displacement // before the bracketed expression. Put the immediate displacement diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 226ebca8cb..d5aab8e0a2 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -263,7 +263,7 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { return X; } -static MCAsmInfo *createX86MCAsmInfo(StringRef TT) { +static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); bool is64Bit = TheTriple.getArch() == Triple::x86_64; @@ -290,14 +290,16 @@ static MCAsmInfo *createX86MCAsmInfo(StringRef TT) { int stackGrowth = is64Bit ? -8 : -4; // Initial state of the frame pointer is esp+stackGrowth. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(is64Bit ? X86::RSP : X86::ESP, stackGrowth); - MAI->addInitialFrameState(0, Dst, Src); + unsigned StackPtr = is64Bit ? X86::RSP : X86::ESP; + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( + 0, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth); + MAI->addInitialFrameState(Inst); // Add return address to move list - MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth); - MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP); - MAI->addInitialFrameState(0, CSDst, CSSrc); + unsigned InstPtr = is64Bit ? 
X86::RIP : X86::EIP; + MCCFIInstruction Inst2 = MCCFIInstruction::createOffset( + 0, MRI.getDwarfRegNum(InstPtr, true), stackGrowth); + MAI->addInitialFrameState(Inst2); return MAI; } diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 49721df7c1..07314a092c 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -357,21 +357,21 @@ defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw, defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b, MMX_INTALU_ITINS>; defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q, - MMX_INTALUQ_ITINS, 1>; + MMX_INTALUQ_ITINS>; defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS>; defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w, MMX_PHADDSUBW>; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 00fa47f80b..0422a61fb8 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -49,6 +49,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), JITInfo(*this) { + initAsmInfo(); } void X86_64TargetMachine::anchor() { } @@ -69,6 +70,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, TLInfo(*this), 
TSInfo(*this), JITInfo(*this) { + initAsmInfo(); } /// X86TargetMachine ctor - Create an X86 target. diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index e38da34a81..10bb6dfa92 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -51,13 +51,13 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createXCoreMCAsmInfo(StringRef TT) { +static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { MCAsmInfo *MAI = new XCoreMCAsmInfo(TT); // Initial state of the frame pointer is SP. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(XCore::SP, 0); - MAI->addInitialFrameState(0, Dst, Src); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, XCore::SP, 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 07e5fff141..3ef1520c71 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -33,6 +33,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this) { + initAsmInfo(); } namespace { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 0ef900e2b9..4a9cb27b03 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -3323,8 +3323,6 @@ bool GlobalOpt::runOnModule(Module &M) { // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); - Function *CXAAtExitFn = FindCXAAtExit(M, TLI); - bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -3342,7 +3340,9 @@ bool GlobalOpt::runOnModule(Module &M) { // Resolve aliases, when possible. 
LocalChange |= OptimizeGlobalAliases(M); - // Try to remove trivial global destructors. + // Try to remove trivial global destructors if they are not removed + // already. + Function *CXAAtExitFn = FindCXAAtExit(M, TLI); if (CXAAtExitFn) LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 87d56214a3..51ca29bc07 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -846,7 +846,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { /// FP value and: /// 1) 1/C is exact, or /// 2) reciprocal is allowed. -/// If the convertion was successful, the simplified expression "X * 1/C" is +/// If the conversion was successful, the simplified expression "X * 1/C" is /// returned; otherwise, NULL is returned. /// static Instruction *CvtFDivConstToReciprocal(Value *Dividend, diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 43e2e20035..4bf25facc6 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -107,6 +107,12 @@ namespace { return std::make_pair(Vector.begin() + Pair.first->second, false); } + iterator find(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + const_iterator find(const KeyT &Key) const { typename MapTy::const_iterator It = Map.find(Key); if (It == Map.end()) return Vector.end(); @@ -253,6 +259,40 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { return false; } +/// This is a wrapper around getUnderlyingObjCPtr along the lines of +/// GetUnderlyingObjects except that it returns early when it sees the first +/// alloca. 
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) { + SmallPtrSet<const Value *, 4> Visited; + SmallVector<const Value *, 4> Worklist; + Worklist.push_back(V); + do { + const Value *P = Worklist.pop_back_val(); + P = GetUnderlyingObjCPtr(P); + + if (isa<AllocaInst>(P)) + return true; + + if (!Visited.insert(P)) + continue; + + if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + if (const PHINode *PN = dyn_cast<const PHINode>(P)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + } while (!Worklist.empty()); + + return false; +} + + /// @} /// /// \defgroup ARCOpt ARC Optimization. @@ -300,18 +340,18 @@ STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); STATISTIC(NumRets, "Number of return value forwarding " - "retain+autoreleaes eliminated"); + "retain+autoreleases eliminated"); STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +#ifndef NDEBUG STATISTIC(NumRetainsBeforeOpt, - "Number of retains before optimization."); + "Number of retains before optimization"); STATISTIC(NumReleasesBeforeOpt, - "Number of releases before optimization."); -#ifndef NDEBUG + "Number of releases before optimization"); STATISTIC(NumRetainsAfterOpt, - "Number of retains after optimization."); + "Number of retains after optimization"); STATISTIC(NumReleasesAfterOpt, - "Number of releases after optimization."); + "Number of releases after optimization"); #endif namespace { @@ -414,8 +454,18 @@ namespace { /// sequence. SmallPtrSet<Instruction *, 2> ReverseInsertPts; + /// Does this pointer have multiple owners? 
+ /// + /// In the presence of multiple owners with the same provenance caused by + /// allocas, we can not assume that the frontend will emit balanced code + /// since it could put the release on the pointer loaded from the + /// alloca. This confuses the optimizer so we must be more conservative in + /// that case. + bool MultipleOwners; + RRInfo() : - KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0), + MultipleOwners(false) {} void clear(); @@ -428,6 +478,7 @@ namespace { void RRInfo::clear() { KnownSafe = false; IsTailCallRelease = false; + MultipleOwners = false; ReleaseMetadata = 0; Calls.clear(); ReverseInsertPts.clear(); @@ -457,10 +508,12 @@ namespace { Seq(S_None) {} void SetKnownPositiveRefCount() { + DEBUG(dbgs() << "Setting Known Positive.\n"); KnownPositiveRefCount = true; } void ClearKnownPositiveRefCount() { + DEBUG(dbgs() << "Clearing Known Positive.\n"); KnownPositiveRefCount = false; } @@ -516,6 +569,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); + RRI.MultipleOwners |= Other.RRI.MultipleOwners; // Merge the insert point sets. If there are any differences, // that makes this a partial merge. @@ -587,14 +641,26 @@ namespace { /// definition. void SetAsExit() { BottomUpPathCount = 1; } + /// Attempt to find the PtrState object describing the top down state for + /// pointer Arg. Return a new initialized PtrState describing the top down + /// state for Arg if we do not find one. PtrState &getPtrTopDownState(const Value *Arg) { return PerPtrTopDown[Arg]; } + /// Attempt to find the PtrState object describing the bottom up state for + /// pointer Arg. Return a new initialized PtrState describing the bottom up + /// state for Arg if we do not find one. 
PtrState &getPtrBottomUpState(const Value *Arg) { return PerPtrBottomUp[Arg]; } + /// Attempt to find the PtrState object describing the bottom up state for + /// pointer Arg. + ptr_iterator findPtrBottomUpState(const Value *Arg) { + return PerPtrBottomUp.find(Arg); + } + void clearBottomUpPointers() { PerPtrBottomUp.clear(); } @@ -1440,11 +1506,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_RetainBlock: // If we strength reduce an objc_retainBlock to an objc_retain, continue // onto the objc_retain peephole optimizations. Otherwise break. - if (!OptimizeRetainBlockCall(F, Inst, Class)) - break; - // FALLTHROUGH - case IC_Retain: - ++NumRetainsBeforeOpt; + OptimizeRetainBlockCall(F, Inst, Class); break; case IC_RetainRV: if (OptimizeRetainRVCall(F, Inst)) @@ -1453,9 +1515,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_AutoreleaseRV: OptimizeAutoreleaseRVCall(F, Inst, Class); break; - case IC_Release: - ++NumReleasesBeforeOpt; - break; } // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. @@ -1866,6 +1925,28 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case IC_None: // These are irrelevant. return NestingDetected; + case IC_User: + // If we have a store into an alloca of a pointer we are tracking, the + // pointer has multiple owners implying that we must be more conservative. + // + // This comes up in the context of a pointer being ``KnownSafe''. In the + // presense of a block being initialized, the frontend will emit the + // objc_retain on the original pointer and the release on the pointer loaded + // from the alloca. The optimizer will through the provenance analysis + // realize that the two are related, but since we only require KnownSafe in + // one direction, will match the inner retain on the original pointer with + // the guard release on the original pointer. 
This is fixed by ensuring that + // in the presense of allocas we only unconditionally remove pointers if + // both our retain and our release are KnownSafe. + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) { + BBState::ptr_iterator I = MyStates.findPtrBottomUpState( + StripPointerCastsAndObjCCalls(SI->getValueOperand())); + if (I != MyStates.bottom_up_ptr_end()) + I->second.RRI.MultipleOwners = true; + } + } + break; default: break; } @@ -2412,8 +2493,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> bool KnownSafe, bool &AnyPairsCompletelyEliminated) { // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. + // is already incremented, we can similarly ignore possible decrements unless + // we are dealing with a retainable object with multiple provenance sources. bool KnownSafeTD = true, KnownSafeBU = true; + bool MultipleOwners = false; // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. @@ -2432,6 +2515,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> assert(It != Retains.end()); const RRInfo &NewRetainRRI = It->second; KnownSafeTD &= NewRetainRRI.KnownSafe; + MultipleOwners |= NewRetainRRI.MultipleOwners; for (SmallPtrSet<Instruction *, 2>::const_iterator LI = NewRetainRRI.Calls.begin(), LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { @@ -2525,9 +2609,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (NewRetains.empty()) break; } - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { + // If the pointer is known incremented in 1 direction and we do not have + // MultipleOwners, we can safely remove the retain/releases. 
Otherwise we need + // to be known safe in both directions. + bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) || + ((KnownSafeTD || KnownSafeBU) && !MultipleOwners); + if (UnconditionallySafe) { RetainsToMove.ReverseInsertPts.clear(); ReleasesToMove.ReverseInsertPts.clear(); NewCount = 0; @@ -3050,6 +3137,12 @@ bool ObjCARCOpt::runOnFunction(Function &F) { PA.setAA(&getAnalysis<AliasAnalysis>()); +#ifndef NDEBUG + if (AreStatisticsEnabled()) { + GatherStatistics(F, false); + } +#endif + // This pass performs several distinct transformations. As a compile-time aid // when compiling code that isn't ObjC, skip these if the relevant ObjC // library functions aren't declared. diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0dd6abb1ae..58a1a74655 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -318,6 +318,93 @@ private: ValueMap WidenMap; }; +/// \brief Check if conditionally executed loads are hoistable. +/// +/// This class has two functions: isHoistableLoad and canHoistAllLoads. +/// isHoistableLoad should be called on all load instructions that are executed +/// conditionally. After all conditional loads are processed, the client should +/// call canHoistAllLoads to determine if all of the conditional executed loads +/// have an unconditional memory access to the same memory address in the loop. +class LoadHoisting { + typedef SmallPtrSet<Value *, 8> MemorySet; + + Loop *TheLoop; + DominatorTree *DT; + MemorySet CondLoadAddrSet; + +public: + LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {} + + /// \brief Check if the instruction is a load with a identifiable address. + bool isHoistableLoad(Instruction *L); + + /// \brief Check if all of the conditional loads are hoistable because there + /// exists an unconditional memory access to the same address in the loop. 
+ bool canHoistAllLoads(); +}; + +bool LoadHoisting::isHoistableLoad(Instruction *L) { + LoadInst *LI = dyn_cast<LoadInst>(L); + if (!LI) + return false; + + CondLoadAddrSet.insert(LI->getPointerOperand()); + return true; +} + +static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + Value *Addr = 0; + + // Try a load. + LoadInst *LI = dyn_cast<LoadInst>(I); + if (LI) { + Addr = LI->getPointerOperand(); + Set.insert(Addr); + continue; + } + + // Try a store. + StoreInst *SI = dyn_cast<StoreInst>(I); + if (!SI) + continue; + + Addr = SI->getPointerOperand(); + Set.insert(Addr); + } +} + +bool LoadHoisting::canHoistAllLoads() { + // No conditional loads. + if (CondLoadAddrSet.empty()) + return true; + + MemorySet UncondMemAccesses; + std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector(); + BasicBlock *LoopLatch = TheLoop->getLoopLatch(); + + // Iterate over the unconditional blocks and collect memory access addresses. + for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { + BasicBlock *BB = LoopBlocks[i]; + + // Ignore conditional blocks. + if (BB != LoopLatch && !DT->dominates(BB, LoopLatch)) + continue; + + addMemAccesses(BB, UncondMemAccesses); + } + + // And make sure there is a matching unconditional access for every + // conditional load. + for (MemorySet::iterator MI = CondLoadAddrSet.begin(), + ME = CondLoadAddrSet.end(); MI != ME; ++MI) + if (!UncondMemAccesses.count(*MI)) + return false; + + return true; +} + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. 
/// This class does not look at the profitability of vectorization, only the @@ -337,7 +424,8 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {} + Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false), + LoadSpeculation(L, DT) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -598,6 +686,9 @@ private: RuntimePointerCheck PtrRtCheck; /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; + + /// Utility to determine whether loads can be speculated. + LoadHoisting LoadSpeculation; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1389,9 +1480,10 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { - // Handle the integer induction counter: + // Handle the integer induction counter. assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); - assert(OrigPhi == OldInduction && "Unknown integer PHI"); + + // We have the canonical induction variable. if (OrigPhi == OldInduction) { // Create a truncated version of the resume value for the scalar loop, // we might have promoted the type to a larger width. @@ -1402,11 +1494,20 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); TruncResumeVal->addIncoming(EndValue, VecBody); + + // We know what the end value is. + EndValue = IdxEndRoundDown; + // We also know which PHI node holds it. + ResumeIndex = ResumeVal; + break; } - // We know what the end value is. - EndValue = IdxEndRoundDown; - // We also know which PHI node holds it. 
- ResumeIndex = ResumeVal; + + // Not the canonical induction variable - add the vector loop count to the + // start value. + Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, + II.StartValue->getType(), + "cast.crd"); + EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end"); break; } case LoopVectorizationLegality::IK_ReverseIntInduction: { @@ -2056,12 +2157,25 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { - assert(P == OldInduction && "Unexpected PHI"); - // We might have had to extend the type. - Value *Trunc = Builder.CreateTrunc(Induction, P->getType()); - Value *Broadcasted = getBroadcastInstrs(Trunc); - // After broadcasting the induction variable we need to make the - // vector consecutive by adding 0, 1, 2 ... + assert(P->getType() == II.StartValue->getType() && "Types must match"); + Type *PhiTy = P->getType(); + Value *Broadcasted; + if (P == OldInduction) { + // Handle the canonical induction variable. We might have had to + // extend the type. + Broadcasted = Builder.CreateTrunc(Induction, PhiTy); + } else { + // Handle other induction variables that are now based on the + // canonical one. + Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, + "normalized.idx"); + NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); + Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, + "offset.idx"); + } + Broadcasted = getBroadcastInstrs(Broadcasted); + // After broadcasting the induction variable we need to make the vector + // consecutive by adding 0, 1, 2, etc. for (unsigned part = 0; part < UF; ++part) Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); continue; @@ -2466,11 +2580,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Int inductions are special because we only allow one IV. 
if (IK == IK_IntInduction) { - if (Induction) { - DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); - return false; - } - Induction = Phi; + // Use the phi node with the widest type as induction. Use the last + // one if there are multiple (no good reason for doing this other + // than it is expedient). + if (!Induction || PhiTy == WidestIndTy) + Induction = Phi; } DEBUG(dbgs() << "LV: Found an induction variable.\n"); @@ -3236,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // We don't predicate loads/stores at the moment. - if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow()) + // We might be able to hoist the load. + if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it)) + return false; + + // We don't predicate stores at the moment. + if (it->mayWriteToMemory() || it->mayThrow()) return false; // The instructions below can trap. @@ -3251,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { } } + // Check that we can actually speculate the hoistable loads. 
+ if (!LoadSpeculation.canHoistAllLoads()) + return false; + return true; } diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 55adf8a816..50d2af0f65 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -282,6 +282,7 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { DEBUG(dbgs()<<"SLP: Adding to MustExtract " "because of a safe out of tree usage.\n"); MustExtract.insert(*it); + continue; } if (Lane == -1) Lane = LaneMap[*I]; if (Lane != LaneMap[*I]) { @@ -610,6 +611,9 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { GatherInstructions.push_back(Vec); } + for (unsigned i = 0; i < Ty->getNumElements(); ++i) + VectorizedValues[VL[i]] = Vec; + return Vec; } @@ -617,6 +621,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { Value *V = vectorizeTree_rec(VL, VF); Instruction *LastInstr = GetLastInstr(VL, VL.size()); + int LastInstrIdx = InstrIdx[LastInstr]; IRBuilder<> Builder(LastInstr); for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end(); it != e; ++it) { @@ -625,7 +630,16 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { assert(LaneMap.count(I) && "Unable to find the lane for the external use"); Value *Idx = Builder.getInt32(LaneMap[I]); Value *Extract = Builder.CreateExtractElement(Vec, Idx); - I->replaceAllUsesWith(Extract); + bool Replaced = false; + for (Value::use_iterator U = I->use_begin(), UE = U->use_end(); U != UE; + ++U) { + Instruction *UI = cast<Instruction>(*U); + if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx) + UI->replaceUsesOfWith(I ,Extract); + Replaced = true; + } + assert(Replaced && "Must replace at least one outside user"); + (void)Replaced; } // We moved some instructions around. 
We have to number them again @@ -633,6 +647,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { numberInstructions(); MustScalarize.clear(); MustExtract.clear(); + VectorizedValues.clear(); return V; } @@ -690,7 +705,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { IRBuilder<> Builder(GetLastInstr(VL, VF)); CastInst *CI = dyn_cast<CastInst>(VL0); Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); - VectorizedValues[VL0] = V; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = V; + return V; } case Instruction::Add: @@ -713,16 +731,19 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { case Instruction::Xor: { ValueList LHSVL, RHSVL; for (int i = 0; i < VF; ++i) { - RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); - LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); + LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); } - Value *RHS = vectorizeTree_rec(RHSVL, VF); Value *LHS = vectorizeTree_rec(LHSVL, VF); + Value *RHS = vectorizeTree_rec(RHSVL, VF); IRBuilder<> Builder(GetLastInstr(VL, VF)); BinaryOperator *BinOp = cast<BinaryOperator>(VL0); - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); - VectorizedValues[VL0] = V; + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS); + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = V; + return V; } case Instruction::Load: { @@ -739,7 +760,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { VecTy->getPointerTo()); LI = Builder.CreateLoad(VecPtr); LI->setAlignment(Alignment); - VectorizedValues[VL0] = LI; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = LI; + return LI; } case Instruction::Store: { @@ -762,9 +786,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { return 0; } default: - Value *S = Scalarize(VL, VecTy); - VectorizedValues[VL0] = S; - return S; + return Scalarize(VL, VecTy); 
} } |