5 files changed, 159 insertions, 57 deletions
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 02f5fe4952..f9979a59d5 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -204,6 +204,9 @@ namespace {
     bool SelectLEAAddr(SDValue N, SDValue &Base,
                        SDValue &Scale, SDValue &Index, SDValue &Disp,
                        SDValue &Segment);
+    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
+                            SDValue &Scale, SDValue &Index, SDValue &Disp,
+                            SDValue &Segment);
     bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
@@ -1394,7 +1397,8 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
   // In static codegen with small code model, we can get the address of a label
   // into a register with 'movl'. TableGen has already made sure we're looking
   // at a label of some kind.
-  assert(N->getOpcode() == X86ISD::Wrapper && "Unexpected node type for MOV32ri64");
+  assert(N->getOpcode() == X86ISD::Wrapper &&
+         "Unexpected node type for MOV32ri64");
   N = N.getOperand(0);
 
   if (N->getOpcode() != ISD::TargetConstantPool &&
@@ -1408,6 +1412,43 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
   return TM.getCodeModel() == CodeModel::Small;
 }
 
+bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
+                                         SDValue &Scale, SDValue &Index,
+                                         SDValue &Disp, SDValue &Segment) {
+  if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
+    return false;
+
+  SDLoc DL(N);
+  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
+  if (RN && RN->getReg() == 0)
+    Base = CurDAG->getRegister(0, MVT::i64);
+  else if (Base.getValueType() == MVT::i32) {
+    // Base could already be %rip, particularly in the x32 ABI.
+    Base = SDValue(CurDAG->getMachineNode(
+                       TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
+                       CurDAG->getTargetConstant(0, MVT::i64),
+                       Base,
+                       CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+                   0);
+  }
+
+  RN = dyn_cast<RegisterSDNode>(Index);
+  if (RN && RN->getReg() == 0)
+    Index = CurDAG->getRegister(0, MVT::i64);
+  else {
+    assert(Index.getValueType() == MVT::i32 &&
+           "Expect to be extending 32-bit registers for use in LEA");
+    Index = SDValue(CurDAG->getMachineNode(
+                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
+                        CurDAG->getTargetConstant(0, MVT::i64),
+                        Index,
+                        CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+                    0);
+  }
+
+  return true;
+}
+
 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LEA instruction.
 bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5ed8604a39..86361ff8e3 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1778,11 +1778,16 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   bool isDead = MI->getOperand(0).isDead();
   bool isKill = MI->getOperand(1).isKill();
 
-  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
-    ? X86::LEA64_32r : X86::LEA32r;
   MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
   unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  unsigned Opc, leaInReg;
+  if (TM.getSubtarget<X86Subtarget>().is64Bit()) {
+    Opc = X86::LEA64_32r;
+    leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+  } else {
+    Opc = X86::LEA32r;
+    leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+  }
 
   // Build and insert into an implicit UNDEF value. This is OK because
   // well be shifting and then extracting the lower 16-bits.
@@ -1832,7 +1837,10 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
       // just a single insert_subreg.
       addRegReg(MIB, leaInReg, true, leaInReg, false);
     } else {
-      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+      if (TM.getSubtarget<X86Subtarget>().is64Bit())
+        leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+      else
+        leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
       // Build and insert into an implicit UNDEF value. This is OK because
       // well be shifting and then extracting the lower 16-bits.
       BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
@@ -1953,15 +1961,27 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;
 
     // LEA can't handle ESP.
-    if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
-        !MF.getRegInfo().constrainRegClass(Src.getReg(),
-                                           &X86::GR32_NOSPRegClass))
+    bool isKill = Src.isKill();
+    unsigned SrcReg = Src.getReg();
+    if (is64Bit) {
+      unsigned NewSrc = MF.getRegInfo().createVirtualRegister(&X86::GR64_NOSPRegClass);
+      BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+        .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
+        .addOperand(Src);
+
+      SrcReg = NewSrc;
+      isKill = true;
+    } else if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+               !MF.getRegInfo().constrainRegClass(SrcReg,
+                                                  &X86::GR32_NOSPRegClass)) {
       return 0;
+    }
 
     unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
     NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
       .addOperand(Dest)
-      .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
+      .addReg(0).addImm(1 << ShAmt)
+      .addReg(SrcReg, getKillRegState(isKill)).addImm(0).addReg(0);
     break;
   }
   case X86::SHL16ri: {
@@ -1986,17 +2006,28 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
       unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
         : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
-      const TargetRegisterClass *RC = MIOpc == X86::INC64r ?
+      const TargetRegisterClass *RC = is64Bit ?
         (const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
         (const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
 
       // LEA can't handle RSP.
-      if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
-          !MF.getRegInfo().constrainRegClass(Src.getReg(), RC))
+      bool isKill = Src.isKill();
+      unsigned SrcReg = Src.getReg();
+      if (Opc == X86::LEA64_32r) {
+        unsigned NewSrc = MF.getRegInfo().createVirtualRegister(&X86::GR64_NOSPRegClass);
+        BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                get(TargetOpcode::COPY))
+            .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
+            .addOperand(Src);
+
+        SrcReg = NewSrc;
+        isKill = true;
+      } else if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+                 !MF.getRegInfo().constrainRegClass(SrcReg, RC))
         return 0;
 
       NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                        .addOperand(Dest).addOperand(Src), 1);
+                  .addOperand(Dest).addReg(SrcReg, getKillRegState(isKill)), 1);
       break;
     }
     case X86::INC16r:
@@ -2013,16 +2044,28 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
       unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
         : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
-      const TargetRegisterClass *RC = MIOpc == X86::DEC64r ?
+      const TargetRegisterClass *RC = is64Bit ?
         (const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
         (const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
       // LEA can't handle RSP.
-      if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
-          !MF.getRegInfo().constrainRegClass(Src.getReg(), RC))
+      bool isKill = Src.isKill();
+      unsigned SrcReg = Src.getReg();
+      if (Opc == X86::LEA64_32r) {
+        unsigned NewSrc =
+            MF.getRegInfo().createVirtualRegister(&X86::GR64_NOSPRegClass);
+        BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                get(TargetOpcode::COPY))
+            .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
+            .addOperand(Src);
+
+        SrcReg = NewSrc;
+        isKill = true;
+      } else if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+          !MF.getRegInfo().constrainRegClass(SrcReg, RC))
         return 0;
 
       NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                        .addOperand(Dest).addOperand(Src), -1);
+                 .addOperand(Dest).addReg(SrcReg, getKillRegState(isKill)), -1);
       break;
     }
     case X86::DEC16r:
@@ -2045,30 +2088,50 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
         RC = &X86::GR64_NOSPRegClass;
       } else {
         Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-        RC = &X86::GR32_NOSPRegClass;
+        RC = is64Bit ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
       }
 
-
-      unsigned Src2 = MI->getOperand(2).getReg();
-      bool isKill2 = MI->getOperand(2).isKill();
+      unsigned SrcReg = Src.getReg();
+      unsigned isKill = Src.isKill();
+      const MachineOperand &Src2 = MI->getOperand(2);
+      unsigned Src2Reg = Src2.getReg();
+      bool isKill2 = Src2.isKill();
 
       // LEA can't handle RSP.
-      if (TargetRegisterInfo::isVirtualRegister(Src2) &&
-          !MF.getRegInfo().constrainRegClass(Src2, RC))
+      if (Opc == X86::LEA64_32r) {
+        MachineBasicBlock &MBB = *MI->getParent();
+
+        unsigned NewSrc = MF.getRegInfo().createVirtualRegister(RC);
+        BuildMI(MBB, MI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+            .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
+            .addOperand(Src);
+        SrcReg = NewSrc;
+        isKill = true;
+
+        NewSrc = MF.getRegInfo().createVirtualRegister(RC);
+        BuildMI(MBB, MI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+            .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
+            .addOperand(Src2);
+        Src2Reg = NewSrc;
+        isKill2 = true;
+      } else if (TargetRegisterInfo::isVirtualRegister(Src2Reg) &&
+                 !MF.getRegInfo().constrainRegClass(Src2Reg, RC))
         return 0;
 
       NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                         .addOperand(Dest),
-                        Src.getReg(), Src.isKill(), Src2, isKill2);
+                        SrcReg, isKill, Src2Reg, isKill2);
 
       // Preserve undefness of the operands.
-      bool isUndef = MI->getOperand(1).isUndef();
-      bool isUndef2 = MI->getOperand(2).isUndef();
-      NewMI->getOperand(1).setIsUndef(isUndef);
-      NewMI->getOperand(3).setIsUndef(isUndef2);
+      if (!is64Bit) {
+        bool isUndef = MI->getOperand(1).isUndef();
+        bool isUndef2 = MI->getOperand(2).isUndef();
+        NewMI->getOperand(1).setIsUndef(isUndef);
+        NewMI->getOperand(3).setIsUndef(isUndef2);
+      }
 
-      if (LV && isKill2)
-        LV->replaceKillInstruction(Src2, MI, NewMI);
+      if (LV && Src2.isKill())
+        LV->replaceKillInstruction(Src2Reg, MI, NewMI);
       break;
     }
     case X86::ADD16rr:
@@ -2106,10 +2169,24 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     case X86::ADD32ri_DB:
     case X86::ADD32ri8_DB: {
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-      NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                        .addOperand(Dest).addOperand(Src),
-                        MI->getOperand(2).getImm());
+      if (is64Bit) {
+        unsigned NewSrc =
+            MF.getRegInfo().createVirtualRegister(&X86::GR64_NOSPRegClass);
+        BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                get(TargetOpcode::COPY))
+            .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
+            .addOperand(Src);
+
+        NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64_32r))
+                              .addOperand(Dest)
+                              .addReg(NewSrc, getKillRegState(true)),
+                          MI->getOperand(2).getImm());
+      } else {
+        NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA32r))
+                              .addOperand(Dest).addOperand(Src),
+                          MI->getOperand(2).getImm());
+      }
+
       break;
     }
     case X86::ADD16ri:
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index ad26bce01b..817bd6cc34 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -523,8 +523,7 @@ def i64i8imm   : Operand<i64> {
 
 def lea64_32mem : Operand<i32> {
   let PrintMethod = "printi32mem";
-  let AsmOperandLowerMethod = "lower_lea64_32mem";
-  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+  let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
   let ParserMatchClass = X86MemAsmOperand;
 }
 
@@ -546,7 +545,7 @@ def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
                                [add, sub, mul, X86mul_imm, shl, or, frameindex],
                                []>;
 // In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
-def lea64_32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+def lea64_32addr : ComplexPattern<i32, 5, "SelectLEA64_32Addr",
                                   [add, sub, mul, X86mul_imm, shl, or,
                                    frameindex, X86WrapperRIP],
                                   []>;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 4b6503b6f6..a453245e7c 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -225,20 +225,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
 }
 
 
-
-static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
-  // Convert registers in the addr mode according to subreg64.
-  for (unsigned i = 0; i != 4; ++i) {
-    if (!MI->getOperand(OpNo+i).isReg()) continue;
-
-    unsigned Reg = MI->getOperand(OpNo+i).getReg();
-    // LEAs can use RIP-relative addressing, and RIP has no sub/super register.
-    if (Reg == 0 || Reg == X86::RIP) continue;
-
-    MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
-  }
-}
-
 /// LowerUnaryToTwoAddr - R = setb   -> R = sbb R, R
 static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
   OutMI.setOpcode(NewOpc);
@@ -364,9 +350,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   // Handle a few special cases to eliminate operand modifiers.
 ReSimplify:
   switch (OutMI.getOpcode()) {
-  case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
-    lower_lea64_32mem(&OutMI, 1);
-    // FALL THROUGH.
+  case X86::LEA64_32r:
   case X86::LEA64r:
   case X86::LEA16r:
   case X86::LEA32r:
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
index 542135529f..87f0b0b30a 100644
--- a/test/CodeGen/X86/lea.ll
+++ b/test/CodeGen/X86/lea.ll
@@ -6,7 +6,7 @@ define i32 @test1(i32 %x) nounwind {
         %tmp2 = add i32 %tmp1, 7
         ret i32 %tmp2
 ; CHECK: test1:
-; CHECK:    leal 7(,[[A0:%rdi|%rcx]],8), %eax
+; CHECK:    leal 7(,%r[[A0:di|cx]],8), %eax
 }
 
 
@@ -28,8 +28,9 @@ bb.nph:
 bb2:
 	ret i32 %x_offs
 ; CHECK: test2:
-; CHECK:	leal	-5([[A0]]), %eax
+; CHECK: movl %e[[A0]], %eax
+; CHECK: addl $-5, %eax
 ; CHECK:	andl	$-4, %eax
 ; CHECK:	negl	%eax
-; CHECK:	leal	-4([[A0]],%rax), %eax
+; CHECK:	leal	-4(%r[[A0]],%rax), %eax
 }