author    Dan Gohman <gohman@apple.com>  2010-01-11 17:37:57 +0000
committer Dan Gohman <gohman@apple.com>  2010-01-11 17:37:57 +0000
commit    71c25b7d7bdf9b49dd70965c7486ce930b846aac (patch)
tree      b1ce33512c39fef7514399ffbf7a46d3710d26a6
parent    3e8e4fd40c9d5b3afad2d4c7c1d5d4374b752651 (diff)
Re-instate MOV64r0 and MOV16r0, with adjustments to work with the
new AsmPrinter. This is perhaps less elegant than describing them in
terms of MOV32r0 and subreg operations, but it allows the current
register allocator to rematerialize them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93158 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp |  8
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp           | 25
-rw-r--r--  lib/Target/X86/X86Instr64bit.td              | 19
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp              | 14
-rw-r--r--  lib/Target/X86/X86InstrInfo.td               | 15
-rw-r--r--  test/CodeGen/X86/remat-mov-0.ll              | 13
6 files changed, 57 insertions(+), 37 deletions(-)
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 1015b69247..a4939af1b8 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -399,6 +399,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(X86::MOVZX32rm16);
lower_subreg32(&OutMI, 0);
break;
+ case X86::MOV16r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOV64r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
}
}
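
Both new cases reduce to the same rewrite: the opcode becomes MOV32r0 and lower_subreg32 replaces operand 0 with the 32-bit alias of the destination register, so the instruction prints as the usual xorl idiom; on x86-64 a write to a 32-bit register also clears the upper 32 bits. Below is a minimal standalone C++ sketch of that mapping, with an illustrative register subset standing in for LLVM's register-info query, not the real API:

#include <cstdio>
#include <string>

// Illustrative GR16/GR64 -> GR32 alias table; the real lowering code asks
// LLVM's register info for the 32-bit sub/super register instead.
static std::string alias32(const std::string &Reg) {
  if (Reg == "ax" || Reg == "rax") return "eax";
  if (Reg == "dx" || Reg == "rdx") return "edx";
  if (Reg == "di" || Reg == "rdi") return "edi";
  return Reg; // registers not modeled in this sketch
}

int main() {
  const char *Dsts[] = {"dx", "rdi"}; // a MOV16r0 and a MOV64r0 destination
  for (const char *Dst : Dsts) {
    std::string R32 = alias32(Dst);
    // Opcode rewritten to MOV32r0; operand 0 rewritten to the 32-bit alias.
    std::printf("MOV16r0/MOV64r0 %%%s  ->  xorl %%%s, %%%s\n",
                Dst, R32.c_str(), R32.c_str());
  }
  return 0;
}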
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c795b62050..e2a53d1118 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1873,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned LoReg, HiReg, ClrReg;
unsigned ClrOpcode, SExtOpcode;
- EVT ClrVT = NVT;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
@@ -1883,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
- ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32;
+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
@@ -1893,7 +1892,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
case MVT::i64:
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
- ClrOpcode = ~0U; // NOT USED.
+ ClrOpcode = X86::MOV64r0;
SExtOpcode = X86::CQO;
break;
}
@@ -1932,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDValue ClrNode;
-
- if (NVT.getSimpleVT() == MVT::i64) {
- ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
- 0);
- // We just did a 32-bit clear, insert it into a 64-bit register to
- // clear the whole 64-bit reg.
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
- SDValue SubRegNo =
- CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
- ClrNode =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
- MVT::i64, Zero, ClrNode, SubRegNo),
- 0);
- } else {
- ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0);
- }
-
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
ClrNode, InFlag).getValue(1);
}
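
With ClrOpcode now defined for every integer type, the unsigned divide/remainder path no longer special-cases i64: the clear node is created directly in NVT and copied into ClrReg. The following is only an illustrative summary of the per-type table the switch above selects from, not LLVM code:

#include <cstdio>

// Zero idiom used to clear the high register before an unsigned divide,
// per result type, mirroring the switch in X86DAGToDAGISel::Select.
struct ClearEntry { const char *VT, *ClrOpcode, *ClrReg; };

int main() {
  const ClearEntry Table[] = {
    {"i16", "MOV16r0", "DX"},  // new with this patch
    {"i32", "MOV32r0", "EDX"},
    {"i64", "MOV64r0", "RDX"}, // previously MOV32r0 wrapped in SUBREG_TO_REG
  };
  for (const ClearEntry &E : Table)
    std::printf("%-3s: clear %-3s with %s\n", E.VT, E.ClrReg, E.ClrOpcode);
  return 0;
}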
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index c6d32747c4..1835c8feb6 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1598,17 +1598,20 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
// Alias Instructions
//===----------------------------------------------------------------------===//
-// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
-// equivalent due to implicit zero-extending, and it sometimes has a smaller
-// encoding.
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
// when we have a better way to specify isel priority.
-let AddedComplexity = 1 in
-def : Pat<(i64 0),
- (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
-
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+ "",
+ [(set GR64:$dst, 0)]>;
-// Materialize i64 constant where top 32-bits are zero.
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 9600cffa91..d458213438 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1019,12 +1019,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
switch (Opc) {
default: break;
case X86::MOV8r0:
- case X86::MOV32r0: {
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
switch (Opc) {
default: break;
case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri; break;
}
Clone = false;
}
@@ -2291,8 +2295,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
- if (MI->getOpcode() == X86::MOV32r0)
+ if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
else if (MI->getOpcode() == X86::MOV8r0)
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
@@ -2560,7 +2568,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
case X86::MOV8r0:
+ case X86::MOV16r0:
case X86::MOV32r0:
+ case X86::MOV64r0:
return true;
default: break;
}
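
The reMaterialize change keeps the existing safety check: the xor zero idioms clobber EFLAGS, so when the flags are live at the rematerialization point the opcode is swapped for a plain move of immediate 0, now for the 16- and 64-bit pseudos as well. A toy C++ sketch of that fallback, with string opcode names standing in for the real enum:

#include <cstdio>
#include <string>

// If EFLAGS cannot be clobbered at the rematerialization point, fall back
// from the xor idiom to a mov-immediate of zero, mirroring the switch above.
static std::string rematOpcodeFor(const std::string &ZeroIdiom,
                                  bool CanClobberEFLAGS) {
  if (CanClobberEFLAGS)
    return ZeroIdiom; // safe to re-emit the cheaper xor form
  if (ZeroIdiom == "MOV8r0")  return "MOV8ri";
  if (ZeroIdiom == "MOV16r0") return "MOV16ri";
  if (ZeroIdiom == "MOV32r0") return "MOV32ri";
  if (ZeroIdiom == "MOV64r0") return "MOV64ri";
  return ZeroIdiom;
}

int main() {
  std::printf("%s\n", rematOpcodeFor("MOV64r0", false).c_str()); // MOV64ri
  std::printf("%s\n", rematOpcodeFor("MOV64r0", true).c_str());  // MOV64r0
  return 0;
}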
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 3b8f6ec790..341bfb2f74 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -3718,18 +3718,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
"xor{b}\t$dst, $dst",
[(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
"xor{l}\t$dst, $dst",
[(set GR32:$dst, 0)]>;
}
-// Use xorl instead of xorw since we don't care about the high 16 bits,
-// it's smaller, and it avoids a partial-register update.
-let AddedComplexity = 1 in
-def : Pat<(i16 0),
- (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>;
-
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
//
diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll
new file mode 100644
index 0000000000..c4f768ca52
--- /dev/null
+++ b/test/CodeGen/X86/remat-mov-0.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4
+
+; CodeGen should remat the zero instead of spilling it.
+
+declare void @foo(i64 %p)
+
+define void @bar() nounwind {
+ call void @foo(i64 0)
+ call void @foo(i64 0)
+ call void @foo(i64 0)
+ call void @foo(i64 0)
+ ret void
+}
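
The RUN line expects four separate "xorl %edi, %edi" instructions, one per call: with MOV64r0 rematerializable again, the allocator recomputes the zero argument before each call instead of keeping one zero live across the calls and spilling it. As a rough C++ analogue of the IR above (the function name and body are illustrative, not part of the test):

#include <cstdio>

// Stand-in for @foo: takes a 64-bit argument, like the i64 parameter above.
static void foo(long long p) { std::printf("foo(%lld)\n", p); }

int main() {
  // Four calls with a zero argument, mirroring @bar in the test; the point is
  // that each zero is re-materialized (xorl %edi, %edi) before its call
  // rather than spilled to the stack and reloaded.
  foo(0);
  foo(0);
  foo(0);
  foo(0);
  return 0;
}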