summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2013-05-30 10:43:18 +0000
committerTim Northover <tnorthover@apple.com>2013-05-30 10:43:18 +0000
commitda0416b9356c9ddf3dddeeb1ad5aec4d1de70016 (patch)
treecfb6f501995747658e941b057fa02b744033beca
parent14a926f13b768ee3771bb944bbbb29529a40dbe1 (diff)
downloadllvm-da0416b9356c9ddf3dddeeb1ad5aec4d1de70016.tar.gz
llvm-da0416b9356c9ddf3dddeeb1ad5aec4d1de70016.tar.bz2
llvm-da0416b9356c9ddf3dddeeb1ad5aec4d1de70016.tar.xz
X86: change zext moves to use sub-register infrastructure.
32-bit writes on amd64 zero out the high bits of the corresponding 64-bit register. LLVM makes use of this for zero-extension, but until now relied on custom MCLowering and other code to fixup instructions. Now we have proper handling of sub-registers, this can be done by creating SUBREG_TO_REG instructions at selection-time. Should be no change in functionality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182921 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86FastISel.cpp39
-rw-r--r--lib/Target/X86/X86InstrCompiler.td33
-rw-r--r--lib/Target/X86/X86InstrExtension.td56
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp11
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp6
5 files changed, 74 insertions, 71 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cf44bd033b..eeb934f378 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1005,10 +1005,6 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
}
bool X86FastISel::X86SelectZExt(const Instruction *I) {
- // Handle zero-extension from i1 to i8, which is common.
- if (!I->getOperand(0)->getType()->isIntegerTy(1))
- return false;
-
EVT DstVT = TLI.getValueType(I->getType());
if (!TLI.isTypeLegal(DstVT))
return false;
@@ -1017,12 +1013,37 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
if (ResultReg == 0)
return false;
- // Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
- if (ResultReg == 0)
- return false;
+ // Handle zero-extension from i1 to i8, which is common.
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()).getSimpleVT();
+ if (SrcVT.SimpleTy == MVT::i1) {
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+
+ if (ResultReg == 0)
+ return false;
+ }
+
+ if (DstVT == MVT::i64) {
+ // Handle extension to 64-bits via sub-register shenanigans.
+ unsigned MovInst;
+
+ switch (SrcVT.SimpleTy) {
+ case MVT::i8: MovInst = X86::MOVZX32rr8; break;
+ case MVT::i16: MovInst = X86::MOVZX32rr16; break;
+ case MVT::i32: MovInst = X86::MOV32rr; break;
+ default: llvm_unreachable("Unexpected zext to i64 source type");
+ }
+
+ unsigned Result32 = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovInst), Result32)
+ .addReg(ResultReg);
- if (DstVT != MVT::i8) {
+ ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::SUBREG_TO_REG),
+ ResultReg)
+ .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
+ } else if (DstVT != MVT::i8) {
ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
ResultReg, /*Kill=*/true);
if (ResultReg == 0)
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index d9ff0c63c5..52bcd2c6ff 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1119,7 +1119,8 @@ defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
@@ -1133,14 +1134,16 @@ def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
-def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
-def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
// For other extloads, use subregs, since the high contents of the register are
// defined after an extload.
+def : Pat<(extloadi64i1 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
+def : Pat<(extloadi64i8 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
+def : Pat<(extloadi64i16 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i32 addr:$src),
- (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
- sub_32bit)>;
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
@@ -1152,8 +1155,10 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
def : Pat<(i32 (anyext GR16:$src)),
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
-def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
-def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR8 :$src)),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
+def : Pat<(i64 (anyext GR16:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
def : Pat<(i64 (anyext GR32:$src)),
(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
@@ -1318,13 +1323,19 @@ def : Pat<(and GR16:$src1, 0xff),
// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
- (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+ (SUBREG_TO_REG (i64 0),
+ (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
+ sub_32bit)>;
// r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff),
- (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
+ (SUBREG_TO_REG (i64 0),
+ (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
+ sub_32bit)>;
// r & (2^8-1) ==> movz
def : Pat<(and GR64:$src, 0xff),
- (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
+ (SUBREG_TO_REG (i64 0),
+ (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
+ sub_32bit)>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 6dc7175357..28954c65e8 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -149,38 +149,24 @@ def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
TB, Sched<[WriteALULd]>;
-// FIXME: These should be Pat patterns.
-let isCodeGenOnly = 1 in {
-
-// Use movzbl instead of movzbq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
- Sched<[WriteALU]>;
-def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
- "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
- TB, Sched<[WriteALULd]>;
-// Use movzwl instead of movzwq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
- Sched<[WriteALU]>;
-def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
- IIC_MOVZX>, TB, Sched<[WriteALULd]>;
-
-// There's no movzlq instruction, but movl can be used for this purpose, using
-// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
-// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
-// zero-extension, however this isn't possible when the 32-bit value is
-// defined by a truncate or is copied from something where the high bits aren't
-// necessarily all zero. In such cases, we fall back to these explicit zext
-// instructions.
-def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
- Sched<[WriteALU]>;
-def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
- "", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
- IIC_MOVZX>, Sched<[WriteALULd]>;
-}
-
+// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
+// 32-bit register.
+def : Pat<(i64 (zext GR8:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
+def : Pat<(zextloadi64i8 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
+
+def : Pat<(i64 (zext GR16:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
+def : Pat<(zextloadi64i16 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
+
+// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a
+// SUBREG_TO_REG to utilize implicit zero-extension, however this isn't possible
+// when the 32-bit value is defined by a truncate or is copied from something
+// where the high bits aren't necessarily all zero. In such cases, we fall back
+// to these explicit zext instructions.
+def : Pat<(i64 (zext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
+def : Pat<(i64 (zextloadi64i32 addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ed3677e6b9..4f0c2f2049 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -451,9 +451,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
{ X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
{ X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
- { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
- { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
- { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
{ X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
{ X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
{ X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
@@ -1381,7 +1378,6 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVSX32rr8:
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
- case X86::MOVZX64rr8:
if (!TM.getSubtarget<X86Subtarget>().is64Bit())
// It's not always legal to reference the low 8-bit of the larger
// register in 32-bit mode.
@@ -1389,9 +1385,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
- case X86::MOVZX64rr16:
- case X86::MOVSX64rr32:
- case X86::MOVZX64rr32: {
+ case X86::MOVSX64rr32: {
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative.
return false;
@@ -1404,17 +1398,14 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVSX32rr8:
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
- case X86::MOVZX64rr8:
SubIdx = X86::sub_8bit;
break;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
- case X86::MOVZX64rr16:
SubIdx = X86::sub_16bit;
break;
case X86::MOVSX64rr32:
- case X86::MOVZX64rr32:
SubIdx = X86::sub_32bit;
break;
}
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index a8a9fd8acc..2da1f490e0 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -388,13 +388,7 @@ ReSimplify:
assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
"LEA has segment specified!");
break;
- case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
- case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
- case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
- case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
- case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break;
- case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;