author    Tim Northover <tnorthover@apple.com>  2013-05-30 13:19:42 +0000
committer Tim Northover <tnorthover@apple.com>  2013-05-30 13:19:42 +0000
commit    15983b80a0ceb224b74d2ee5ef53d3eed37dc03b (patch)
tree      04df22eecc061cf808aa3a7a8a33ba02dc2b8ebf
parent    52d65ab72dcdb3b5de8b84743537355067819c31 (diff)
X86: use sub-register sequences for MOV*r0 operations
Instead of having a bunch of separate MOV8r0, MOV16r0, ... pseudo-instructions,
it's better to use a single MOV32r0 (which will expand to "xorl %reg, %reg") and
obtain other sizes with EXTRACT_SUBREG and SUBREG_TO_REG. The encoding is
smaller and partial register updates can sometimes be avoided.

Until recently, this sequence was a barrier to rematerialization, though. That
should now be fixed, so it's an appropriate time to make the change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182928 91177308-0d34-0410-b5e6-96231b3b80d8
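To illustrate the sequence this commit standardizes on, here is a minimal C++
sketch (not part of the patch) of materializing a 64-bit zero the new way:
emit the single MOV32r0 pseudo, then widen the result with SUBREG_TO_REG. It
mirrors the X86FastISel hunk below; the helper name emitZero64 is hypothetical,
and it assumes an LLVM 3.3-era in-tree build where the X86 target headers are
on the include path.

// A minimal sketch, not part of this patch: materialize a 64-bit zero via the
// single MOV32r0 pseudo plus sub-register plumbing. emitZero64 is a
// hypothetical helper; the BuildMI/SUBREG_TO_REG usage follows the FastISel
// hunk below. Assumes an in-tree X86 target build (LLVM 3.3 era).
#include "X86InstrInfo.h"                   // X86 opcodes and register classes
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetOpcodes.h"

using namespace llvm;

static unsigned emitZero64(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, DebugLoc DL,
                           const TargetInstrInfo &TII,
                           MachineRegisterInfo &MRI) {
  // MOV32r0 lowers to "xorl %reg, %reg": a two-byte encoding, and a full
  // 32-bit write, so there is no partial-register update.
  unsigned Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
  BuildMI(MBB, I, DL, TII.get(X86::MOV32r0), Zero32);

  // On x86-64 a 32-bit write already zeroes bits 63:32, so SUBREG_TO_REG
  // merely reinterprets Zero32 as a 64-bit zero; no instruction is emitted.
  unsigned Zero64 = MRI.createVirtualRegister(&X86::GR64RegClass);
  BuildMI(MBB, I, DL, TII.get(TargetOpcode::SUBREG_TO_REG), Zero64)
      .addImm(0)
      .addReg(Zero32)
      .addImm(X86::sub_32bit);
  return Zero64;
}

For i16 the change runs in the opposite direction: the patch copies from the
sub_16bit sub-register of the 32-bit zero (a plain COPY in FastISel, an
EXTRACT_SUBREG node in the DAG selector), as the first two file diffs show.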
-rw-r--r--  lib/Target/X86/X86FastISel.cpp                 | 31
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp             | 32
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td             | 36
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp                | 65
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp              | 10
-rw-r--r--  test/CodeGen/X86/2009-02-26-MachineLICMBug.ll  |  6
-rw-r--r--  test/CodeGen/X86/2011-09-14-valcoalesce.ll     | 37
-rw-r--r--  test/CodeGen/X86/fast-isel-divrem-x86-64.ll    |  2
-rw-r--r--  test/CodeGen/X86/fast-isel-divrem.ll           |  4
-rw-r--r--  test/CodeGen/X86/hoist-common.ll               |  2
-rw-r--r--  test/CodeGen/X86/licm-dominance.ll             |  2
-rw-r--r--  test/CodeGen/X86/licm-nested.ll                |  2
-rw-r--r--  test/CodeGen/X86/lsr-interesting-step.ll       |  2
-rw-r--r--  test/CodeGen/X86/lsr-static-addr.ll            |  3
-rw-r--r--  test/CodeGen/X86/sibcall.ll                    |  4
-rw-r--r--  test/CodeGen/X86/tail-opts.ll                  |  4
-rw-r--r--  test/CodeGen/X86/zext-extract_subreg.ll        |  2
-rw-r--r--  test/CodeGen/X86/zext-sext.ll                  |  5
18 files changed, 125 insertions, 124 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index eeb934f378..d5423cec22 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1294,8 +1294,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
{ &X86::GR16RegClass, X86::AX, X86::DX, {
{ X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
{ X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
- { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv
- { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem
+ { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
+ { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
}
}, // i16
{ &X86::GR32RegClass, X86::EAX, X86::EDX, {
@@ -1308,8 +1308,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
{ &X86::GR64RegClass, X86::RAX, X86::RDX, {
{ X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
{ X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
- { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv
- { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem
+ { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
+ { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
}
}, // i64
};
@@ -1355,9 +1355,28 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
if (OpEntry.IsOpSigned)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(OpEntry.OpSignExtend));
- else
+ else {
+ unsigned Zero32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
+ TII.get(X86::MOV32r0), Zero32);
+
+ // Copy the zero into the appropriate sub/super/identical physical
+ // register. Unfortunately the operations needed are not uniform enough to
+ // fit neatly into the table above.
+ if (VT.SimpleTy == MVT::i16) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), TypeEntry.HighInReg)
+ .addReg(Zero32, 0, X86::sub_16bit);
+ } else if (VT.SimpleTy == MVT::i32) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), TypeEntry.HighInReg)
+ .addReg(Zero32);
+ } else if (VT.SimpleTy == MVT::i64) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
+ .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
+ }
+ }
}
// Generate the DIV/IDIV instruction.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index a801c8af30..0b6940ec71 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2363,27 +2363,24 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
unsigned LoReg, HiReg, ClrReg;
- unsigned ClrOpcode, SExtOpcode;
+ unsigned SExtOpcode;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; ClrReg = HiReg = X86::AH;
- ClrOpcode = 0;
SExtOpcode = X86::CBW;
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
- ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
+ ClrReg = X86::DX;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
- ClrOpcode = X86::MOV32r0;
SExtOpcode = X86::CDQ;
break;
case MVT::i64:
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
- ClrOpcode = X86::MOV64r0;
SExtOpcode = X86::CQO;
break;
}
@@ -2421,8 +2418,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDValue ClrNode =
- SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
+ SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
+ switch (NVT.getSimpleVT().SimpleTy) {
+ case MVT::i16:
+ ClrNode =
+ SDValue(CurDAG->getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
+ CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)),
+ 0);
+ break;
+ case MVT::i32:
+ break;
+ case MVT::i64:
+ ClrNode =
+ SDValue(CurDAG->getMachineNode(
+ TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
+ CurDAG->getTargetConstant(0, MVT::i64), ClrNode,
+ CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+ 0);
+ break;
+ default:
+ llvm_unreachable("Unexpected division source");
+ }
+
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
ClrNode, InFlag).getValue(1);
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 52bcd2c6ff..213993a56c 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -216,39 +216,21 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
// Alias Instructions
//===----------------------------------------------------------------------===//
-// Alias instructions that map movr0 to xor.
+// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// FIXME: Set encoding to pseudo.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isCodeGenOnly = 1 in {
-def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
-
-// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
-// encoding and avoids a partial-register update sometimes, but doing so
-// at isel time interferes with rematerialization in the current register
-// allocator. For now, this is rewritten when the instruction is lowered
-// to an MCInst.
-def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
- "",
- [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize,
- Sched<[WriteZero]>;
-
-// FIXME: Set encoding to pseudo.
+ isCodeGenOnly = 1 in
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
-}
-// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
-// smaller encoding, but doing so at isel time interferes with rematerialization
-// in the current register allocator. For now, this is rewritten when the
-// instruction is lowered to an MCInst.
-// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
-// when we have a better way to specify isel priority.
-let Defs = [EFLAGS], isCodeGenOnly=1,
- AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
+// Other widths can also make use of the 32-bit xor, which may have a smaller
+// encoding and avoid partial register updates.
+def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
+def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
+def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
+ let AddedComplexity = 20;
+}
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4f0c2f2049..5ed8604a39 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1713,37 +1713,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo &TRI) const {
- DebugLoc DL = Orig->getDebugLoc();
-
- // MOV32r0 etc. are implemented with xor which clobbers condition code.
- // Re-materialize them as movri instructions to avoid side effects.
- bool Clone = true;
+ // MOV32r0 is implemented with an xor which clobbers the condition codes.
+ // Re-materialize it as a movri instruction to avoid side effects.
unsigned Opc = Orig->getOpcode();
- switch (Opc) {
- default: break;
- case X86::MOV8r0:
- case X86::MOV16r0:
- case X86::MOV32r0:
- case X86::MOV64r0: {
- if (!isSafeToClobberEFLAGS(MBB, I)) {
- switch (Opc) {
- default: llvm_unreachable("Unreachable!");
- case X86::MOV8r0: Opc = X86::MOV8ri; break;
- case X86::MOV16r0: Opc = X86::MOV16ri; break;
- case X86::MOV32r0: Opc = X86::MOV32ri; break;
- case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
- }
- Clone = false;
- }
- break;
- }
- }
-
- if (Clone) {
+ if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) {
+ DebugLoc DL = Orig->getDebugLoc();
+ BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0))
+ .addImm(0);
+ } else {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MBB.insert(I, MI);
- } else {
- BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
}
MachineInstr *NewMI = prior(I);
@@ -3364,10 +3343,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// They are safe to move up, if the definition to EFLAGS is dead and
// earlier instructions do not read or write EFLAGS.
- if (!Movr0Inst && (Instr->getOpcode() == X86::MOV8r0 ||
- Instr->getOpcode() == X86::MOV16r0 ||
- Instr->getOpcode() == X86::MOV32r0 ||
- Instr->getOpcode() == X86::MOV64r0) &&
+ if (!Movr0Inst && Instr->getOpcode() == X86::MOV32r0 &&
Instr->registerDefIsDead(X86::EFLAGS, TRI)) {
Movr0Inst = Instr;
continue;
@@ -3760,18 +3736,11 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
- unsigned Opc = 0;
- switch (MI->getOpcode()) {
- default: break;
- case X86::MOV64r0: Opc = X86::MOV64mi32; break;
- case X86::MOV32r0: Opc = X86::MOV32mi; break;
- case X86::MOV16r0: Opc = X86::MOV16mi; break;
- case X86::MOV8r0: Opc = X86::MOV8mi; break;
+ if (MI->getOpcode() == X86::MOV32r0) {
+ NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ if (NewMI)
+ return NewMI;
}
- if (Opc)
- NewMI = MakeM0Inst(*this, Opc, MOs, MI);
- if (NewMI)
- return NewMI;
OpcodeTablePtr = &RegOp2MemOpTable0;
} else if (i == 1) {
@@ -4157,13 +4126,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
- switch (Opc) {
- case X86::MOV8r0:
- case X86::MOV16r0:
- case X86::MOV32r0:
- case X86::MOV64r0: return true;
- default: break;
- }
+ if (Opc == X86::MOV32r0)
+ return true;
+
OpcodeTablePtr = &RegOp2MemOpTable0;
} else if (OpNum == 1) {
OpcodeTablePtr = &RegOp2MemOpTable1;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 2da1f490e0..b423e1e7d1 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -389,18 +389,8 @@ ReSimplify:
"LEA has segment specified!");
break;
case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
- case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::MOV16r0:
- LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
- LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
- break;
- case X86::MOV64r0:
- LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0
- LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
- break;
-
// Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
// if one of the registers is extended, but other isn't.
case X86::VMOVAPDrr:
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 68a9fafb6d..8174fbdc9e 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
; rdar://6627786
; rdar://7792037
@@ -15,11 +15,11 @@ entry:
bb4: ; preds = %bb.i, %bb26, %bb4, %entry
; CHECK: %bb4
-; CHECK: xorb
+; CHECK: xorl
; CHECK: callq
; CHECK: movq
; CHECK: xorl
-; CHECK: xorb
+; CHECK: xorl
%0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0]
%ins = or i64 %p, 2097152 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index 6d91109daa..4e84e84c1a 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -14,22 +14,47 @@
; Prior to fixing the PR10920 401.bzip2 miscompile, the coalescer would
; consider vreg1 and vreg27 to be copies of the same value. It would
; then remove one of the critical edge copies, which cannot safely be removed.
-;
+
+; There are two obvious ways the register allocator could go here: either
+; reuse the pre-addition register later, or use the post-addition one.
+; Currently it does the latter, so we check:
+
; CHECK: # %while.body85.i
; CHECK-NOT: # %
; CHECK-NOT: add
; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]]
; CHECK: addl %{{.*}}, %[[POSTR]]
; CHECK: # %while.end.i
-; CHECK: movl %[[POSTR]], %[[USER:e[abcdxi]+]]
+; CHECK-NOT: movl %[[POSTR]]
; CHECK: # %land.lhs.true.i
-; CHECK: movl %[[POSTR]], %[[USER]]
+; CHECK-NOT: movl %[[POSTR]]
; CHECK: # %land.lhs.true103.i
-; CHECK: movl %[[POSTR]], %[[USER]]
+; CHECK-NOT: movl %[[POSTR]]
; CHECK: # %if.then108.i
-; [[PRER] live out, so nothing on this path should define it.
-; CHECK-NOT: , %[[PRER]]
+; CHECK: movl %[[PRER]], %[[POSTR]]
; CHECK: # %if.end117.i
+; and use it for fprintf:
+; CHECK: movl %[[POSTR]], 12(%esp)
+
+
+; If it ever reverts to reusing the pre-addition register then we should
+; *probably* check this instead (it certainly worked last time):
+
+; CHECKALT: # %while.body85.i
+; CHECKALT-NOT: # %
+; CHECKALT-NOT: add
+; CHECKALT: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]]
+; CHECKALT: addl %{{.*}}, %[[POSTR]]
+; CHECKALT: # %while.end.i
+; CHECKALT: movl %[[POSTR]], %[[USER:e[abcdxi]+]]
+; CHECKALT: # %land.lhs.true.i
+; CHECKALT: movl %[[POSTR]], %[[USER]]
+; CHECKALT: # %land.lhs.true103.i
+; CHECKALT: movl %[[POSTR]], %[[USER]]
+; CHECKALT: # %if.then108.i
+; [[PRER] live out, so nothing on this path should define it.
+; CHECKALT-NOT: , %[[PRER]]
+; CHECKALT: # %if.end117.i
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
diff --git a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
index 45494f139e..f2afaa06bb 100644
--- a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind {
entry:
diff --git a/test/CodeGen/X86/fast-isel-divrem.ll b/test/CodeGen/X86/fast-isel-divrem.ll
index 7aba7f7b79..1a309a1ebc 100644
--- a/test/CodeGen/X86/fast-isel-divrem.ll
+++ b/test/CodeGen/X86/fast-isel-divrem.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
-; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind {
entry:
diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll
index 72e17c065b..cdfdea3d98 100644
--- a/test/CodeGen/X86/hoist-common.ll
+++ b/test/CodeGen/X86/hoist-common.ll
@@ -8,7 +8,7 @@
define zeroext i1 @t(i32 %c) nounwind ssp {
entry:
; CHECK: t:
-; CHECK: xorb %al, %al
+; CHECK: xorl %eax, %eax
; CHECK: test
; CHECK: je
%tobool = icmp eq i32 %c, 0
diff --git a/test/CodeGen/X86/licm-dominance.ll b/test/CodeGen/X86/licm-dominance.ll
index 019f8a32b6..7e3c6fdf95 100644
--- a/test/CodeGen/X86/licm-dominance.ll
+++ b/test/CodeGen/X86/licm-dominance.ll
@@ -2,7 +2,7 @@
; MachineLICM should check dominance before hoisting instructions.
; CHECK: ## in Loop:
-; CHECK-NEXT: xorb %al, %al
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll
index 66074fb368..083ae0875e 100644
--- a/test/CodeGen/X86/licm-nested.ll
+++ b/test/CodeGen/X86/licm-nested.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3
+; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 4
; MachineLICM should be able to hoist the symbolic addresses out of
; the inner loops.
diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll
index d1de0510a0..d4a7ac7da1 100644
--- a/test/CodeGen/X86/lsr-interesting-step.ll
+++ b/test/CodeGen/X86/lsr-interesting-step.ll
@@ -5,7 +5,7 @@
; CHECK: BB0_3:
; CHECK-NEXT: movb $0, flags(%rdx)
-; CHECK-NEXT: addq %rcx, %rdx
+; CHECK-NEXT: addq %rax, %rdx
; CHECK-NEXT: cmpq $8192, %rdx
; CHECK-NEXT: jl
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index b2aea90500..1bac790f57 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -10,8 +10,9 @@
; CHECK-NEXT: movsd
; CHECK-NEXT: incq %rax
-; ATOM: movsd .LCPI0_0(%rip), %xmm0
+
; ATOM: xorl %eax, %eax
+; ATOM: movsd .LCPI0_0(%rip), %xmm0
; ATOM: align
; ATOM-NEXT: BB0_2:
; ATOM-NEXT: movsd A(,%rax,8)
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index ceb79ea927..de98cb4bb6 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -279,7 +279,7 @@ entry:
; 32: jmp {{_?}}bar5
; 64: t17:
-; 64: xorb %al, %al
+; 64: xorl %eax, %eax
; 64: jmp {{_?}}bar5
tail call void (...)* @bar5() nounwind
ret void
@@ -295,7 +295,7 @@ entry:
; 32: fstp %st(0)
; 64: t18:
-; 64: xorb %al, %al
+; 64: xorl %eax, %eax
; 64: jmp {{_?}}bar6
%0 = tail call double (...)* @bar6() nounwind
ret void
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 6e20af5866..75a728cb3d 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -118,7 +118,7 @@ altret:
; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
; CHECK-NEXT: jbe .LBB2_2
; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT: xorb %al, %al
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: movb $1, %al
@@ -161,7 +161,7 @@ bb30:
; CHE: jmp .LBB3_11
; CHE-NEXT: .LBB3_9:
; CHE-NEXT: movq 8(%rax), %rax
-; CHE-NEXT: xorb %dl, %dl
+; CHE-NEXT: xorl %edx, %edx
; CHE-NEXT: movb 16(%rax), %al
; CHE-NEXT: cmpb $16, %al
; CHE-NEXT: je .LBB3_11
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
index 168b898f12..7fa0574fc4 100644
--- a/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -14,7 +14,7 @@ if.end: ; preds = %if.end.i
; CHECK: movl (%{{.*}}), [[REG:%[a-z]+]]
; CHECK-NOT: movl [[REG]], [[REG]]
; CHECK-NEXT: testl [[REG]], [[REG]]
-; CHECK-NEXT: xorb
+; CHECK-NEXT: xorl
%tmp138 = select i1 undef, i32 0, i32 %tmp7.i
%tmp867 = zext i32 %tmp138 to i64
br label %while.cond
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index 6432ae38ff..e4264aef4c 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -1,8 +1,9 @@
-; XFAIL: *
-; ...should pass. See PR12324: misched bringup
; RUN: llc < %s -march=x86-64 | FileCheck %s
; <rdar://problem/8006248>
+; This randomly started passing after an unrelated change; if it fails again it
+; might be worth looking at PR12324: misched bringup.
+
@llvm.used = appending global [1 x i8*] [i8* bitcast (void ([40 x i16]*, i32*, i16**, i64*)* @func to i8*)], section "llvm.metadata"
define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {