diff options
author | Tim Northover <tnorthover@apple.com> | 2013-05-30 13:19:42 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2013-05-30 13:19:42 +0000 |
commit | 15983b80a0ceb224b74d2ee5ef53d3eed37dc03b (patch) | |
tree | 04df22eecc061cf808aa3a7a8a33ba02dc2b8ebf | |
parent | 52d65ab72dcdb3b5de8b84743537355067819c31 (diff) | |
download | llvm-15983b80a0ceb224b74d2ee5ef53d3eed37dc03b.tar.gz llvm-15983b80a0ceb224b74d2ee5ef53d3eed37dc03b.tar.bz2 llvm-15983b80a0ceb224b74d2ee5ef53d3eed37dc03b.tar.xz |
X86: use sub-register sequences for MOV*r0 operations
Instead of having a bunch of separate MOV8r0, MOV16r0, ... pseudo-instructions,
it's better to use a single MOV32r0 (which will expand to "xorl %reg, %reg")
and obtain other sizes with EXTRACT_SUBREG and SUBREG_TO_REG. The encoding is
smaller and partial register updates can sometimes be avoided.
Until recently, though, this sequence was a barrier to rematerialization. That
should now be fixed, so it is an appropriate time to make the change.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182928 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 31 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 32 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td | 36 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 65 | ||||
-rw-r--r-- | lib/Target/X86/X86MCInstLower.cpp | 10 | ||||
-rw-r--r-- | test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/2011-09-14-valcoalesce.ll | 37 | ||||
-rw-r--r-- | test/CodeGen/X86/fast-isel-divrem-x86-64.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/fast-isel-divrem.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/hoist-common.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/licm-dominance.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/licm-nested.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/lsr-interesting-step.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/lsr-static-addr.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/X86/sibcall.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/tail-opts.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/zext-extract_subreg.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/zext-sext.ll | 5 |
18 files changed, 125 insertions, 124 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index eeb934f378..d5423cec22 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1294,8 +1294,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { { &X86::GR16RegClass, X86::AX, X86::DX, { { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem - { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv - { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem + { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv + { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem } }, // i16 { &X86::GR32RegClass, X86::EAX, X86::EDX, { @@ -1308,8 +1308,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { { &X86::GR64RegClass, X86::RAX, X86::RDX, { { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem - { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv - { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem + { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv + { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem } }, // i64 }; @@ -1355,9 +1355,28 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { if (OpEntry.IsOpSigned) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpEntry.OpSignExtend)); - else + else { + unsigned Zero32 = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg); + TII.get(X86::MOV32r0), Zero32); + + // Copy the zero into the appropriate sub/super/identical physical + // register. Unfortunately the operations needed are not uniform enough to + // fit neatly into the table above. 
+ if (VT.SimpleTy == MVT::i16) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), TypeEntry.HighInReg) + .addReg(Zero32, 0, X86::sub_16bit); + } else if (VT.SimpleTy == MVT::i32) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), TypeEntry.HighInReg) + .addReg(Zero32); + } else if (VT.SimpleTy == MVT::i64) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) + .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); + } + } } // Generate the DIV/IDIV instruction. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index a801c8af30..0b6940ec71 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2363,27 +2363,24 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } unsigned LoReg, HiReg, ClrReg; - unsigned ClrOpcode, SExtOpcode; + unsigned SExtOpcode; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; ClrReg = HiReg = X86::AH; - ClrOpcode = 0; SExtOpcode = X86::CBW; break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; + ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; - ClrOpcode = X86::MOV32r0; SExtOpcode = X86::CDQ; break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } @@ -2421,8 +2418,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. 
- SDValue ClrNode = - SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); + SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); + switch (NVT.getSimpleVT().SimpleTy) { + case MVT::i16: + ClrNode = + SDValue(CurDAG->getMachineNode( + TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, + CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)), + 0); + break; + case MVT::i32: + break; + case MVT::i64: + ClrNode = + SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), ClrNode, + CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), + 0); + break; + default: + llvm_unreachable("Unexpected division source"); + } + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 52bcd2c6ff..213993a56c 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -216,39 +216,21 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), // Alias Instructions //===----------------------------------------------------------------------===// -// Alias instructions that map movr0 to xor. +// Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. // FIXME: Set encoding to pseudo. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isCodeGenOnly = 1 in { -def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; - -// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller -// encoding and avoids a partial-register update sometimes, but doing so -// at isel time interferes with rematerialization in the current register -// allocator. For now, this is rewritten when the instruction is lowered -// to an MCInst. 
-def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), - "", - [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize, - Sched<[WriteZero]>; - -// FIXME: Set encoding to pseudo. + isCodeGenOnly = 1 in def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; -} -// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a -// smaller encoding, but doing so at isel time interferes with rematerialization -// in the current register allocator. For now, this is rewritten when the -// instruction is lowered to an MCInst. -// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove -// when we have a better way to specify isel priority. -let Defs = [EFLAGS], isCodeGenOnly=1, - AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; +// Other widths can also make use of the 32-bit xor, which may have a smaller +// encoding and avoid partial register updates. +def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>; +def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>; +def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> { + let AddedComplexity = 20; +} // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 4f0c2f2049..5ed8604a39 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1713,37 +1713,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo &TRI) const { - DebugLoc DL = Orig->getDebugLoc(); - - // MOV32r0 etc. are implemented with xor which clobbers condition code. 
- // Re-materialize them as movri instructions to avoid side effects. - bool Clone = true; + // MOV32r0 is implemented with a xor which clobbers condition code. + // Re-materialize it as movri instructions to avoid side effects. unsigned Opc = Orig->getOpcode(); - switch (Opc) { - default: break; - case X86::MOV8r0: - case X86::MOV16r0: - case X86::MOV32r0: - case X86::MOV64r0: { - if (!isSafeToClobberEFLAGS(MBB, I)) { - switch (Opc) { - default: llvm_unreachable("Unreachable!"); - case X86::MOV8r0: Opc = X86::MOV8ri; break; - case X86::MOV16r0: Opc = X86::MOV16ri; break; - case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri64i32; break; - } - Clone = false; - } - break; - } - } - - if (Clone) { + if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) { + DebugLoc DL = Orig->getDebugLoc(); + BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0)) + .addImm(0); + } else { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MBB.insert(I, MI); - } else { - BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0); } MachineInstr *NewMI = prior(I); @@ -3364,10 +3343,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // MOV32r0 etc. are implemented with xor which clobbers condition code. // They are safe to move up, if the definition to EFLAGS is dead and // earlier instructions do not read or write EFLAGS. 
- if (!Movr0Inst && (Instr->getOpcode() == X86::MOV8r0 || - Instr->getOpcode() == X86::MOV16r0 || - Instr->getOpcode() == X86::MOV32r0 || - Instr->getOpcode() == X86::MOV64r0) && + if (!Movr0Inst && Instr->getOpcode() == X86::MOV32r0 && Instr->registerDefIsDead(X86::EFLAGS, TRI)) { Movr0Inst = Instr; continue; @@ -3760,18 +3736,11 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 - unsigned Opc = 0; - switch (MI->getOpcode()) { - default: break; - case X86::MOV64r0: Opc = X86::MOV64mi32; break; - case X86::MOV32r0: Opc = X86::MOV32mi; break; - case X86::MOV16r0: Opc = X86::MOV16mi; break; - case X86::MOV8r0: Opc = X86::MOV8mi; break; + if (MI->getOpcode() == X86::MOV32r0) { + NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); + if (NewMI) + return NewMI; } - if (Opc) - NewMI = MakeM0Inst(*this, Opc, MOs, MI); - if (NewMI) - return NewMI; OpcodeTablePtr = &RegOp2MemOpTable0; } else if (i == 1) { @@ -4157,13 +4126,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, if (isTwoAddr && NumOps >= 2 && OpNum < 2) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; } else if (OpNum == 0) { // If operand 0 - switch (Opc) { - case X86::MOV8r0: - case X86::MOV16r0: - case X86::MOV32r0: - case X86::MOV64r0: return true; - default: break; - } + if (Opc == X86::MOV32r0) + return true; + OpcodeTablePtr = &RegOp2MemOpTable0; } else if (OpNum == 1) { OpcodeTablePtr = &RegOp2MemOpTable1; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 2da1f490e0..b423e1e7d1 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -389,18 +389,8 @@ ReSimplify: "LEA has segment specified!"); break; case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break; - case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case 
X86::MOV16r0: - LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 - LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr - break; - case X86::MOV64r0: - LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0 - LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr - break; - // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B // if one of the registers is extended, but other isn't. case X86::VMOVAPDrr: diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 68a9fafb6d..8174fbdc9e 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm" +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm" ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 @@ -15,11 +15,11 @@ entry: bb4: ; preds = %bb.i, %bb26, %bb4, %entry ; CHECK: %bb4 -; CHECK: xorb +; CHECK: xorl ; CHECK: callq ; CHECK: movq ; CHECK: xorl -; CHECK: xorb +; CHECK: xorl %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0] %ins = or i64 %p, 2097152 ; <i64> [#uses=1] diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll index 6d91109daa..4e84e84c1a 100644 --- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll +++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll @@ -14,22 +14,47 @@ ; Prior to fixing PR10920 401.bzip miscompile, the coalescer would ; consider vreg1 and vreg27 to be copies of the same value. It would ; then remove one of the critical edge copes, which cannot safely be removed. -; + +; There are two obvious ways the register-allocator could go here, either +; reusing the pre-addition register later, or the post-addition one. 
Currently, +; it does the latter, so we check: + ; CHECK: # %while.body85.i ; CHECK-NOT: # % ; CHECK-NOT: add ; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]] ; CHECK: addl %{{.*}}, %[[POSTR]] ; CHECK: # %while.end.i -; CHECK: movl %[[POSTR]], %[[USER:e[abcdxi]+]] +; CHECK-NOT: movl %[[POSTR]] ; CHECK: # %land.lhs.true.i -; CHECK: movl %[[POSTR]], %[[USER]] +; CHECK-NOT: movl %[[POSTR]] ; CHECK: # %land.lhs.true103.i -; CHECK: movl %[[POSTR]], %[[USER]] +; CHECK-NOT: movl %[[POSTR]] ; CHECK: # %if.then108.i -; [[PRER] live out, so nothing on this path should define it. -; CHECK-NOT: , %[[PRER]] +; CHECK: movl %[[PRER]], %[[POSTR]] ; CHECK: # %if.end117.i +; and use it for fprintf: +; CHECK: movl %[[POSTR]], 12(%esp) + + +; If it ever reverts to reusing the pre-addition register then we should +; *probably* check this instead (it certainly worked last time): + +; CHECKALT: # %while.body85.i +; CHECKALT-NOT: # % +; CHECKALT-NOT: add +; CHECKALT: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]] +; CHECKALT: addl %{{.*}}, %[[POSTR]] +; CHECKALT: # %while.end.i +; CHECKALT: movl %[[POSTR]], %[[USER:e[abcdxi]+]] +; CHECKALT: # %land.lhs.true.i +; CHECKALT: movl %[[POSTR]], %[[USER]] +; CHECKALT: # %land.lhs.true103.i +; CHECKALT: movl %[[POSTR]], %[[USER]] +; CHECKALT: # %if.then108.i +; [[PRER] live out, so nothing on this path should define it. 
+; CHECKALT-NOT: , %[[PRER]] +; CHECKALT: # %if.end117.i target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" diff --git a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll index 45494f139e..f2afaa06bb 100644 --- a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll +++ b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind { entry: diff --git a/test/CodeGen/X86/fast-isel-divrem.ll b/test/CodeGen/X86/fast-isel-divrem.ll index 7aba7f7b79..1a309a1ebc 100644 --- a/test/CodeGen/X86/fast-isel-divrem.ll +++ b/test/CodeGen/X86/fast-isel-divrem.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s -; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind { entry: diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll index 72e17c065b..cdfdea3d98 100644 --- a/test/CodeGen/X86/hoist-common.ll +++ b/test/CodeGen/X86/hoist-common.ll @@ -8,7 +8,7 @@ define zeroext i1 @t(i32 %c) nounwind ssp { entry: ; CHECK: t: -; CHECK: xorb %al, %al +; CHECK: xorl %eax, %eax ; CHECK: test ; CHECK: je %tobool = icmp eq i32 %c, 0 diff --git a/test/CodeGen/X86/licm-dominance.ll b/test/CodeGen/X86/licm-dominance.ll index 019f8a32b6..7e3c6fdf95 100644 --- a/test/CodeGen/X86/licm-dominance.ll +++ b/test/CodeGen/X86/licm-dominance.ll @@ 
-2,7 +2,7 @@ ; MachineLICM should check dominance before hoisting instructions. ; CHECK: ## in Loop: -; CHECK-NEXT: xorb %al, %al +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll index 66074fb368..083ae0875e 100644 --- a/test/CodeGen/X86/licm-nested.ll +++ b/test/CodeGen/X86/licm-nested.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 4 ; MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll index d1de0510a0..d4a7ac7da1 100644 --- a/test/CodeGen/X86/lsr-interesting-step.ll +++ b/test/CodeGen/X86/lsr-interesting-step.ll @@ -5,7 +5,7 @@ ; CHECK: BB0_3: ; CHECK-NEXT: movb $0, flags(%rdx) -; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: addq %rax, %rdx ; CHECK-NEXT: cmpq $8192, %rdx ; CHECK-NEXT: jl diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll index b2aea90500..1bac790f57 100644 --- a/test/CodeGen/X86/lsr-static-addr.ll +++ b/test/CodeGen/X86/lsr-static-addr.ll @@ -10,8 +10,9 @@ ; CHECK-NEXT: movsd ; CHECK-NEXT: incq %rax -; ATOM: movsd .LCPI0_0(%rip), %xmm0 + ; ATOM: xorl %eax, %eax +; ATOM: movsd .LCPI0_0(%rip), %xmm0 ; ATOM: align ; ATOM-NEXT: BB0_2: ; ATOM-NEXT: movsd A(,%rax,8) diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index ceb79ea927..de98cb4bb6 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -279,7 +279,7 @@ entry: ; 32: jmp 
{{_?}}bar5 ; 64: t17: -; 64: xorb %al, %al +; 64: xorl %eax, %eax ; 64: jmp {{_?}}bar5 tail call void (...)* @bar5() nounwind ret void @@ -295,7 +295,7 @@ entry: ; 32: fstp %st(0) ; 64: t18: -; 64: xorb %al, %al +; 64: xorl %eax, %eax ; 64: jmp {{_?}}bar6 %0 = tail call double (...)* @bar6() nounwind ret void diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll index 6e20af5866..75a728cb3d 100644 --- a/test/CodeGen/X86/tail-opts.ll +++ b/test/CodeGen/X86/tail-opts.ll @@ -118,7 +118,7 @@ altret: ; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} ; CHECK-NEXT: jbe .LBB2_2 ; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: xorb %al, %al +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: movb $1, %al @@ -161,7 +161,7 @@ bb30: ; CHE: jmp .LBB3_11 ; CHE-NEXT: .LBB3_9: ; CHE-NEXT: movq 8(%rax), %rax -; CHE-NEXT: xorb %dl, %dl +; CHE-NEXT: xorl %edx, %edx ; CHE-NEXT: movb 16(%rax), %al ; CHE-NEXT: cmpb $16, %al ; CHE-NEXT: je .LBB3_11 diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll index 168b898f12..7fa0574fc4 100644 --- a/test/CodeGen/X86/zext-extract_subreg.ll +++ b/test/CodeGen/X86/zext-extract_subreg.ll @@ -14,7 +14,7 @@ if.end: ; preds = %if.end.i ; CHECK: movl (%{{.*}}), [[REG:%[a-z]+]] ; CHECK-NOT: movl [[REG]], [[REG]] ; CHECK-NEXT: testl [[REG]], [[REG]] -; CHECK-NEXT: xorb +; CHECK-NEXT: xorl %tmp138 = select i1 undef, i32 0, i32 %tmp7.i %tmp867 = zext i32 %tmp138 to i64 br label %while.cond diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll index 6432ae38ff..e4264aef4c 100644 --- a/test/CodeGen/X86/zext-sext.ll +++ b/test/CodeGen/X86/zext-sext.ll @@ -1,8 +1,9 @@ -; XFAIL: * -; ...should pass. See PR12324: misched bringup ; RUN: llc < %s -march=x86-64 | FileCheck %s ; <rdar://problem/8006248> +; This randomly started passing after an unrelated change, if it fails again it +; might be worth looking at PR12324: misched bringup. 
+ @llvm.used = appending global [1 x i8*] [i8* bitcast (void ([40 x i16]*, i32*, i16**, i64*)* @func to i8*)], section "llvm.metadata" define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind { |