diff options
author | Juergen Ributzka <juergen@apple.com> | 2014-06-23 21:55:36 +0000 |
---|---|---|
committer | Juergen Ributzka <juergen@apple.com> | 2014-06-23 21:55:36 +0000 |
commit | 5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3 (patch) | |
tree | a0cd63f7f0967b2f2ac09a04621e8d63c09e8ca8 /lib/Target/X86 | |
parent | 1f659329b63aa1d1af2b2bfc8b174a8ccdaba2c0 (diff) | |
download | llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.gz llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.bz2 llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.xz |
[FastISel][X86] Optimize selects when the condition comes from a compare.
Optimize the select instructions sequence to use the EFLAGS directly from a
compare when possible.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211543 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 180 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 5 |
3 files changed, 152 insertions, 37 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 6bccd1290f..3c9acba5cb 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -111,6 +111,8 @@ private: bool X86SelectDivRem(const Instruction *I); + bool X86FastEmitCMoveSelect(const Instruction *I); + bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); @@ -1611,50 +1613,158 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { return true; } -bool X86FastISel::X86SelectSelect(const Instruction *I) { - MVT VT; - if (!isTypeLegal(I->getType(), VT)) +/// \brief Emit a conditional move instruction (if the are supported) to lower +/// the select. +bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) { + MVT RetVT; + if (!isTypeLegal(I->getType(), RetVT)) return false; - // We only use cmov here, if we don't have a cmov instruction bail. - if (!Subtarget->hasCMov()) return false; - - unsigned Opc = 0; - const TargetRegisterClass *RC = nullptr; - if (VT == MVT::i16) { - Opc = X86::CMOVE16rr; - RC = &X86::GR16RegClass; - } else if (VT == MVT::i32) { - Opc = X86::CMOVE32rr; - RC = &X86::GR32RegClass; - } else if (VT == MVT::i64) { - Opc = X86::CMOVE64rr; - RC = &X86::GR64RegClass; - } else { + // Check if the subtarget supports these instructions. + if (!Subtarget->hasCMov()) return false; + + // FIXME: Add support for i8. + unsigned Opc; + switch (RetVT.SimpleTy) { + default: return false; + case MVT::i16: Opc = X86::CMOVNE16rr; break; + case MVT::i32: Opc = X86::CMOVNE32rr; break; + case MVT::i64: Opc = X86::CMOVNE64rr; break; } - unsigned Op0Reg = getRegForValue(I->getOperand(0)); - if (Op0Reg == 0) return false; - unsigned Op1Reg = getRegForValue(I->getOperand(1)); - if (Op1Reg == 0) return false; - unsigned Op2Reg = getRegForValue(I->getOperand(2)); - if (Op2Reg == 0) return false; - - // Selects operate on i1, however, Op0Reg is 8 bits width and may contain - // garbage. Indeed, only the less significant bit is supposed to be accurate. - // If we read more than the lsb, we may see non-zero values whereas lsb - // is zero. Therefore, we have to truncate Op0Reg to i1 for the select. - // This is achieved by performing TEST against 1. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) - .addReg(Op0Reg).addImm(1); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(Op1Reg).addReg(Op2Reg); + const Value *Cond = I->getOperand(0); + const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); + bool NeedTest = true; + + // Optimize conditons coming from a compare. + if (const auto *CI = dyn_cast<CmpInst>(Cond)) { + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + + // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. + static unsigned SETFOpcTable[2][3] = { + { X86::SETNPr, X86::SETEr , X86::TEST8rr }, + { X86::SETPr, X86::SETNEr, X86::OR8rr } + }; + unsigned *SETFOpc = nullptr; + switch (Predicate) { + default: break; + case CmpInst::FCMP_OEQ: + SETFOpc = &SETFOpcTable[0][0]; + Predicate = CmpInst::ICMP_NE; + break; + case CmpInst::FCMP_UNE: + SETFOpc = &SETFOpcTable[1][0]; + Predicate = CmpInst::ICMP_NE; + break; + } + + X86::CondCode CC; + bool NeedSwap; + std::tie(CC, NeedSwap) = getX86ConditonCode(Predicate); + assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); + Opc = X86::getCMovFromCond(CC, RC->getSize()); + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + if (NeedSwap) + std::swap(CmpLHS, CmpRHS); + + EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + // Emit a compare of the LHS and RHS, setting the flags. + if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT)) + return false; + + if (SETFOpc) { + unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); + unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), + FlagReg1); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), + FlagReg2); + auto const &II = TII.get(SETFOpc[2]); + if (II.getNumDefs()) { + unsigned TmpReg = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg) + .addReg(FlagReg2).addReg(FlagReg1); + } else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(FlagReg2).addReg(FlagReg1); + } + } + NeedTest = false; + } + + if (NeedTest) { + // Selects operate on i1, however, CondReg is 8 bits width and may contain + // garbage. Indeed, only the less significant bit is supposed to be + // accurate. If we read more than the lsb, we may see non-zero values + // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for + // the select. This is achieved by performing TEST against 1. + unsigned CondReg = getRegForValue(Cond); + if (CondReg == 0) + return false; + bool CondIsKill = hasTrivialKill(Cond); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) + .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); + } + + const Value *LHS = I->getOperand(1); + const Value *RHS = I->getOperand(2); + + unsigned RHSReg = getRegForValue(RHS); + bool RHSIsKill = hasTrivialKill(RHS); + + unsigned LHSReg = getRegForValue(LHS); + bool LHSIsKill = hasTrivialKill(LHS); + + if (!LHSReg || !RHSReg) + return false; + + unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, + LHSReg, LHSIsKill); UpdateValueMap(I, ResultReg); return true; } +bool X86FastISel::X86SelectSelect(const Instruction *I) { + MVT RetVT; + if (!isTypeLegal(I->getType(), RetVT)) + return false; + + // Check if we can fold the select. + if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) { + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + const Value *Opnd = nullptr; + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break; + case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break; + } + // No need for a select anymore - this is an unconditional move. + if (Opnd) { + unsigned OpReg = getRegForValue(Opnd); + if (OpReg == 0) + return false; + bool OpIsKill = hasTrivialKill(Opnd); + const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(OpReg, getKillRegState(OpIsKill)); + UpdateValueMap(I, ResultReg); + return true; + } + } + + // First try to use real conditional move instructions. + if (X86FastEmitCMoveSelect(I)) + return true; + + return false; +} + bool X86FastISel::X86SelectFPExt(const Instruction *I) { // fpext from float to double. if (X86ScalarSSEf64 && diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index bfc8e2759d..0797fc6041 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2696,8 +2696,8 @@ unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { /// getCMovFromCond - Return a cmov opcode for the given condition, /// register size in bytes, and operand type. -static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, - bool HasMemoryOperand) { +unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes, + bool HasMemoryOperand) { static const uint16_t Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index d76c52ce47..c177e3a5c7 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -66,6 +66,11 @@ namespace X86 { /// a memory operand. unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); + /// \brief Return a cmov opcode for the given condition, register size in + /// bytes, and operand type. + unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, + bool HasMemoryOperand = false); + // Turn CMov opcode into condition code. CondCode getCondFromCMovOpc(unsigned Opc); |