path: root/lib/Target/X86
author    Juergen Ributzka <juergen@apple.com>  2014-06-23 21:55:36 +0000
committer Juergen Ributzka <juergen@apple.com>  2014-06-23 21:55:36 +0000
commit    5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3 (patch)
tree      a0cd63f7f0967b2f2ac09a04621e8d63c09e8ca8 /lib/Target/X86
parent    1f659329b63aa1d1af2b2bfc8b174a8ccdaba2c0 (diff)
[FastISel][X86] Optimize selects when the condition comes from a compare.
Optimize the select instruction sequence to use EFLAGS directly from a compare when possible.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211543 91177308-0d34-0410-b5e6-96231b3b80d8
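As a rough illustration of the change (a hypothetical example, not part of the commit): when the condition of a select is produced directly by a compare, FastISel no longer needs to materialize the i1 result into a register and re-test it; it can emit the compare and let the CMOV read EFLAGS straight from it. Assuming a function like the following, compiled at -O0 so FastISel handles the select:

    // Hypothetical C++ example; the asm sketches below are approximate.
    int smax(int a, int b) {
      return a > b ? a : b;   // becomes 'icmp sgt' + 'select' in LLVM IR
    }
    // Before: cmpl %esi, %edi; setg %al; testb $1, %al; cmov...  (re-test)
    // After:  cmpl %esi, %edi; cmovgl ...                        (EFLAGS reused)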
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/X86FastISel.cpp  | 180
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp |   4
-rw-r--r--  lib/Target/X86/X86InstrInfo.h   |   5
3 files changed, 152 insertions(+), 37 deletions(-)
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 6bccd1290f..3c9acba5cb 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -111,6 +111,8 @@ private:
bool X86SelectDivRem(const Instruction *I);
+ bool X86FastEmitCMoveSelect(const Instruction *I);
+
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
@@ -1611,50 +1613,158 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectSelect(const Instruction *I) {
- MVT VT;
- if (!isTypeLegal(I->getType(), VT))
+/// \brief Emit a conditional move instruction (if they are supported) to lower
+/// the select.
+bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeLegal(I->getType(), RetVT))
return false;
- // We only use cmov here, if we don't have a cmov instruction bail.
- if (!Subtarget->hasCMov()) return false;
-
- unsigned Opc = 0;
- const TargetRegisterClass *RC = nullptr;
- if (VT == MVT::i16) {
- Opc = X86::CMOVE16rr;
- RC = &X86::GR16RegClass;
- } else if (VT == MVT::i32) {
- Opc = X86::CMOVE32rr;
- RC = &X86::GR32RegClass;
- } else if (VT == MVT::i64) {
- Opc = X86::CMOVE64rr;
- RC = &X86::GR64RegClass;
- } else {
+ // Check if the subtarget supports these instructions.
+ if (!Subtarget->hasCMov())
return false;
+
+ // FIXME: Add support for i8.
+ unsigned Opc;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::i16: Opc = X86::CMOVNE16rr; break;
+ case MVT::i32: Opc = X86::CMOVNE32rr; break;
+ case MVT::i64: Opc = X86::CMOVNE64rr; break;
}
- unsigned Op0Reg = getRegForValue(I->getOperand(0));
- if (Op0Reg == 0) return false;
- unsigned Op1Reg = getRegForValue(I->getOperand(1));
- if (Op1Reg == 0) return false;
- unsigned Op2Reg = getRegForValue(I->getOperand(2));
- if (Op2Reg == 0) return false;
-
- // Selects operate on i1, however, Op0Reg is 8 bits width and may contain
- // garbage. Indeed, only the less significant bit is supposed to be accurate.
- // If we read more than the lsb, we may see non-zero values whereas lsb
- // is zero. Therefore, we have to truncate Op0Reg to i1 for the select.
- // This is achieved by performing TEST against 1.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(Op0Reg).addImm(1);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Op1Reg).addReg(Op2Reg);
+ const Value *Cond = I->getOperand(0);
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ bool NeedTest = true;
+
+ // Optimize conditions coming from a compare.
+ if (const auto *CI = dyn_cast<CmpInst>(Cond)) {
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static unsigned SETFOpcTable[2][3] = {
+ { X86::SETNPr, X86::SETEr , X86::TEST8rr },
+ { X86::SETPr, X86::SETNEr, X86::OR8rr }
+ };
+ unsigned *SETFOpc = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_OEQ:
+ SETFOpc = &SETFOpcTable[0][0];
+ Predicate = CmpInst::ICMP_NE;
+ break;
+ case CmpInst::FCMP_UNE:
+ SETFOpc = &SETFOpcTable[1][0];
+ Predicate = CmpInst::ICMP_NE;
+ break;
+ }
+
+ X86::CondCode CC;
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+ Opc = X86::getCMovFromCond(CC, RC->getSize());
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ return false;
+
+ if (SETFOpc) {
+ unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
+ unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
+ FlagReg1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
+ FlagReg2);
+ auto const &II = TII.get(SETFOpc[2]);
+ if (II.getNumDefs()) {
+ unsigned TmpReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
+ .addReg(FlagReg2).addReg(FlagReg1);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(FlagReg2).addReg(FlagReg1);
+ }
+ }
+ NeedTest = false;
+ }
+
+ if (NeedTest) {
+ // Selects operate on i1; however, CondReg is 8 bits wide and may contain
+ // garbage. Only the least significant bit is guaranteed to be accurate,
+ // so reading more than the LSB may yield a non-zero value even though the
+ // LSB is zero. Therefore, truncate CondReg to i1 for the select by
+ // performing a TEST against 1.
+ unsigned CondReg = getRegForValue(Cond);
+ if (CondReg == 0)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+ .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill);
UpdateValueMap(I, ResultReg);
return true;
}
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
+ // Check if we can fold the select.
+ if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ const Value *Opnd = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
+ case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
+ }
+ // No need for a select anymore - this is an unconditional move.
+ if (Opnd) {
+ unsigned OpReg = getRegForValue(Opnd);
+ if (OpReg == 0)
+ return false;
+ bool OpIsKill = hasTrivialKill(Opnd);
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(OpReg, getKillRegState(OpIsKill));
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ // First try to use real conditional move instructions.
+ if (X86FastEmitCMoveSelect(I))
+ return true;
+
+ return false;
+}
+
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
if (X86ScalarSSEf64 &&
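A note on the SETFOpcTable path above (an illustrative sketch based on x86 flag semantics, not code from the patch): after a ucomiss/ucomisd, "ordered and equal" is ZF == 1 && PF == 0, and no single x86 condition code tests that combination, which is why two SETcc instructions are emitted and combined before the CMOV:

    // Roughly the sequence emitted for 'fcmp oeq' feeding a select:
    //   ucomiss %xmm1, %xmm0
    //   setnp   %al        // PF == 0: comparison was ordered (no NaN)
    //   sete    %cl        // ZF == 1: operands compared equal
    //   testb   %cl, %al   // AND the two bits; ZF == 0 iff both were set
    //   cmovne  ...        // hence Predicate is rewritten to ICMP_NE
    // FCMP_UNE (PF == 1 || ZF == 0) uses SETP/SETNE combined with OR8rr.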
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index bfc8e2759d..0797fc6041 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2696,8 +2696,8 @@ unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
/// getCMovFromCond - Return a cmov opcode for the given condition,
/// register size in bytes, and operand type.
-static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
- bool HasMemoryOperand) {
+unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand) {
static const uint16_t Opc[32][3] = {
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d76c52ce47..c177e3a5c7 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -66,6 +66,11 @@ namespace X86 {
/// a memory operand.
unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
+ /// \brief Return a cmov opcode for the given condition, register size in
+ /// bytes, and operand type.
+ unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand = false);
+
// Turn CMov opcode into condition code.
CondCode getCondFromCMovOpc(unsigned Opc);
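With getCMovFromCond now exported from the X86 namespace, callers outside X86InstrInfo.cpp (such as the new FastISel code above) can map a condition code and register width straight to a CMOV opcode. A minimal usage sketch, mirroring the call in X86FastEmitCMoveSelect:

    // Assumes an X86 target source file that includes X86InstrInfo.h.
    X86::CondCode CC = X86::COND_NE;
    unsigned Opc = X86::getCMovFromCond(CC, /*RegBytes=*/4); // X86::CMOVNE32rr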