[FastISel][X86] Optimize selects when the condition comes from a compare.

Optimize the select instruction sequence to use the EFLAGS directly from a compare when possible.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211543 91177308-0d34-0410-b5e6-96231b3b80d8
author: Juergen Ributzka <juergen@apple.com> 2014-06-23 21:55:36 +0000
committer: Juergen Ributzka <juergen@apple.com> 2014-06-23 21:55:36 +0000
commit: 5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3 (patch)
tree: a0cd63f7f0967b2f2ac09a04621e8d63c09e8ca8 /lib/Target/X86
parent: 1f659329b63aa1d1af2b2bfc8b174a8ccdaba2c0 (diff)
download: llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.gz
llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.bz2
llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.xz
3 files changed, 152 insertions, 37 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 6bccd1290f..3c9acba5cb 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -111,6 +111,8 @@ private:
 
   bool X86SelectDivRem(const Instruction *I);
 
+  bool X86FastEmitCMoveSelect(const Instruction *I);
+
   bool X86SelectSelect(const Instruction *I);
 
   bool X86SelectTrunc(const Instruction *I);
@@ -1611,50 +1613,158 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
   return true;
 }
 
-bool X86FastISel::X86SelectSelect(const Instruction *I) {
-  MVT VT;
-  if (!isTypeLegal(I->getType(), VT))
+/// \brief Emit a conditional move instruction (if it is supported) to lower
+/// the select.
+bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) {
+  MVT RetVT;
+  if (!isTypeLegal(I->getType(), RetVT))
     return false;
 
-  // We only use cmov here, if we don't have a cmov instruction bail.
-  if (!Subtarget->hasCMov()) return false;
-
-  unsigned Opc = 0;
-  const TargetRegisterClass *RC = nullptr;
-  if (VT == MVT::i16) {
-    Opc = X86::CMOVE16rr;
-    RC = &X86::GR16RegClass;
-  } else if (VT == MVT::i32) {
-    Opc = X86::CMOVE32rr;
-    RC = &X86::GR32RegClass;
-  } else if (VT == MVT::i64) {
-    Opc = X86::CMOVE64rr;
-    RC = &X86::GR64RegClass;
-  } else {
+  // Check if the subtarget supports these instructions.
+  if (!Subtarget->hasCMov())
     return false;
+
+  // FIXME: Add support for i8.
+  unsigned Opc;
+  switch (RetVT.SimpleTy) {
+  default: return false;
+  case MVT::i16: Opc = X86::CMOVNE16rr; break;
+  case MVT::i32: Opc = X86::CMOVNE32rr; break;
+  case MVT::i64: Opc = X86::CMOVNE64rr; break;
   }
 
-  unsigned Op0Reg = getRegForValue(I->getOperand(0));
-  if (Op0Reg == 0) return false;
-  unsigned Op1Reg = getRegForValue(I->getOperand(1));
-  if (Op1Reg == 0) return false;
-  unsigned Op2Reg = getRegForValue(I->getOperand(2));
-  if (Op2Reg == 0) return false;
-
-  // Selects operate on i1, however, Op0Reg is 8 bits width and may contain
-  // garbage. Indeed, only the less significant bit is supposed to be accurate.
-  // If we read more than the lsb, we may see non-zero values whereas lsb
-  // is zero. Therefore, we have to truncate Op0Reg to i1 for the select.
-  // This is achieved by performing TEST against 1.
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
-    .addReg(Op0Reg).addImm(1);
-  unsigned ResultReg = createResultReg(RC);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
-    .addReg(Op1Reg).addReg(Op2Reg);
+  const Value *Cond = I->getOperand(0);
+  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+  bool NeedTest = true;
+
+  // Optimize conditions coming from a compare.
+  if (const auto *CI = dyn_cast<CmpInst>(Cond)) {
+    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+    // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+    static unsigned SETFOpcTable[2][3] = {
+      { X86::SETNPr, X86::SETEr , X86::TEST8rr },
+      { X86::SETPr,  X86::SETNEr, X86::OR8rr   }
+    };
+    unsigned *SETFOpc = nullptr;
+    switch (Predicate) {
+    default: break;
+    case CmpInst::FCMP_OEQ:
+      SETFOpc = &SETFOpcTable[0][0];
+      Predicate = CmpInst::ICMP_NE;
+      break;
+    case CmpInst::FCMP_UNE:
+      SETFOpc = &SETFOpcTable[1][0];
+      Predicate = CmpInst::ICMP_NE;
+      break;
+    }
+
+    X86::CondCode CC;
+    bool NeedSwap;
+    std::tie(CC, NeedSwap) = getX86ConditonCode(Predicate);
+    assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+    Opc = X86::getCMovFromCond(CC, RC->getSize());
+
+    const Value *CmpLHS = CI->getOperand(0);
+    const Value *CmpRHS = CI->getOperand(1);
+    if (NeedSwap)
+      std::swap(CmpLHS, CmpRHS);
+
+    EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+    // Emit a compare of the LHS and RHS, setting the flags.
+    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+     return false;
+
+    if (SETFOpc) {
+      unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
+      unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
+              FlagReg1);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
+              FlagReg2);
+      auto const &II = TII.get(SETFOpc[2]);
+      if (II.getNumDefs()) {
+        unsigned TmpReg = createResultReg(&X86::GR8RegClass);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
+          .addReg(FlagReg2).addReg(FlagReg1);
+      } else {
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+          .addReg(FlagReg2).addReg(FlagReg1);
+      }
+    }
+    NeedTest = false;
+  }
+
+  if (NeedTest) {
+    // Selects operate on i1, however, CondReg is 8 bits wide and may contain
+    // garbage. Indeed, only the least significant bit is supposed to be
+    // accurate. If we read more than the lsb, we may see non-zero values
+    // whereas lsb is zero. Therefore, we have to truncate CondReg to i1 for
+    // the select. This is achieved by performing TEST against 1.
+    unsigned CondReg = getRegForValue(Cond);
+    if (CondReg == 0)
+      return false;
+    bool CondIsKill = hasTrivialKill(Cond);
+
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+      .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+  }
+
+  const Value *LHS = I->getOperand(1);
+  const Value *RHS = I->getOperand(2);
+
+  unsigned RHSReg = getRegForValue(RHS);
+  bool RHSIsKill = hasTrivialKill(RHS);
+
+  unsigned LHSReg = getRegForValue(LHS);
+  bool LHSIsKill = hasTrivialKill(LHS);
+
+  if (!LHSReg || !RHSReg)
+    return false;
+
+  unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
+                                       LHSReg, LHSIsKill);
   UpdateValueMap(I, ResultReg);
   return true;
 }
 
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+  MVT RetVT;
+  if (!isTypeLegal(I->getType(), RetVT))
+    return false;
+
+  // Check if we can fold the select.
+  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
+    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+    const Value *Opnd = nullptr;
+    switch (Predicate) {
+    default:                              break;
+    case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
+    case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
+    }
+    // No need for a select anymore - this is an unconditional move.
+    if (Opnd) {
+      unsigned OpReg = getRegForValue(Opnd);
+      if (OpReg == 0)
+        return false;
+      bool OpIsKill = hasTrivialKill(Opnd);
+      const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+      unsigned ResultReg = createResultReg(RC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+        .addReg(OpReg, getKillRegState(OpIsKill));
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+  }
+
+  // First try to use real conditional move instructions.
+  if (X86FastEmitCMoveSelect(I))
+    return true;
+
+  return false;
+}
+
 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
   // fpext from float to double.
   if (X86ScalarSSEf64 &&
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index bfc8e2759d..0797fc6041 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2696,8 +2696,8 @@ unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
 
 /// getCMovFromCond - Return a cmov opcode for the given condition,
 /// register size in bytes, and operand type.
-static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
-                                bool HasMemoryOperand) {
+unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
+                              bool HasMemoryOperand) {
   static const uint16_t Opc[32][3] = {
     { X86::CMOVA16rr,  X86::CMOVA32rr,  X86::CMOVA64rr  },
     { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d76c52ce47..c177e3a5c7 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -66,6 +66,11 @@ namespace X86 {
   /// a memory operand.
   unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
 
+  /// \brief Return a cmov opcode for the given condition, register size in
+  /// bytes, and operand type.
+  unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
+                           bool HasMemoryOperand = false);
+
   // Turn CMov opcode into condition code.
   CondCode getCondFromCMovOpc(unsigned Opc);
author	Juergen Ributzka <juergen@apple.com>	2014-06-23 21:55:36 +0000
committer	Juergen Ributzka <juergen@apple.com>	2014-06-23 21:55:36 +0000
commit	5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3 (patch)
tree	a0cd63f7f0967b2f2ac09a04621e8d63c09e8ca8 /lib/Target/X86
parent	1f659329b63aa1d1af2b2bfc8b174a8ccdaba2c0 (diff)
download	llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.gz llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.bz2 llvm-5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3.tar.xz