author     Manman Ren <mren@apple.com>    2012-06-01 19:49:33 +0000
committer  Manman Ren <mren@apple.com>    2012-06-01 19:49:33 +0000
commit     73c2f7f5ed767a6fc062fd198551be902b7b7d5b (patch)
tree       0e7ddc57a166cd5f076eac14c404412061d88d0f
parent     68f25571e759c1fcf2da206109647259f49f7416 (diff)
X86: peephole optimization to remove cmp instruction
This patch will optimize the following:
        sub r1, r3
        cmp r3, r1 or cmp r1, r3
        bge L1
TO
        sub r1, r3
        bge L1 or ble L1

If the branch instruction can use the flags set by the "sub", the "cmp"
instruction can be eliminated.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157831 91177308-0d34-0410-b5e6-96231b3b80d8
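For illustration, the kind of code this peephole targets looks like the IR below, which mirrors the new @l test case added to test/CodeGen/X86/jump_sign.ll (the function name @select_sub is purely illustrative). This is only a sketch: the exact instructions depend on instruction selection and register allocation, but assuming the select is lowered to a SUB32rr feeding a CMOV, the CMP32rr that previously guarded the CMOV can be dropped and the CMOV reads the EFLAGS already produced by the SUB, with the condition code swapped when the cmp operands were reversed relative to the sub.

; Compute (a > b) ? (a - b) : a.
; Before this patch the backend emitted roughly  subl, cmpl, cmovl/cmovle;
; with the peephole the cmpl is removed and the cmov uses the flags
; already set by subl.
define i32 @select_sub(i32 %a, i32 %b) nounwind {
entry:
  %cmp = icmp sgt i32 %a, %b
  %sub = sub nsw i32 %a, %b
  %cond = select i1 %cmp, i32 %sub, i32 %a
  ret i32 %cond
}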
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td    2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp       323
-rw-r--r--  lib/Target/X86/X86InstrInfo.h           6
-rw-r--r--  test/CodeGen/X86/jump_sign.ll          19
4 files changed, 350 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 0eee083393..c2b59b40ef 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1143,7 +1143,9 @@ let Uses = [EFLAGS] in {
0, 0>;
}
+let isCompare = 1 in {
defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ad7521cf5f..bd0cb1187a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
#include <limits>
#define GET_INSTRINFO_CTOR
@@ -40,6 +41,8 @@
using namespace llvm;
+STATISTIC(NumCmpsRemoved, "Number of Cmps removed due to an earlier Sub");
+
static cl::opt<bool>
NoFusing("disable-spill-fusing",
cl::desc("Disable fusing of spill code into instructions"));
@@ -2728,6 +2731,326 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
}
bool X86InstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
+ int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case X86::CMP64rr:
+ case X86::CMP32rr:
+ case X86::CMP16rr:
+ case X86::CMP8rr:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ }
+
+ return false;
+}
+
+// This function updates condition code for SET Opcodes.
+// The input condition code can be E,NE,L,LE,G,GE,B,BE,A,AE and
+// the updated condition code will be E,NE,G,GE,L,LE,A,AE,B,BE.
+// This is to convert from a > b to b < a, a >= b to b <= a etc.
+static unsigned UpdateSETCondToOptimizeCmp(unsigned SETOpc) {
+ switch (SETOpc) {
+ default: return 0;
+ case X86::SETEr: return X86::SETEr;
+ case X86::SETEm: return X86::SETEm;
+ case X86::SETNEr: return X86::SETNEr;
+ case X86::SETNEm: return X86::SETNEm;
+ case X86::SETLr: return X86::SETGr;
+ case X86::SETLm: return X86::SETGm;
+ case X86::SETLEr: return X86::SETGEr;
+ case X86::SETLEm: return X86::SETGEm;
+ case X86::SETGr: return X86::SETLr;
+ case X86::SETGm: return X86::SETLm;
+ case X86::SETGEr: return X86::SETLEr;
+ case X86::SETGEm: return X86::SETLEm;
+ case X86::SETBr: return X86::SETAr;
+ case X86::SETBm: return X86::SETAm;
+ case X86::SETBEr: return X86::SETAEr;
+ case X86::SETBEm: return X86::SETAEm;
+ case X86::SETAr: return X86::SETBr;
+ case X86::SETAm: return X86::SETBm;
+ case X86::SETAEr: return X86::SETBEr;
+ case X86::SETAEm: return X86::SETBEm;
+ }
+}
+
+// This function updates condition code for Branch Opcodes.
+// The input condition code can be E,NE,L,LE,G,GE,B,BE,A,AE and
+// the updated condition code will be E,NE,G,GE,L,LE,A,AE,B,BE.
+// This is to convert from a > b to b < a, a >= b to b <= a etc.
+static unsigned UpdateBranchCondToOptimizeCmp(unsigned BranchOpc) {
+ switch (BranchOpc) {
+ default: return 0;
+ case X86::JE_4: return X86::JE_4;
+ case X86::JNE_4: return X86::JNE_4;
+ case X86::JL_4: return X86::JG_4;
+ case X86::JLE_4: return X86::JGE_4;
+ case X86::JG_4: return X86::JL_4;
+ case X86::JGE_4: return X86::JLE_4;
+ case X86::JB_4: return X86::JA_4;
+ case X86::JBE_4: return X86::JAE_4;
+ case X86::JA_4: return X86::JB_4;
+ case X86::JAE_4: return X86::JBE_4;
+ }
+}
+
+// This function updates condition code for CMOV Opcodes.
+// The input condition code can be E,NE,L,LE,G,GE,B,BE,A,AE and
+// the updated condition code will be E,NE,G,GE,L,LE,A,AE,B,BE.
+// This is to convert from a > b to b < a, a >= b to b <= a etc.
+static unsigned UpdateCMovCondToOptimizeCmp(unsigned CMovOpc) {
+ switch (CMovOpc) {
+ default: return 0;
+ case X86::CMOVE16rm: return X86::CMOVE16rm;
+ case X86::CMOVE16rr: return X86::CMOVE16rr;
+ case X86::CMOVE32rm: return X86::CMOVE32rm;
+ case X86::CMOVE32rr: return X86::CMOVE32rr;
+ case X86::CMOVE64rm: return X86::CMOVE64rm;
+ case X86::CMOVE64rr: return X86::CMOVE64rr;
+ case X86::CMOVNE16rm: return X86::CMOVNE16rm;
+ case X86::CMOVNE16rr: return X86::CMOVNE16rr;
+ case X86::CMOVNE32rm: return X86::CMOVNE32rm;
+ case X86::CMOVNE32rr: return X86::CMOVNE32rr;
+ case X86::CMOVNE64rm: return X86::CMOVNE64rm;
+ case X86::CMOVNE64rr: return X86::CMOVNE64rr;
+
+ case X86::CMOVL16rm: return X86::CMOVG16rm;
+ case X86::CMOVL16rr: return X86::CMOVG16rr;
+ case X86::CMOVL32rm: return X86::CMOVG32rm;
+ case X86::CMOVL32rr: return X86::CMOVG32rr;
+ case X86::CMOVL64rm: return X86::CMOVG64rm;
+ case X86::CMOVL64rr: return X86::CMOVG64rr;
+ case X86::CMOVLE16rm: return X86::CMOVGE16rm;
+ case X86::CMOVLE16rr: return X86::CMOVGE16rr;
+ case X86::CMOVLE32rm: return X86::CMOVGE32rm;
+ case X86::CMOVLE32rr: return X86::CMOVGE32rr;
+ case X86::CMOVLE64rm: return X86::CMOVGE64rm;
+ case X86::CMOVLE64rr: return X86::CMOVGE64rr;
+
+ case X86::CMOVG16rm: return X86::CMOVL16rm;
+ case X86::CMOVG16rr: return X86::CMOVL16rr;
+ case X86::CMOVG32rm: return X86::CMOVL32rm;
+ case X86::CMOVG32rr: return X86::CMOVL32rr;
+ case X86::CMOVG64rm: return X86::CMOVL64rm;
+ case X86::CMOVG64rr: return X86::CMOVL64rr;
+ case X86::CMOVGE16rm: return X86::CMOVLE16rm;
+ case X86::CMOVGE16rr: return X86::CMOVLE16rr;
+ case X86::CMOVGE32rm: return X86::CMOVLE32rm;
+ case X86::CMOVGE32rr: return X86::CMOVLE32rr;
+ case X86::CMOVGE64rm: return X86::CMOVLE64rm;
+ case X86::CMOVGE64rr: return X86::CMOVLE64rr;
+
+ case X86::CMOVB16rm: return X86::CMOVA16rm;
+ case X86::CMOVB16rr: return X86::CMOVA16rr;
+ case X86::CMOVB32rm: return X86::CMOVA32rm;
+ case X86::CMOVB32rr: return X86::CMOVA32rr;
+ case X86::CMOVB64rm: return X86::CMOVA64rm;
+ case X86::CMOVB64rr: return X86::CMOVA64rr;
+ case X86::CMOVBE16rm: return X86::CMOVAE16rm;
+ case X86::CMOVBE16rr: return X86::CMOVAE16rr;
+ case X86::CMOVBE32rm: return X86::CMOVAE32rm;
+ case X86::CMOVBE32rr: return X86::CMOVAE32rr;
+ case X86::CMOVBE64rm: return X86::CMOVAE64rm;
+ case X86::CMOVBE64rr: return X86::CMOVAE64rr;
+
+ case X86::CMOVA16rm: return X86::CMOVB16rm;
+ case X86::CMOVA16rr: return X86::CMOVB16rr;
+ case X86::CMOVA32rm: return X86::CMOVB32rm;
+ case X86::CMOVA32rr: return X86::CMOVB32rr;
+ case X86::CMOVA64rm: return X86::CMOVB64rm;
+ case X86::CMOVA64rr: return X86::CMOVB64rr;
+ case X86::CMOVAE16rm: return X86::CMOVBE16rm;
+ case X86::CMOVAE16rr: return X86::CMOVBE16rr;
+ case X86::CMOVAE32rm: return X86::CMOVBE32rm;
+ case X86::CMOVAE32rr: return X86::CMOVBE32rr;
+ case X86::CMOVAE64rm: return X86::CMOVBE64rm;
+ case X86::CMOVAE64rr: return X86::CMOVBE64rr;
+ }
+}
+
+bool X86InstrInfo::
+OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI) const {
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ MachineInstr *MI = &*DI;
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator I = CmpInstr, E = MI,
+ B = CmpInstr->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
+ MachineInstr *Sub = NULL;
+ unsigned SrcReg2 = 0;
+ if (CmpInstr->getOpcode() == X86::CMP64rr ||
+ CmpInstr->getOpcode() == X86::CMP32rr ||
+ CmpInstr->getOpcode() == X86::CMP16rr ||
+ CmpInstr->getOpcode() == X86::CMP8rr) {
+ SrcReg2 = CmpInstr->getOperand(1).getReg();
+ }
+
+ // Search backwards for a SUB instruction.
+ // If EFLAGS is updated in the middle, we cannot remove the CMP instruction.
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ // Check whether the current instruction is SUB(r1, r2) or SUB(r2, r1).
+ if (((CmpInstr->getOpcode() == X86::CMP64rr &&
+ Instr.getOpcode() == X86::SUB64rr) ||
+ (CmpInstr->getOpcode() == X86::CMP32rr &&
+ Instr.getOpcode() == X86::SUB32rr)||
+ (CmpInstr->getOpcode() == X86::CMP16rr &&
+ Instr.getOpcode() == X86::SUB16rr)||
+ (CmpInstr->getOpcode() == X86::CMP8rr &&
+ Instr.getOpcode() == X86::SUB8rr)) &&
+ ((Instr.getOperand(1).getReg() == SrcReg &&
+ Instr.getOperand(2).getReg() == SrcReg2) ||
+ (Instr.getOperand(1).getReg() == SrcReg2 &&
+ Instr.getOperand(2).getReg() == SrcReg))) {
+ Sub = &*I;
+ break;
+ }
+
+ // Check whether the current instruction is SUBri(r1, CmpValue).
+ if (((CmpInstr->getOpcode() == X86::CMP64ri32 &&
+ Instr.getOpcode() == X86::SUB64ri32) ||
+ (CmpInstr->getOpcode() == X86::CMP64ri8 &&
+ Instr.getOpcode() == X86::SUB64ri8) ||
+ (CmpInstr->getOpcode() == X86::CMP32ri &&
+ Instr.getOpcode() == X86::SUB32ri) ||
+ (CmpInstr->getOpcode() == X86::CMP32ri8 &&
+ Instr.getOpcode() == X86::SUB32ri8) ||
+ (CmpInstr->getOpcode() == X86::CMP16ri &&
+ Instr.getOpcode() == X86::SUB16ri) ||
+ (CmpInstr->getOpcode() == X86::CMP16ri8 &&
+ Instr.getOpcode() == X86::SUB16ri8) ||
+ (CmpInstr->getOpcode() == X86::CMP8ri &&
+ Instr.getOpcode() == X86::SUB8ri)) &&
+ CmpValue != 0 &&
+ Instr.getOperand(1).getReg() == SrcReg &&
+ Instr.getOperand(2).getImm() == CmpValue) {
+ Sub = &*I;
+ break;
+ }
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ return false;
+ if (!MO.isReg()) continue;
+
+ // This instruction modifies or uses EFLAGS before we find a SUB.
+ // We can't do this transformation.
+ if (MO.getReg() == X86::EFLAGS)
+ return false;
+ }
+
+ if (I == B)
+ // Reached the beginning of the block.
+ return false;
+ }
+
+ // Return false if no candidates exist.
+ if (!Sub)
+ return false;
+ MI = Sub;
+
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr:
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri: {
+ // Scan forward from CmpInstr for the use of EFLAGS.
+ // Handle the condition codes E, NE, L, LE, G, GE, B, BE, A, AE.
+ SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
+ bool isSafe = false;
+ I = CmpInstr;
+ E = CmpInstr->getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS)) {
+ isSafe = true;
+ break;
+ }
+ if (!MO.isReg() || MO.getReg() != X86::EFLAGS)
+ continue;
+ // EFLAGS is redefined by this instruction.
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // EFLAGS is used by this instruction.
+
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
+ // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ unsigned NewOpc = UpdateBranchCondToOptimizeCmp(Instr.getOpcode());
+ if (!NewOpc) NewOpc = UpdateSETCondToOptimizeCmp(Instr.getOpcode());
+ if (!NewOpc) NewOpc = UpdateCMovCondToOptimizeCmp(Instr.getOpcode());
+ if (!NewOpc) return false;
+
+ // Push the MachineInstr to OpsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // instructions will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg)
+ OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
+ }
+ }
+
+ // We may exit the loop at end of the basic block.
+ // In that case, it is still safe to remove CmpInstr.
+
+ // Make sure Sub instruction defines EFLAGS.
+ assert(MI->getOperand(3).getReg() == X86::EFLAGS &&
+ "Expected EFLAGS is the 4th operand of SUBrr or SUBri.");
+ MI->getOperand(3).setIsDef(true);
+ CmpInstr->eraseFromParent();
+
+ // Modify the condition code of instructions in OpsToUpdate.
+ for (unsigned i = 0; i < OpsToUpdate.size(); i++)
+ OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
+ NumCmpsRemoved++;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86InstrInfo::
OptimizeSubInstr(MachineInstr *SubInstr, const MachineRegisterInfo *MRI) const {
// If destination is a memory operand, do not perform this optimization.
if ((SubInstr->getOpcode() != X86::SUB64rr) &&
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 088c49f629..8289d437b7 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -367,6 +367,12 @@ public:
virtual bool OptimizeSubInstr(MachineInstr *SubInstr,
const MachineRegisterInfo *MRI) const;
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpMask, int &CmpValue) const;
+ virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index cf5408e071..c1fad43fe5 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -83,6 +83,25 @@ entry:
%cond = select i1 %cmp, i32 %sub, i32 0
ret i32 %cond
}
+; redundant cmp instruction
+define i32 @l(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l:
+; CHECK-NOT: cmp
+ %cmp = icmp slt i32 %b, %a
+ %sub = sub nsw i32 %a, %b
+ %cond = select i1 %cmp, i32 %sub, i32 %a
+ ret i32 %cond
+}
+define i32 @m(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: m:
+; CHECK-NOT: cmp
+ %cmp = icmp sgt i32 %a, %b
+ %sub = sub nsw i32 %a, %b
+ %cond = select i1 %cmp, i32 %b, i32 %sub
+ ret i32 %cond
+}
; rdar://11540023
define i64 @n(i64 %x, i64 %y) nounwind {
entry: