X86: elide comparisons after cmpxchg instructions.

The C++ and C semantics of the compare_and_swap operations actually require us to return a boolean "success" value. In LLVM terms this means a second comparison of the output of "cmpxchg" against the input desired value. However, x86's "cmpxchg" instruction sets all flags for the comparison formed, so we can skip any secondary comparison. (N.b. this isn't true for cmpxchg8b/16b, which only set ZF). rdar://problem/13201607 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210523 91177308-0d34-0410-b5e6-96231b3b80d8
author: Tim Northover <tnorthover@apple.com> 2014-06-10 10:49:07 +0000
committer: Tim Northover <tnorthover@apple.com> 2014-06-10 10:49:07 +0000
commit: 984ee6544580453320597d496f2a67adec8d1e93 (patch)
tree: f453f399972a362f1a91ffecd2357eff2cbcd39f
parent: 46b3076cd0b1e57e16cfe4ecaa517eb78863a92a (diff)
download: llvm-984ee6544580453320597d496f2a67adec8d1e93.tar.gz
llvm-984ee6544580453320597d496f2a67adec8d1e93.tar.bz2
llvm-984ee6544580453320597d496f2a67adec8d1e93.tar.xz
3 files changed, 188 insertions, 57 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ecf718f639..171f832a44 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3504,6 +3504,24 @@ static bool isX86CCUnsigned(unsigned X86CC) {
   llvm_unreachable("covered switch fell through?!");
 }
 
+/// Convert an integer condition code to an x86 one in the most straightforward
+/// way possible, with no optimisation attempt.
+static unsigned getSimpleX86IntCC(ISD::CondCode SetCCOpcode) {
+  switch (SetCCOpcode) {
+  default: llvm_unreachable("Invalid integer condition!");
+  case ISD::SETEQ:  return X86::COND_E;
+  case ISD::SETGT:  return X86::COND_G;
+  case ISD::SETGE:  return X86::COND_GE;
+  case ISD::SETLT:  return X86::COND_L;
+  case ISD::SETLE:  return X86::COND_LE;
+  case ISD::SETNE:  return X86::COND_NE;
+  case ISD::SETULT: return X86::COND_B;
+  case ISD::SETUGT: return X86::COND_A;
+  case ISD::SETULE: return X86::COND_BE;
+  case ISD::SETUGE: return X86::COND_AE;
+  }
+}
+
 /// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
 /// specific condition code, returning the condition code and the LHS/RHS of the
 /// comparison to make.
@@ -3527,19 +3545,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
       }
     }
 
-    switch (SetCCOpcode) {
-    default: llvm_unreachable("Invalid integer condition!");
-    case ISD::SETEQ:  return X86::COND_E;
-    case ISD::SETGT:  return X86::COND_G;
-    case ISD::SETGE:  return X86::COND_GE;
-    case ISD::SETLT:  return X86::COND_L;
-    case ISD::SETLE:  return X86::COND_LE;
-    case ISD::SETNE:  return X86::COND_NE;
-    case ISD::SETULT: return X86::COND_B;
-    case ISD::SETUGT: return X86::COND_A;
-    case ISD::SETULE: return X86::COND_BE;
-    case ISD::SETUGE: return X86::COND_AE;
-    }
+    return getSimpleX86IntCC(SetCCOpcode);
   }
 
   // First determine if it is required or is profitable to flip the operands.
@@ -10374,10 +10380,77 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
   return SDValue(New.getNode(), 1);
 }
 
-/// Emit nodes that will be selected as "cmp Op0,Op1", or something
-/// equivalent.
-SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
-                                   SDLoc dl, SelectionDAG &DAG) const {
+static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
+                             SelectionDAG &DAG) {
+  MVT T = Op.getSimpleValueType();
+  SDLoc DL(Op);
+  unsigned Reg = 0;
+  unsigned size = 0;
+  switch(T.SimpleTy) {
+  default: llvm_unreachable("Invalid value type!");
+  case MVT::i8:  Reg = X86::AL;  size = 1; break;
+  case MVT::i16: Reg = X86::AX;  size = 2; break;
+  case MVT::i32: Reg = X86::EAX; size = 4; break;
+  case MVT::i64:
+    assert(Subtarget->is64Bit() && "Node not type legal!");
+    Reg = X86::RAX; size = 8;
+    break;
+  }
+  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
+                                    Op.getOperand(2), SDValue());
+  SDValue Ops[] = { cpIn.getValue(0),
+                    Op.getOperand(1),
+                    Op.getOperand(3),
+                    DAG.getTargetConstant(size, MVT::i8),
+                    cpIn.getValue(1) };
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+                                           Ops, T, MMO);
+  SDValue cpOut =
+    DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
+  return cpOut;
+}
+
+
+/// Emit nodes that will be selected as "cmp Op0,Op1", or something equivalent
+/// and set X86CC to the needed condition code to make use of the
+/// comparison. This may not be the obvious equivalent to CC if the comparison
+/// can be achieved more efficiently using a different method.
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+                                   SDLoc dl, SelectionDAG &DAG,
+                                   unsigned &X86CC) const {
+   // A cmpxchg instruction will actually perform the comparison
+  if ((Op0.getOpcode() == ISD::ATOMIC_CMP_SWAP && Op0.getOperand(2) == Op1) ||
+      (Op1.getOpcode() == ISD::ATOMIC_CMP_SWAP && Op1.getOperand(2) == Op0)) {
+    SDValue CmpSwap;
+    if (Op0.getOpcode() == ISD::ATOMIC_CMP_SWAP) {
+      // x86's cmpxchg instruction performs the equivalent of "cmp desired,
+      // loaded".
+      CmpSwap = Op0;
+      CC = getSetCCSwappedOperands(CC);
+    } else
+      CmpSwap = Op1;
+
+    // Lowering the CmpSwap node gives us a getCopyFromReg SDValue representing
+    // the value loaded. I.e. the ATOMIC_CMP_SWAP
+    X86CC = getSimpleX86IntCC(CC);
+    SDValue EFLAGS = LowerCMP_SWAP(CmpSwap, Subtarget, DAG);
+    DAG.ReplaceAllUsesOfValueWith(CmpSwap.getValue(0), EFLAGS);
+
+    // Glue on a copy from EFLAGS to be used in place of the required Cmp.
+    EFLAGS = DAG.getCopyFromReg(EFLAGS.getValue(1), dl, X86::EFLAGS,
+                                MVT::i32, EFLAGS.getValue(2));
+    DAG.ReplaceAllUsesOfValueWith(CmpSwap.getValue(1), EFLAGS.getValue(1));
+
+    return EFLAGS;
+  }
+
+  bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
+  X86CC = TranslateX86CC(CC, IsFP, Op0, Op1, DAG);
+  if (X86CC == X86::COND_INVALID)
+    return SDValue();
+
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) {
     if (C->getAPIntValue() == 0)
       return EmitTest(Op0, X86CC, dl, DAG);
@@ -10385,7 +10458,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
      if (Op0.getValueType() == MVT::i1)
        llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
   }
- 
+
   if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
        Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
     // Do the comparison at i32 if it's smaller, besides the Atom case. 
@@ -10951,12 +11024,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, MVT::i1), NewCC);
   }
 
-  bool isFP = Op1.getSimpleValueType().isFloatingPoint();
-  unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
-  if (X86CC == X86::COND_INVALID)
+  unsigned X86CC;
+  SDValue EFLAGS = EmitCmp(Op0, Op1, CC, dl, DAG, X86CC);
+  if (!EFLAGS.getNode())
     return SDValue();
 
-  SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
   EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
   SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                               DAG.getConstant(X86CC, MVT::i8), EFLAGS);
@@ -11384,6 +11456,14 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
     if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
       Cond = Cmp;
       addTest = false;
+    } else if (Opc == ISD::CopyFromReg &&
+               Cmp->getOperand(0)->getOpcode() == ISD::CopyFromReg &&
+               Cmp->getOperand(0)->getOperand(0)->getOpcode() ==
+                   X86ISD::LCMPXCHG_DAG) {
+      assert(cast<RegisterSDNode>(Cmp->getOperand(1))->getReg() == X86::EFLAGS);
+      Chain = Cmp.getValue(1);
+      Cond = Cmp;
+      addTest = false;
     } else {
       switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
       default: break;
@@ -14363,38 +14443,6 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
   return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
 }
 
-static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
-                             SelectionDAG &DAG) {
-  MVT T = Op.getSimpleValueType();
-  SDLoc DL(Op);
-  unsigned Reg = 0;
-  unsigned size = 0;
-  switch(T.SimpleTy) {
-  default: llvm_unreachable("Invalid value type!");
-  case MVT::i8:  Reg = X86::AL;  size = 1; break;
-  case MVT::i16: Reg = X86::AX;  size = 2; break;
-  case MVT::i32: Reg = X86::EAX; size = 4; break;
-  case MVT::i64:
-    assert(Subtarget->is64Bit() && "Node not type legal!");
-    Reg = X86::RAX; size = 8;
-    break;
-  }
-  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
-                                    Op.getOperand(2), SDValue());
-  SDValue Ops[] = { cpIn.getValue(0),
-                    Op.getOperand(1),
-                    Op.getOperand(3),
-                    DAG.getTargetConstant(size, MVT::i8),
-                    cpIn.getValue(1) };
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
-  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
-                                           Ops, T, MMO);
-  SDValue cpOut =
-    DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
-  return cpOut;
-}
-
 static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
                             SelectionDAG &DAG) {
   MVT SrcVT = Op.getOperand(0).getSimpleValueType();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index af2e4344e5..2521446490 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1006,9 +1006,10 @@ namespace llvm {
                      SelectionDAG &DAG) const;
 
     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
-    /// equivalent, for use with the given x86 condition code.
-    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl,
-                    SelectionDAG &DAG) const;
+    /// equivalent, possibly adjusting the condition code to a more appropriate
+    /// x86 form.
+    SDValue EmitCmp(SDValue Op0, SDValue Op1, ISD::CondCode CC, SDLoc dl,
+                    SelectionDAG &DAG, unsigned &X86CC) const;
 
     /// Convert a comparison if required by the subtarget.
     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/X86/cmpxchg-i1.ll b/test/CodeGen/X86/cmpxchg-i1.ll
new file mode 100644
index 0000000000..53cf3e300f
--- /dev/null
+++ b/test/CodeGen/X86/cmpxchg-i1.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mtriple=x86_64 -o - %s | FileCheck %s
+
+define i1 @try_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: try_cmpxchg:
+; CHECK: cmpxchgl
+; CHECK-NOT: cmp
+; CHECK: sete %al
+; CHECK: retq
+  %old = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %success = icmp eq i32 %old, %desired
+  ret i1 %success
+}
+
+define void @cmpxchg_flow(i64* %addr, i64 %desired, i64 %new) {
+; CHECK-LABEL: cmpxchg_flow:
+; CHECK: cmpxchgq
+; CHECK-NOT: cmp
+; CHECK-NOT: set
+; CHECK: {{jne|jeq}}
+  %old = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst seq_cst
+  %success = icmp eq i64 %old, %desired
+  br i1 %success, label %true, label %false
+
+true:
+  call void @foo()
+  ret void
+
+false:
+  call void @bar()
+  ret void
+}
+
+define i1 @cmpxchg_arithcmp(i16* %addr, i16 %desired, i16 %new) {
+; CHECK-LABEL: cmpxchg_arithcmp:
+; CHECK: cmpxchgw
+; CHECK-NOT: cmp
+; CHECK: setbe %al
+; CHECK: retq
+  %old = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst seq_cst
+  %success = icmp uge i16 %old, %desired
+  ret i1 %success
+}
+
+define i1 @cmpxchg_arithcmp_swapped(i8* %addr, i8 %desired, i8 %new) {
+; CHECK-LABEL: cmpxchg_arithcmp_swapped:
+; CHECK: cmpxchgb
+; CHECK-NOT: cmp
+; CHECK: setge %al
+; CHECK: retq
+  %old = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst seq_cst
+  %success = icmp sge i8 %desired, %old
+  ret i1 %success
+}
+
+define i64 @cmpxchg_sext(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: cmpxchg_sext:
+; CHECK-DAG: cmpxchgl
+; CHECK-DAG: movq $-1, %rax
+; CHECK-DAG: xorl %e[[ZERO:[a-z0-9]+]], %e[[ZERO]]
+; CHECK-NOT: cmpl
+; CHECK: cmovneq %r[[ZERO]], %rax
+; CHECK: retq
+  %old = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %success = icmp eq i32 %old, %desired
+  %mask = sext i1 %success to i64
+  ret i64 %mask
+}
+
+define i32 @cmpxchg_zext(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: cmpxchg_zext:
+; CHECK: cmpxchgl
+; CHECK-NOT: cmp
+; CHECK: sete [[BYTE:%[a-z0-9]+]]
+; CHECK: movzbl [[BYTE]], %eax
+  %old = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %success = icmp eq i32 %old, %desired
+  %mask = zext i1 %success to i32
+  ret i32 %mask
+}
+
+declare void @foo()
+declare void @bar()
author	Tim Northover <tnorthover@apple.com>	2014-06-10 10:49:07 +0000
committer	Tim Northover <tnorthover@apple.com>	2014-06-10 10:49:07 +0000
commit	984ee6544580453320597d496f2a67adec8d1e93 (patch)
tree	f453f399972a362f1a91ffecd2357eff2cbcd39f
parent	46b3076cd0b1e57e16cfe4ecaa517eb78863a92a (diff)
download	llvm-984ee6544580453320597d496f2a67adec8d1e93.tar.gz llvm-984ee6544580453320597d496f2a67adec8d1e93.tar.bz2 llvm-984ee6544580453320597d496f2a67adec8d1e93.tar.xz