[X86] Add target combine rule to select ADDSUB instructions from a build_vector

This patch teaches the backend how to combine a build_vector that implements an 'addsub' between packed float vectors into a sequence of vector add and vector sub followed by a VSELECT. The new VSELECT is expected to be lowered into a BLENDI. At ISel stage, the sequence 'vector add + vector sub + BLENDI' is pattern-matched against ISel patterns added at r211427 to select 'addsub' instructions. Added three more ISel patterns for ADDSUB. Added test sse3-avx-addsub-2.ll to verify that we correctly emit 'addsub' instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211679 91177308-0d34-0410-b5e6-96231b3b80d8
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-06-25 10:02:21 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-06-25 10:02:21 +0000
commit: 3e5582cc15639f7c908326a7c15b941fb3253a33 (patch)
tree: 9c04cbc46280ed543efd6ff268a931ad2e0f5bf7 /lib/Target
parent: 97c856a6126f0853dd894cc5f78dfaa574c78540 (diff)
download: llvm-3e5582cc15639f7c908326a7c15b941fb3253a33.tar.gz
llvm-3e5582cc15639f7c908326a7c15b941fb3253a33.tar.bz2
llvm-3e5582cc15639f7c908326a7c15b941fb3253a33.tar.xz
2 files changed, 138 insertions, 1 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 874257f212..714fc0ccbd 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6222,6 +6222,127 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
 }
 
+/// \brief Try to fold a build_vector that performs an 'addsub' into the
+/// sequence of 'vadd + vsub + blendi'.
+static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
+                           const X86Subtarget *Subtarget) {
+  SDLoc DL(BV);
+  EVT VT = BV->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+  SDValue InVec0 = DAG.getUNDEF(VT);
+  SDValue InVec1 = DAG.getUNDEF(VT);
+
+  assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
+          VT == MVT::v2f64) && "build_vector with an invalid type found!");
+
+  // Don't try to emit a VSELECT that cannot be lowered into a blend.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+    return SDValue();
+
+  // Odd-numbered elements in the input build vector are obtained from
+  // adding two integer/float elements.
+  // Even-numbered elements in the input build vector are obtained from
+  // subtracting two integer/float elements.
+  unsigned ExpectedOpcode = ISD::FSUB;
+  unsigned NextExpectedOpcode = ISD::FADD;
+  bool AddFound = false;
+  bool SubFound = false;
+
+  for (unsigned i = 0, e = NumElts; i != e; i++) {
+    SDValue Op = BV->getOperand(i);
+      
+    // Skip 'undef' values.
+    unsigned Opcode = Op.getOpcode();
+    if (Opcode == ISD::UNDEF) {
+      std::swap(ExpectedOpcode, NextExpectedOpcode);
+      continue;
+    }
+      
+    // Early exit if we found an unexpected opcode.
+    if (Opcode != ExpectedOpcode)
+      return SDValue();
+
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    // Try to match the following pattern:
+    // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
+    // Early exit if we cannot match that sequence.
+    if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(Op0.getOperand(1)) ||
+        !isa<ConstantSDNode>(Op1.getOperand(1)) ||
+        Op0.getOperand(1) != Op1.getOperand(1))
+      return SDValue();
+
+    unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+    if (I0 != i)
+      return SDValue();
+
+    // We found a valid add/sub node. Update the information accordingly.
+    if (i & 1)
+      AddFound = true;
+    else
+      SubFound = true;
+
+    // Update InVec0 and InVec1.
+    if (InVec0.getOpcode() == ISD::UNDEF)
+      InVec0 = Op0.getOperand(0);
+    if (InVec1.getOpcode() == ISD::UNDEF)
+      InVec1 = Op1.getOperand(0);
+
+    // Make sure that operands in input to each add/sub node always
+    // come from a same pair of vectors.
+    if (InVec0 != Op0.getOperand(0)) {
+      if (ExpectedOpcode == ISD::FSUB)
+        return SDValue();
+
+      // FADD is commutable. Try to commute the operands
+      // and then test again.
+      std::swap(Op0, Op1);
+      if (InVec0 != Op0.getOperand(0))
+        return SDValue();
+    }
+
+    if (InVec1 != Op1.getOperand(0))
+      return SDValue();
+
+    // Update the pair of expected opcodes.
+    std::swap(ExpectedOpcode, NextExpectedOpcode);
+  }
+
+  // Don't try to fold this build_vector into a VSELECT if it has
+  // too many UNDEF operands.
+  if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
+      InVec1.getOpcode() != ISD::UNDEF) {
+    // Emit a sequence of vector add and sub followed by a VSELECT.
+    // The new VSELECT will be lowered into a BLENDI.
+    // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
+    // and emit a single ADDSUB instruction.
+    SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
+    SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
+
+    // Construct the VSELECT mask.
+    EVT MaskVT = VT.changeVectorElementTypeToInteger();
+    EVT SVT = MaskVT.getVectorElementType();
+    unsigned SVTBits = SVT.getSizeInBits();
+    SmallVector<SDValue, 8> Ops;
+
+    for (unsigned i = 0, e = NumElts; i != e; ++i) {
+      APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
+                            APInt::getAllOnesValue(SVTBits);
+      SDValue Constant = DAG.getConstant(Value, SVT);
+      Ops.push_back(Constant);
+    }
+
+    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
+    return DAG.getSelect(DL, VT, Mask, Sub, Add);
+  }
+  
+  return SDValue();
+}
+
 static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
                                           const X86Subtarget *Subtarget) {
   SDLoc DL(N);
@@ -6230,6 +6351,14 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
   SDValue InVec0, InVec1;
 
+  // Try to match an ADDSUB.
+  if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+      (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
+    SDValue Value = matchAddSub(BV, DAG, Subtarget);
+    if (Value.getNode())
+      return Value;
+  }
+
   // Try to match horizontal ADD/SUB.
   unsigned NumUndefsLO = 0;
   unsigned NumUndefsHI = 0;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a33d2cc832..988a059897 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5377,10 +5377,15 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4f64 (X86Shufp (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
                              (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
             (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
+  def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
+                              (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
+            (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
   def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
                               (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
             (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
-
+  def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
+                              (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))), 
+            (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
   def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
                              (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
             (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
@@ -5396,6 +5401,9 @@ let Predicates = [UseSSE3] in {
                               (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
             (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
 
+  def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
+                              (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))), 
+            (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
   def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
                              (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
             (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2014-06-25 10:02:21 +0000
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2014-06-25 10:02:21 +0000
commit	3e5582cc15639f7c908326a7c15b941fb3253a33 (patch)
tree	9c04cbc46280ed543efd6ff268a931ad2e0f5bf7 /lib/Target
parent	97c856a6126f0853dd894cc5f78dfaa574c78540 (diff)
download	llvm-3e5582cc15639f7c908326a7c15b941fb3253a33.tar.gz llvm-3e5582cc15639f7c908326a7c15b941fb3253a33.tar.bz2 llvm-3e5582cc15639f7c908326a7c15b941fb3253a33.tar.xz