summaryrefslogtreecommitdiff
path: root/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-02-20 21:33:32 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-02-20 21:33:32 +0000
commitc46e2df74cf75a33742f57d2b4d6c6fcf73bced9 (patch)
tree2527c3b68b24b2e2fb335d6c2988c382ee9e950a /lib/CodeGen/SelectionDAG/DAGCombiner.cpp
parent64f3e763cd8e4f32f91ae5b44ac4bd9986afddf2 (diff)
downloadllvm-c46e2df74cf75a33742f57d2b4d6c6fcf73bced9.tar.gz
llvm-c46e2df74cf75a33742f57d2b4d6c6fcf73bced9.tar.bz2
llvm-c46e2df74cf75a33742f57d2b4d6c6fcf73bced9.tar.xz
DAGCombiner: Fold pointless truncate, bitcast, buildvector series
(2xi32) (truncate ((2xi64) bitcast (buildvector i32 a, i32 x, i32 b, i32 y))) can be folded into a (2xi32) (buildvector i32 a, i32 b). Such a DAG would cause uneccessary vdup instructions followed by vmovn instructions. We generate this code on ARM NEON for a setcc olt, 2xf64, 2xf64. For example, in the vectorized version of the code below. double A[N]; double B[N]; void test_double_compare_to_double() { int i; for(i=0;i<N;i++) A[i] = (double)(A[i] < B[i]); } radar://13191881 Fixes bug 15283. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175670 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp32
1 files changed, 32 insertions, 0 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9d40ff799d..2777d7c2eb 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5361,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
+ Opnds.size());
+ }
+ }
+
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y