summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Molloy <james.molloy@arm.com>2012-09-06 09:55:02 +0000
committerJames Molloy <james.molloy@arm.com>2012-09-06 09:55:02 +0000
commitba8562af4440753ba6175ccd54d71f79f5c4f3dc (patch)
treeb3a682afcaec10c5546b6c31f0e192659dc56b66
parent486270aee6ffd2a0c3c2333a8a0091c29f037aae (diff)
downloadllvm-ba8562af4440753ba6175ccd54d71f79f5c4f3dc.tar.gz
llvm-ba8562af4440753ba6175ccd54d71f79f5c4f3dc.tar.bz2
llvm-ba8562af4440753ba6175ccd54d71f79f5c4f3dc.tar.xz
Improve codegen for BUILD_VECTORs on ARM.
If we have a BUILD_VECTOR that is mostly a constant splat, it is often better to splat that constant then insertelement the non-constant lanes instead of insertelementing every lane from an undef base. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163304 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp66
-rw-r--r--test/CodeGen/ARM/vdup.ll34
2 files changed, 90 insertions, 10 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c17e9ae381..62c758931e 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -4161,10 +4161,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Scan through the operands to see if only one value is used.
+ //
+ // As an optimisation, even if more than one value is used it may be more
+ // profitable to splat with one value then change some lanes.
+ //
+ // Heuristically we decide to do this if the vector has a "dominant" value,
+ // defined as splatted to more than half of the lanes.
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
+ bool hasDominantValue = false;
bool isConstant = true;
+
+ // Map of the number of times a particular SDValue appears in the
+ // element list.
+ DenseMap<SDValue, int> ValueCounts;
SDValue Value;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
@@ -4175,13 +4186,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;
- if (!Value.getNode())
+ ValueCounts.insert(std::make_pair(V, 0));
+ int &Count = ValueCounts[V];
+
+ // Is this value dominant? (takes up more than half of the lanes)
+ if (++Count > (NumElts / 2)) {
+ hasDominantValue = true;
Value = V;
- else if (V != Value)
- usesOnlyOneValue = false;
+ }
}
+ if (ValueCounts.size() != 1)
+ usesOnlyOneValue = false;
+ if (!Value.getNode() && ValueCounts.size() > 0)
+ Value = ValueCounts.begin()->first;
- if (!Value.getNode())
+ if (ValueCounts.size() == 0)
return DAG.getUNDEF(VT);
if (isOnlyLowElement)
@@ -4191,9 +4210,34 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
- if (usesOnlyOneValue && EltSize <= 32) {
- if (!isConstant)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (hasDominantValue && EltSize <= 32) {
+ if (!isConstant) {
+ SDValue N;
+
+ // If we are VDUPing a value that comes directly from a vector, that will
+ // cause an unnecessary move to and from a GPR, where instead we could
+ // just use VDUPLANE.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ Value->getOperand(0), Value->getOperand(1));
+ else
+ N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ if (!usesOnlyOneValue) {
+ // The dominant value was splatted as 'N', but we now have to insert
+ // all differing elements.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (Op.getOperand(I) == Value)
+ continue;
+ SmallVector<SDValue, 3> Ops;
+ Ops.push_back(N);
+ Ops.push_back(Op.getOperand(I));
+ Ops.push_back(DAG.getConstant(I, MVT::i32));
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
+ }
+ }
+ return N;
+ }
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
@@ -4205,9 +4249,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
- SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
- if (Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ if (usesOnlyOneValue) {
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (isConstant && Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
}
// If all elements are constants and the case above didn't get hit, fall back
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 05332e4d8c..a8c224b438 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -261,3 +261,37 @@ define void @redundantVdup(<8 x i8>* %ptr) nounwind {
store <8 x i8> %2, <8 x i8>* %ptr, align 8
ret void
}
+
+define <4 x i32> @tdupi(i32 %x, i32 %y) {
+;CHECK: tdupi
+;CHECK: vdup.32
+ %1 = insertelement <4 x i32> undef, i32 %x, i32 0
+ %2 = insertelement <4 x i32> %1, i32 %x, i32 1
+ %3 = insertelement <4 x i32> %2, i32 %x, i32 2
+ %4 = insertelement <4 x i32> %3, i32 %y, i32 3
+ ret <4 x i32> %4
+}
+
+define <4 x float> @tdupf(float %x, float %y) {
+;CHECK: tdupf
+;CHECK: vdup.32
+ %1 = insertelement <4 x float> undef, float %x, i32 0
+ %2 = insertelement <4 x float> %1, float %x, i32 1
+ %3 = insertelement <4 x float> %2, float %x, i32 2
+ %4 = insertelement <4 x float> %3, float %y, i32 3
+ ret <4 x float> %4
+}
+
+; This test checks that when splatting an element from a vector into another,
+; the value isn't moved out to GPRs first.
+define <4 x i32> @tduplane(<4 x i32> %invec) {
+;CHECK: tduplane
+;CHECK-NOT: vmov {{.*}}, d16[1]
+;CHECK: vdup.32 {{.*}}, d16[1]
+ %in = extractelement <4 x i32> %invec, i32 1
+ %1 = insertelement <4 x i32> undef, i32 %in, i32 0
+ %2 = insertelement <4 x i32> %1, i32 %in, i32 1
+ %3 = insertelement <4 x i32> %2, i32 %in, i32 2
+ %4 = insertelement <4 x i32> %3, i32 255, i32 3
+ ret <4 x i32> %4
+}