summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Qin <Kevin.Qin@arm.com>2014-06-18 05:54:42 +0000
committerKevin Qin <Kevin.Qin@arm.com>2014-06-18 05:54:42 +0000
commit74287ec34c1443e462aba573c6e8f7d7f9defa35 (patch)
tree8b967c714bd45e4ee587cefcdfb9ea407b41fbd2
parent10d664fee72917960c8bf38cc2ec8e3b84924044 (diff)
downloadllvm-74287ec34c1443e462aba573c6e8f7d7f9defa35.tar.gz
llvm-74287ec34c1443e462aba573c6e8f7d7f9defa35.tar.bz2
llvm-74287ec34c1443e462aba573c6e8f7d7f9defa35.tar.xz
[AArch64] Fix a pattern match failure caused by creating improper CONCAT_VECTOR.
ReconstructShuffle() may wrongly create a CONCAT_VECTOR trying to concat 2 of v2i32 into v4i16. This commit is to fix this issue and try to generate UZP1 instead of lots of MOV and INS. Patch is initialized by Kevin Qin, and refactored by Tim Northover. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211144 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp66
-rw-r--r--test/CodeGen/AArch64/arm64-convert-v4f64.ll33
2 files changed, 72 insertions, 27 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index ec10968874..e45ca4dbc0 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4110,6 +4110,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -4158,35 +4159,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
int VEXTOffsets[2] = { 0, 0 };
+ int OffsetMultipliers[2] = { 1, 1 };
// This loop extracts the usage patterns of the source vectors
// and prepares appropriate SDValues for a shuffle if possible.
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
+ unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
+ SDValue CurSource = SourceVecs[i];
+ if (SourceVecs[i].getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+ // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
+ // Then bitcast it to the vector which holds asserted element type,
+ // and record the multiplier of element width between SourceVecs and
+ // Build_vector which is needed to extract the correct lanes later.
+ EVT CastVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ SourceVecs[i].getValueSizeInBits() /
+ VT.getVectorElementType().getSizeInBits());
+
+ CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
+ OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
+ NumSrcElts *= OffsetMultipliers[i];
+ MaxElts[i] *= OffsetMultipliers[i];
+ MinElts[i] *= OffsetMultipliers[i];
+ }
+
+ if (CurSource.getValueType() == VT) {
// No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
+ ShuffleSrcs[i] = CurSource;
VEXTOffsets[i] = 0;
continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ } else if (NumSrcElts < NumElts) {
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
- ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
- DAG.getUNDEF(SourceVecs[i].getValueType()));
+ ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
+ DAG.getUNDEF(CurSource.getValueType()));
continue;
}
- // Don't attempt to extract subvectors from BUILD_VECTOR sources
- // that expand or trunc the original value.
- // TODO: We can try to bitcast and ANY_EXTEND the result but
- // we need to consider the cost of vector ANY_EXTEND, and the
- // legality of all the types.
- if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType())
- return SDValue();
-
// Since only 64-bit and 128-bit vectors are legal on ARM and
// we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+ assert(NumSrcElts == 2 * NumElts &&
"unexpected vector sizes in ReconstructShuffle");
if (MaxElts[i] - MinElts[i] >= NumElts) {
@@ -4197,22 +4210,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (MinElts[i] >= NumElts) {
// The extraction can just take the second half
VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
} else if (MaxElts[i] < NumElts) {
// The extraction can just take the first half
VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
} else {
// An actual VEXT is needed
VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
DAG.getConstant(Imm, MVT::i32));
@@ -4232,9 +4243,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int ExtractElt =
cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
} else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
+ VEXTOffsets[1]);
}
}
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
new file mode 100644
index 0000000000..6bfb0a54ea
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+
+define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i16
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
+; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+ %tmp1 = load <4 x double>* %ptr
+ %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
+ ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i8
+; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
+; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
+; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
+; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
+; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+ %tmp1 = load <8 x double>* %ptr
+ %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
+ ret <8 x i8> %tmp2
+}
+