 lib/Target/AArch64/AArch64ISelLowering.cpp  | 66 ++++++++++++++++----------
 test/CodeGen/AArch64/arm64-convert-v4f64.ll | 33 ++++++++++++++++
 2 files changed, 72 insertions(+), 27 deletions(-)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index ec10968874..e45ca4dbc0 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4110,6 +4110,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -4158,35 +4159,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
int VEXTOffsets[2] = { 0, 0 };
+ int OffsetMultipliers[2] = { 1, 1 };
// This loop extracts the usage patterns of the source vectors
// and prepares appropriate SDValues for a shuffle if possible.
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
+ unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
+ SDValue CurSource = SourceVecs[i];
+ if (SourceVecs[i].getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+ // We may hit this case if SourceVecs[i] is an AssertSext/AssertZext node.
+ // If so, bitcast it to a vector of the asserted element type, and record
+ // the element-width multiplier between SourceVecs[i] and the BUILD_VECTOR;
+ // it is needed to extract the correct lanes later.
+ EVT CastVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ SourceVecs[i].getValueSizeInBits() /
+ VT.getVectorElementType().getSizeInBits());
+
+ CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
+ OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
+ NumSrcElts *= OffsetMultipliers[i];
+ MaxElts[i] *= OffsetMultipliers[i];
+ MinElts[i] *= OffsetMultipliers[i];
+ }
+
+ if (CurSource.getValueType() == VT) {
// No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
+ ShuffleSrcs[i] = CurSource;
VEXTOffsets[i] = 0;
continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ } else if (NumSrcElts < NumElts) {
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
- ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
- DAG.getUNDEF(SourceVecs[i].getValueType()));
+ ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
+ DAG.getUNDEF(CurSource.getValueType()));
continue;
}
- // Don't attempt to extract subvectors from BUILD_VECTOR sources
- // that expand or trunc the original value.
- // TODO: We can try to bitcast and ANY_EXTEND the result but
- // we need to consider the cost of vector ANY_EXTEND, and the
- // legality of all the types.
- if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType())
- return SDValue();
-
// Since only 64-bit and 128-bit vectors are legal on ARM and
// we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+ assert(NumSrcElts == 2 * NumElts &&
"unexpected vector sizes in ReconstructShuffle");
if (MaxElts[i] - MinElts[i] >= NumElts) {
@@ -4197,22 +4210,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (MinElts[i] >= NumElts) {
// The extraction can just take the second half
VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
} else if (MaxElts[i] < NumElts) {
// The extraction can just take the first half
VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
} else {
// An actual VEXT is needed
VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
DAG.getConstant(Imm, MVT::i32));
@@ -4232,9 +4243,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int ExtractElt =
cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
} else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
+ VEXTOffsets[1]);
}
}
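To make the new lane arithmetic concrete, here is a minimal standalone sketch (plain C++, not LLVM code; the 128-bit two-lane source viewed as four i32 lanes is an illustrative assumption) of the multiplier and mask-index computations introduced above:

#include <cassert>

int main() {
  // Assume a 128-bit AssertSext source with 2 lanes feeding a BUILD_VECTOR
  // whose element type is i32, so the source is bitcast to a 4-lane view.
  unsigned SrcBits = 128;                      // SourceVecs[i].getValueSizeInBits()
  unsigned EltBits = 32;                       // VT.getVectorElementType() width
  unsigned NumSrcElts = 2;                     // lanes before the bitcast
  unsigned CastElts = SrcBits / EltBits;       // CastVT lane count: 4
  unsigned Multiplier = CastElts / NumSrcElts; // OffsetMultipliers[i]: 2
  assert(CastElts == 4 && Multiplier == 2);

  // Lane 1 of the original source starts at lane 1 * 2 = 2 of the cast
  // view; this is the ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]
  // term in the shuffle-mask loop above.
  int ExtractElt = 1, VEXTOffset = 0;
  assert(ExtractElt * static_cast<int>(Multiplier) - VEXTOffset == 2);
  return 0;
}

MinElts and MaxElts are rescaled by the same multiplier, so the later first-half/second-half/VEXT decisions keep reasoning in cast-view lanes.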
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
new file mode 100644
index 0000000000..6bfb0a54ea
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+
+define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i16
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
+; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+ %tmp1 = load <4 x double>* %ptr
+ %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
+ ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @fptosi_v8f64_to_v8i8(<8 x double>* %ptr) {
+; CHECK: fptosi_v8f64_to_v8i8
+; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
+; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
+; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[NA2]].4h, v[[NA3]].4h
+; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[NA0]].4h, v[[NA1]].4h
+; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+ %tmp1 = load <8 x double>* %ptr
+ %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
+ ret <8 x i8> %tmp2
+}
+
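As a closing illustration of what the checked instruction sequences compute, here is a hedged scalar model (plain C++; the helper name is ours, not LLVM's, and it holds only for values in range of the destination type): fcvtzs converts f64 to i64 rounding toward zero, xtn keeps the low 32 bits, and the final uzp1 selects the even 16-bit lanes, i.e. the low 16 bits of each result element, which together match fptosi to i16 for in-range inputs.

#include <cassert>
#include <cstdint>

// Scalar model of fcvtzs -> xtn -> uzp1 for a single element (hypothetical
// helper; assumes the double fits in the destination integer type).
int16_t model_fptosi_f64_to_i16(double D) {
  int64_t Wide = static_cast<int64_t>(D);      // fcvtzs: round toward zero
  int32_t Narrow = static_cast<int32_t>(Wide); // xtn: keep the low 32 bits
  return static_cast<int16_t>(Narrow);         // uzp1: keep the low 16 bits
}

int main() {
  assert(model_fptosi_f64_to_i16(-42.9) == -42);
  assert(model_fptosi_f64_to_i16(1000.5) == 1000);
  return 0;
}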