summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp113
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h6
-rw-r--r--test/CodeGen/AArch64/neon-copy.ll270
3 files changed, 360 insertions, 29 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 27277c47f3..4aae9e59e2 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4154,21 +4154,70 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
-// Check whether a Build Vector could be presented as Shuffle Vector. If yes,
-// try to call LowerVECTOR_SHUFFLE to lower it.
+// Check whether a shuffle_vecotor could be presented as conact_vector.
+bool AArch64TargetLowering::isConactVector(SDValue Op,SelectionDAG &DAG,
+ SDValue V0, SDValue V1,
+ const int* Mask,
+ SDValue &Res) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned V0NumElts = V0.getValueType().getVectorNumElements();
+ bool isContactVector = true;
+ bool splitV0 = false;
+ int offset = 0;
+ for (int I = 0, E = NumElts; I != E; I++){
+ if (Mask[I] != I + offset) {
+ if(I && !splitV0 && Mask[I] == I + (int)V0NumElts / 2) {
+ splitV0 = true;
+ offset = V0NumElts / 2;
+ } else {
+ isContactVector = false;
+ break;
+ }
+ }
+ }
+ if (isContactVector) {
+ EVT CastVT = EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(), NumElts / 2);
+ if(CastVT.getSizeInBits() < 64)
+ return false;
+
+ if (splitV0) {
+ assert(V0NumElts >= NumElts / 2 &&
+ "invalid operand for extract_subvector!");
+ V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
+ DAG.getConstant(0, MVT::i64));
+ }
+ if (NumElts != V1.getValueType().getVectorNumElements() * 2) {
+ assert(V1.getValueType().getVectorNumElements() >= NumElts / 2 &&
+ "invalid operand for extract_subvector!");
+ V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
+ DAG.getConstant(0, MVT::i64));
+ }
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
+ return true;
+ }
+ return false;
+}
+
+// Check whether a Build Vector could be presented as Shuffle Vector.
+// This Shuffle Vector maybe not legalized, so the length of its operand and
+// the length of result may not equal.
bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
- SDValue &Res) const {
+ SDValue &V0, SDValue &V1,
+ int *Mask) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned V0NumElts = 0;
- int Mask[16];
- SDValue V0, V1;
// Check if all elements are extracted from less than 3 vectors.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Elt = Op.getOperand(i);
- if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Elt.getOperand(0).getValueType().getVectorElementType() !=
+ VT.getVectorElementType())
return false;
if (V0.getNode() == 0) {
@@ -4189,25 +4238,7 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
return false;
}
}
-
- if (!V1.getNode() && V0NumElts == NumElts * 2) {
- V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
- DAG.getConstant(NumElts, MVT::i64));
- V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
- DAG.getConstant(0, MVT::i64));
- V0NumElts = V0.getValueType().getVectorNumElements();
- }
-
- if (V1.getNode() && NumElts == V0NumElts &&
- V0NumElts == V1.getValueType().getVectorNumElements()) {
- SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
- if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
- Res = Shuffle;
- else
- Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
- return true;
- } else
- return false;
+ return true;
}
// If this is a case we can't handle, return null and let the default
@@ -4413,9 +4444,31 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Try to lower this in lowering ShuffleVector way.
- SDValue Shuf;
- if (isKnownShuffleVector(Op, DAG, Shuf))
- return Shuf;
+ SDValue V0, V1;
+ int Mask[16];
+ if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) {
+ unsigned V0NumElts = V0.getValueType().getVectorNumElements();
+ if (!V1.getNode() && V0NumElts == NumElts * 2) {
+ V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
+ DAG.getConstant(NumElts, MVT::i64));
+ V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
+ DAG.getConstant(0, MVT::i64));
+ V0NumElts = V0.getValueType().getVectorNumElements();
+ }
+
+ if (V1.getNode() && NumElts == V0NumElts &&
+ V0NumElts == V1.getValueType().getVectorNumElements()) {
+ SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
+ if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
+ return Shuffle;
+ else
+ return LowerVECTOR_SHUFFLE(Shuffle, DAG);
+ } else {
+ SDValue Res;
+ if(isConactVector(Op, DAG, V0, V1, Mask, Res))
+ return Res;
+ }
+ }
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
@@ -4601,6 +4654,10 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISDNo, dl, VT, V1, V2);
}
+ SDValue Res;
+ if (isConactVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
+ return Res;
+
// If the element of shuffle mask are all the same constant, we can
// transform it into either NEON_VDUP or NEON_VDUPLANE
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 3879663e57..8baf992a2b 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -232,7 +232,11 @@ public:
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;
+ bool isConactVector(SDValue Op,SelectionDAG &DAG, SDValue V0, SDValue V1,
+ const int* Mask, SDValue &Res) const;
+
+ bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0,
+ SDValue &V1, int *Mask) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const;
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
index 7faf2a2a8c..514689a2e6 100644
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -979,4 +979,274 @@ entry:
%0 = extractelement <1 x float> %a, i32 0
%vecinit1.i = insertelement <4 x float> undef, float %0, i32 0
ret <4 x float> %vecinit1.i
+}
+
+define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
+; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %vecinit30
+}
+
+define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
+; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <8 x i8> %x, i32 0
+ %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
+ %vecext1 = extractelement <8 x i8> %x, i32 1
+ %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
+ %vecext3 = extractelement <8 x i8> %x, i32 2
+ %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
+ %vecext5 = extractelement <8 x i8> %x, i32 3
+ %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
+ %vecext7 = extractelement <8 x i8> %x, i32 4
+ %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
+ %vecext9 = extractelement <8 x i8> %x, i32 5
+ %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
+ %vecext11 = extractelement <8 x i8> %x, i32 6
+ %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
+ %vecext13 = extractelement <8 x i8> %x, i32 7
+ %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
+ %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %vecinit30
+}
+
+define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
+; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <16 x i8> %x, i32 0
+ %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
+ %vecext1 = extractelement <16 x i8> %x, i32 1
+ %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
+ %vecext3 = extractelement <16 x i8> %x, i32 2
+ %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
+ %vecext5 = extractelement <16 x i8> %x, i32 3
+ %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
+ %vecext7 = extractelement <16 x i8> %x, i32 4
+ %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
+ %vecext9 = extractelement <16 x i8> %x, i32 5
+ %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
+ %vecext11 = extractelement <16 x i8> %x, i32 6
+ %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
+ %vecext13 = extractelement <16 x i8> %x, i32 7
+ %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
+ %vecext15 = extractelement <8 x i8> %y, i32 0
+ %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
+ %vecext17 = extractelement <8 x i8> %y, i32 1
+ %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
+ %vecext19 = extractelement <8 x i8> %y, i32 2
+ %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
+ %vecext21 = extractelement <8 x i8> %y, i32 3
+ %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
+ %vecext23 = extractelement <8 x i8> %y, i32 4
+ %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
+ %vecext25 = extractelement <8 x i8> %y, i32 5
+ %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
+ %vecext27 = extractelement <8 x i8> %y, i32 6
+ %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
+ %vecext29 = extractelement <8 x i8> %y, i32 7
+ %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
+ ret <16 x i8> %vecinit30
+}
+
+define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
+; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <8 x i8> %x, i32 0
+ %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
+ %vecext1 = extractelement <8 x i8> %x, i32 1
+ %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
+ %vecext3 = extractelement <8 x i8> %x, i32 2
+ %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
+ %vecext5 = extractelement <8 x i8> %x, i32 3
+ %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
+ %vecext7 = extractelement <8 x i8> %x, i32 4
+ %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
+ %vecext9 = extractelement <8 x i8> %x, i32 5
+ %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
+ %vecext11 = extractelement <8 x i8> %x, i32 6
+ %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
+ %vecext13 = extractelement <8 x i8> %x, i32 7
+ %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
+ %vecext15 = extractelement <8 x i8> %y, i32 0
+ %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
+ %vecext17 = extractelement <8 x i8> %y, i32 1
+ %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
+ %vecext19 = extractelement <8 x i8> %y, i32 2
+ %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
+ %vecext21 = extractelement <8 x i8> %y, i32 3
+ %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
+ %vecext23 = extractelement <8 x i8> %y, i32 4
+ %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
+ %vecext25 = extractelement <8 x i8> %y, i32 5
+ %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
+ %vecext27 = extractelement <8 x i8> %y, i32 6
+ %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
+ %vecext29 = extractelement <8 x i8> %y, i32 7
+ %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
+ ret <16 x i8> %vecinit30
+}
+
+define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
+; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i16> %vecinit14
+}
+
+define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
+; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <4 x i16> %x, i32 0
+ %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
+ %vecext1 = extractelement <4 x i16> %x, i32 1
+ %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
+ %vecext3 = extractelement <4 x i16> %x, i32 2
+ %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
+ %vecext5 = extractelement <4 x i16> %x, i32 3
+ %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
+ %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i16> %vecinit14
+}
+
+define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
+; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <8 x i16> %x, i32 0
+ %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
+ %vecext1 = extractelement <8 x i16> %x, i32 1
+ %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
+ %vecext3 = extractelement <8 x i16> %x, i32 2
+ %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
+ %vecext5 = extractelement <8 x i16> %x, i32 3
+ %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
+ %vecext7 = extractelement <4 x i16> %y, i32 0
+ %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
+ %vecext9 = extractelement <4 x i16> %y, i32 1
+ %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
+ %vecext11 = extractelement <4 x i16> %y, i32 2
+ %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
+ %vecext13 = extractelement <4 x i16> %y, i32 3
+ %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
+ ret <8 x i16> %vecinit14
+}
+
+define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
+; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <4 x i16> %x, i32 0
+ %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
+ %vecext1 = extractelement <4 x i16> %x, i32 1
+ %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
+ %vecext3 = extractelement <4 x i16> %x, i32 2
+ %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
+ %vecext5 = extractelement <4 x i16> %x, i32 3
+ %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
+ %vecext7 = extractelement <4 x i16> %y, i32 0
+ %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
+ %vecext9 = extractelement <4 x i16> %y, i32 1
+ %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
+ %vecext11 = extractelement <4 x i16> %y, i32 2
+ %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
+ %vecext13 = extractelement <4 x i16> %y, i32 3
+ %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
+ ret <8 x i16> %vecinit14
+}
+
+define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
+; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x i32> %vecinit6
+}
+
+define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
+; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <2 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <2 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x i32> %vecinit6
+}
+
+define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
+; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <4 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecext3 = extractelement <2 x i32> %y, i32 0
+ %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
+ %vecext5 = extractelement <2 x i32> %y, i32 1
+ %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
+ ret <4 x i32> %vecinit6
+}
+
+define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
+; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <2 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <2 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecext3 = extractelement <2 x i32> %y, i32 0
+ %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
+ %vecext5 = extractelement <2 x i32> %y, i32 1
+ %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
+ ret <4 x i32> %vecinit6
+}
+
+define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
+; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %vecinit2
+}
+
+define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
+; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <1 x i64> %x, i32 0
+ %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
+ %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %vecinit2
+}
+
+define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
+; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <2 x i64> %x, i32 0
+ %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
+ %vecext1 = extractelement <1 x i64> %y, i32 0
+ %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
+ ret <2 x i64> %vecinit2
+}
+
+define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
+; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <1 x i64> %x, i32 0
+ %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
+ %vecext1 = extractelement <1 x i64> %y, i32 0
+ %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
+ ret <2 x i64> %vecinit2
} \ No newline at end of file