summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp29
-rw-r--r--test/CodeGen/X86/avx-shuffle.ll6
-rw-r--r--test/CodeGen/X86/sse41.ll11
3 files changed, 35 insertions, 11 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 610be3f0e1..9e728c7f7a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3964,14 +3964,22 @@ static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
unsigned CorrectPosV1 = 0;
unsigned CorrectPosV2 = 0;
- for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i)
+ for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
+ if (Mask[i] == -1) {
+ ++CorrectPosV1;
+ ++CorrectPosV2;
+ continue;
+ }
+
if (Mask[i] == i)
++CorrectPosV1;
else if (Mask[i] == i + 4)
++CorrectPosV2;
+ }
if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
- // We have 3 elements from one vector, and one from another.
+ // We have 3 elements (undefs count as elements from any vector) from one
+ // vector, and one from another.
return true;
return false;
@@ -7462,8 +7470,9 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
"unsupported vector type for insertps/pinsrd");
- int FromV1 = std::count_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i < 4; });
+ auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
+ auto FromV2Predicate = [](const int &i) { return i >= 4; };
+ int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
SDValue From;
SDValue To;
@@ -7471,15 +7480,17 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
if (FromV1 == 1) {
From = V1;
To = V2;
- DestIndex = std::find_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i < 4; }) -
+ DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
Mask.begin();
} else {
+ assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
+ "More than one element from V1 and from V2, or no elements from one "
+ "of the vectors. This case should not have returned true from "
+ "isINSERTPSMask");
From = V2;
To = V1;
- DestIndex = std::find_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i >= 4; }) -
- Mask.begin();
+ DestIndex =
+ std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
}
if (MayFoldLoad(From)) {
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index f407ba4cc1..f3f7e554a3 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -5,8 +5,10 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
%b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef>
ret <4 x float> %b
; CHECK-LABEL: test1:
-; CHECK: vshufps
-; CHECK: vpshufd
+;; TODO: This test could be improved by removing the xor instruction and
+;; having vinsertps zero out the needed elements.
+; CHECK: vxorps
+; CHECK: vinsertps
}
; rdar://10538417
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index a3c62016c4..a77ede228d 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -692,3 +692,14 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
%13 = fadd <4 x float> %11, %12
ret <4 x float> %13
}
+
+define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
+; CHECK-LABEL: insertps_with_undefs:
+; CHECK-NOT: shufps
+; CHECK: insertps $32, %xmm0
+; CHECK: ret
+ %1 = load float* %b, align 4
+ %2 = insertelement <4 x float> undef, float %1, i32 0
+ %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
+ ret <4 x float> %result
+}