diff options
author | Filipe Cabecinhas <me@filcab.net> | 2014-06-06 18:07:06 +0000 |
---|---|---|
committer | Filipe Cabecinhas <me@filcab.net> | 2014-06-06 18:07:06 +0000 |
commit | 78cf19b9b98e7ea3d4873fc5b1496c1835221951 (patch) | |
tree | f273ed22e3f5f3d20649d0eb0d55dd437aa48f5b /lib | |
parent | 64d39d3281aaabc09ee792312a48251fd3114a3b (diff) | |
download | llvm-78cf19b9b98e7ea3d4873fc5b1496c1835221951.tar.gz llvm-78cf19b9b98e7ea3d4873fc5b1496c1835221951.tar.bz2 llvm-78cf19b9b98e7ea3d4873fc5b1496c1835221951.tar.xz |
Fixed a bug in lowering shuffle_vectors to insertps
Summary:
We were being too strict and not accounting for undefs.
Added a test case and fixed another one where we improved codegen.
Reviewers: grosbach, nadav, delena
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D4039
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210361 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 29 |
1 files changed, 20 insertions, 9 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 610be3f0e1..9e728c7f7a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3964,14 +3964,22 @@ static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) { unsigned CorrectPosV1 = 0; unsigned CorrectPosV2 = 0; - for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) + for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) { + if (Mask[i] == -1) { + ++CorrectPosV1; + ++CorrectPosV2; + continue; + } + if (Mask[i] == i) ++CorrectPosV1; else if (Mask[i] == i + 4) ++CorrectPosV2; + } if (CorrectPosV1 == 3 || CorrectPosV2 == 3) - // We have 3 elements from one vector, and one from another. + // We have 3 elements (undefs count as elements from any vector) from one + // vector, and one from another. return true; return false; @@ -7462,8 +7470,9 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl, assert((VT == MVT::v4f32 || VT == MVT::v4i32) && "unsupported vector type for insertps/pinsrd"); - int FromV1 = std::count_if(Mask.begin(), Mask.end(), - [](const int &i) { return i < 4; }); + auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; }; + auto FromV2Predicate = [](const int &i) { return i >= 4; }; + int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate); SDValue From; SDValue To; @@ -7471,15 +7480,17 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl, if (FromV1 == 1) { From = V1; To = V2; - DestIndex = std::find_if(Mask.begin(), Mask.end(), - [](const int &i) { return i < 4; }) - + DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) - Mask.begin(); } else { + assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 && + "More than one element from V1 and from V2, or no elements from one " + "of the vectors. This case should not have returned true from " + "isINSERTPSMask"); From = V2; To = V1; - DestIndex = std::find_if(Mask.begin(), Mask.end(), - [](const int &i) { return i >= 4; }) - - Mask.begin(); + DestIndex = + std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin(); } if (MayFoldLoad(From)) { |