From f55ec9ac184e654ff8052b02c4f88fc27710f4da Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Mon, 20 Jan 2014 07:32:26 +0000 Subject: [AArch64 NEON] Fix a bug caused by undef lane when generating VEXT. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199628 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 36 +++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index e98fbe1d3a..27277c47f3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4654,22 +4654,28 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // it into NEON_VEXTRACT. if (V1EltNum == Length) { // Check if the shuffle mask is sequential. - bool IsSequential = true; - int CurMask = ShuffleMask[0]; - for (int I = 0; I < Length; ++I) { - if (ShuffleMask[I] != CurMask) { - IsSequential = false; - break; - } - CurMask++; + int SkipUndef = 0; + while (ShuffleMask[SkipUndef] == -1) { + SkipUndef++; } - if (IsSequential) { - assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect"); - unsigned VecSize = EltSize * V1EltNum; - unsigned Index = (EltSize/8) * ShuffleMask[0]; - if (VecSize == 64 || VecSize == 128) - return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2, - DAG.getConstant(Index, MVT::i64)); + int CurMask = ShuffleMask[SkipUndef]; + if (CurMask >= SkipUndef) { + bool IsSequential = true; + for (int I = SkipUndef; I < Length; ++I) { + if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) { + IsSequential = false; + break; + } + CurMask++; + } + if (IsSequential) { + assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect"); + unsigned VecSize = EltSize * V1EltNum; + unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef); + if (VecSize == 64 || VecSize == 128) + return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2, + DAG.getConstant(Index, MVT::i64)); + } } } -- cgit v1.2.3