diff options
author | Hao Liu <Hao.Liu@arm.com> | 2014-04-22 09:57:06 +0000 |
---|---|---|
committer | Hao Liu <Hao.Liu@arm.com> | 2014-04-22 09:57:06 +0000 |
commit | 07dcdc7c9057e11c7e1d61ac4eca1b2720f18f6a (patch) | |
tree | 1462e3595688457811d65d73016f0b3fee01767f | |
parent | 53b4d83b63148d620469d0ecc4152dfcba97fb20 (diff) | |
download | llvm-07dcdc7c9057e11c7e1d61ac4eca1b2720f18f6a.tar.gz llvm-07dcdc7c9057e11c7e1d61ac4eca1b2720f18f6a.tar.bz2 llvm-07dcdc7c9057e11c7e1d61ac4eca1b2720f18f6a.tar.xz |
Fix an infinite loop bug in DAG Combine caused by repeatedly converting between ANY_EXTEND and SIGN_EXTEND.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206873 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 | ||||
-rw-r--r-- | test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll | 23 | ||||
-rw-r--r-- | test/CodeGen/ARM64/vselect.ll | 9 |
3 files changed, 38 insertions, 10 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cdf1aedd97..a52dacf521 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5479,7 +5479,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { - // aext(setcc) -> sext_in_reg(vsetcc) for vectors. + // For vectors: + // aext(setcc) -> vsetcc + // aext(setcc) -> truncate(vsetcc) + // aext(setcc) -> aext(vsetcc) // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); @@ -5494,19 +5497,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend + // truncate/any extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); + return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); } } diff --git a/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll b/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll new file mode 100644 index 0000000000..a73b707180 --- /dev/null +++ b/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=arm64 + +; This test case tests an infinite loop bug in DAG combiner. 
+; It just tries to do the following replacing endlessly: +; (1) Replacing.3 0x2c509f0: v4i32 = any_extend 0x2c4cd08 [ORD=4] +; With: 0x2c4d128: v4i32 = sign_extend 0x2c4cd08 [ORD=4] +; +; (2) Replacing.2 0x2c4d128: v4i32 = sign_extend 0x2c4cd08 [ORD=4] +; With: 0x2c509f0: v4i32 = any_extend 0x2c4cd08 [ORD=4] +; As we think the (2) optimization from SIGN_EXTEND to ANY_EXTEND is +; an optimization to replace unused bits with undefined bits, we remove +; the (1) optimization (It doesn't make sense to replace undefined bits +; with signed bits). + +define <4 x i32> @infiniteLoop(<4 x i32> %in0, <4 x i16> %in1) { +entry: + %cmp.i = icmp sge <4 x i16> %in1, <i16 32767, i16 32767, i16 -1, i16 -32768> + %sext.i = sext <4 x i1> %cmp.i to <4 x i32> + %mul.i = mul <4 x i32> %in0, %sext.i + %sext = shl <4 x i32> %mul.i, <i32 16, i32 16, i32 16, i32 16> + %vmovl.i.i = ashr <4 x i32> %sext, <i32 16, i32 16, i32 16, i32 16> + ret <4 x i32> %vmovl.i.i +}
\ No newline at end of file diff --git a/test/CodeGen/ARM64/vselect.ll b/test/CodeGen/ARM64/vselect.ll index 07274a0501..aa8e81eb70 100644 --- a/test/CodeGen/ARM64/vselect.ll +++ b/test/CodeGen/ARM64/vselect.ll @@ -2,7 +2,14 @@ ;CHECK: @func63 ;CHECK: cmeq.4h v0, v0, v1 -;CHECK: sshll.4s v0, v0, #0 + +;FIXME: currently, it will generate 3 instructions: +; ushll.4s v0, v0, #0 +; shl.4s v0, v0, #31 +; sshr.4s v0, v0, #31 +;But these instructions can be optimized into 1 instruction: +; sshll.4s v0, v0, #0 + ;CHECK: bsl.16b v0, v2, v3 ;CHECK: str q0, [x0] ;CHECK: ret |