diff options
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 90 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-v1i1-setcc.ll | 68 |
2 files changed, 158 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 769a2bcb62..c7aa753f6c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -521,6 +521,10 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
     setOperationAction(ISD::MUL, MVT::v2i64, Expand);
   }
+
+  setTargetDAGCombine(ISD::SETCC);
+  setTargetDAGCombine(ISD::SIGN_EXTEND);
+  setTargetDAGCombine(ISD::VSELECT);
 }
 
 EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
@@ -4258,6 +4262,89 @@ static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   return SDValue(N, 0);
 }
 
+// v1i1 setcc ->
+//     v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt)))
+// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as result.
+// If it can legalize "v1i1 SETCC" correctly, no need to combine such SETCC.
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+  EVT ResVT = N->getValueType(0);
+
+  if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 ||
+      ResVT.getVectorElementType() != MVT::i1)
+    return SDValue();
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  EVT CmpVT = LHS.getValueType();
+  LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+                    CmpVT.getVectorElementType(), LHS,
+                    DAG.getConstant(0, MVT::i64));
+  RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+                    CmpVT.getVectorElementType(), RHS,
+                    DAG.getConstant(0, MVT::i64));
+  SDValue SetCC =
+      DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS,
+                   cast<CondCodeSDNode>(N->getOperand(2))->get());
+  return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC);
+}
+
+// vselect (v1i1 setcc) ->
+//     vselect (v1iXX setcc)  (XX is the size of the compared operand type)
+// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
+// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
+// such VSELECT.
+static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  EVT CCVT = N0.getValueType();
+
+  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
+      CCVT.getVectorElementType() != MVT::i1)
+    return SDValue();
+
+  EVT ResVT = N->getValueType(0);
+  EVT CmpVT = N0.getOperand(0).getValueType();
+  // Only combine when the result type is of the same size as the compared
+  // operands.
+  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
+    return SDValue();
+
+  SDValue IfTrue = N->getOperand(1);
+  SDValue IfFalse = N->getOperand(2);
+  SDValue SetCC =
+      DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
+                   N0.getOperand(0), N0.getOperand(1),
+                   cast<CondCodeSDNode>(N0.getOperand(2))->get());
+  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
+                     IfTrue, IfFalse);
+}
+
+// sign_extend (extract_vector_elt (v1i1 setcc)) ->
+//     extract_vector_elt (v1iXX setcc)
+// (XX is the size of the compared operand type)
+static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  // Test N0's opcode before reading its operand: N0 may have no operands.
+  if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      N0.getOperand(0).getOpcode() != ISD::SETCC)
+    return SDValue();
+  SDValue Vec = N0.getOperand(0);
+
+  EVT ResVT = N->getValueType(0);
+  EVT CmpVT = Vec.getOperand(0).getValueType();
+  // Only optimize when the result type is of the same size as the element
+  // type of the compared operand.
+  if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits())
+    return SDValue();
+
+  SDValue Lane = N0.getOperand(1);
+  SDValue SetCC =
+      DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
+                   Vec.getOperand(0), Vec.getOperand(1),
+                   cast<CondCodeSDNode>(Vec.getOperand(2))->get());
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT,
+                     SetCC, Lane);
+}
+
 SDValue
 AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
@@ -4269,6 +4356,9 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SRA:
   case ISD::SRL:
     return PerformShiftCombine(N, DCI, getSubtarget());
+  case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG);
+  case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
+  case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
   case ISD::INTRINSIC_WO_CHAIN:
     return PerformIntrinsicCombine(N, DCI.DAG);
   case AArch64ISD::NEON_VDUPLANE:
diff --git a/test/CodeGen/AArch64/neon-v1i1-setcc.ll b/test/CodeGen/AArch64/neon-v1i1-setcc.ll
new file mode 100644
index 0000000000..6c7d009f08
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-v1i1-setcc.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+; This file tests DAG nodes like "v1i1 SETCC v1i64, v1i64". As the v1i1 type
+; is illegal in AArch64 backend, the legalizer tries to scalarize this node.
+; As the v1i64 operands of SETCC are legal types, they will not be scalarized.
+; Currently the type legalizer will have an assertion failure as it assumes all
+; operands of SETCC have been legalized.
+; FIXME: If the algorithm of type scalarization is improved and can legalize
+; "v1i1 SETCC" correctly, these test cases are not needed.
+
+define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_0:
+; CHECK: cmge d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %cmp = icmp sge <1 x i64> %v1, %v2         ; <1 x i1> integer compare
+  %bit = extractelement <1 x i1> %cmp, i32 0 ; the only lane of the compare
+  %ext = sext i1 %bit to i64                 ; sign-extend i1 to i64
+  ret i64 %ext
+}
+
+define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_1:
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %cmp = fcmp oeq <1 x double> %v1, %v2      ; <1 x i1> floating-point compare
+  %bit = extractelement <1 x i1> %cmp, i32 0 ; the only lane of the compare
+  %ext = sext i1 %bit to i64                 ; sign-extend i1 to i64
+  ret i64 %ext
+}
+
+define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_0:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %cond = icmp eq <1 x i64> %v1, %v2         ; <1 x i1> select condition
+  %sel = select <1 x i1> %cond, <1 x i64> zeroinitializer, <1 x i64> %v3
+  ret <1 x i64> %sel
+}
+
+define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_1:
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %cond = fcmp oeq <1 x double> %v1, %v2     ; <1 x i1> select condition
+  %sel = select <1 x i1> %cond, <1 x i64> zeroinitializer, <1 x i64> %v3
+  ret <1 x i64> %sel
+}
+
+define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
+; CHECK-LABEL: test_select_v1i1_2:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %cond = icmp eq <1 x i64> %v1, %v2         ; <1 x i1> select condition
+  %sel = select <1 x i1> %cond, <1 x double> zeroinitializer, <1 x double> %v3
+  ret <1 x double> %sel
+}
+
+define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_br_extr_cmp:
+; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
+  %cmp = icmp eq <1 x i64> %v1, %v2          ; <1 x i1> integer compare
+  %bit = extractelement <1 x i1> %cmp, i32 0 ; the only lane of the compare
+  br i1 %bit, label %if.end, label %if.then  ; branch on the extracted i1
+
+if.then:
+  ret i32 0;
+
+if.end:
+  ret i32 1;
+}
|