summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHao Liu <Hao.Liu@arm.com>2014-02-14 02:21:56 +0000
committerHao Liu <Hao.Liu@arm.com>2014-02-14 02:21:56 +0000
commit9c4127a6a57de4affe71b9dd1ac6b3b8ebfe99e3 (patch)
treefcf19ec6b340fa3a221ca390fbfb388614e2055b
parent35d2f6151024a63b4bf84730aaa88e34413a8220 (diff)
downloadllvm-9c4127a6a57de4affe71b9dd1ac6b3b8ebfe99e3.tar.gz
llvm-9c4127a6a57de4affe71b9dd1ac6b3b8ebfe99e3.tar.bz2
llvm-9c4127a6a57de4affe71b9dd1ac6b3b8ebfe99e3.tar.xz
[AArch64]Fix the assertion failure caused by "v1i1 SETCC" DAG node.
As v1i1 is illegal, the type legalizer tries to scalarize such node. But if the type operands of SETCC is legal, the scalarization algorithm will cause an assertion failure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201381 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp90
-rw-r--r--test/CodeGen/AArch64/neon-v1i1-setcc.ll68
2 files changed, 158 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 769a2bcb62..c7aa753f6c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -521,6 +521,10 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
}
+
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::VSELECT);
}
EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
@@ -4258,6 +4262,89 @@ static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return SDValue(N, 0);
}
+// v1i1 setcc ->
+// v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt))
+// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as result.
+// If it can legalize "v1i1 SETCC" correctly, no need to combine such SETCC.
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT ResVT = N->getValueType(0);
+
+ if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 ||
+ ResVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT CmpVT = LHS.getValueType();
+ LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ CmpVT.getVectorElementType(), LHS,
+ DAG.getConstant(0, MVT::i64));
+ RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ CmpVT.getVectorElementType(), RHS,
+ DAG.getConstant(0, MVT::i64));
+ SDValue SetCC =
+ DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS,
+ cast<CondCodeSDNode>(N->getOperand(2))->get());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC);
+}
+
+// vselect (v1i1 setcc) ->
+// vselect (v1iXX setcc) (XX is the size of the compared operand type)
+// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
+// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
+// such VSELECT.
+static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT CCVT = N0.getValueType();
+
+ if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
+ CCVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ EVT ResVT = N->getValueType(0);
+ EVT CmpVT = N0.getOperand(0).getValueType();
+ // Only combine when the result type is of the same size as the compared
+ // operands.
+ if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
+ return SDValue();
+
+ SDValue IfTrue = N->getOperand(1);
+ SDValue IfFalse = N->getOperand(2);
+ SDValue SetCC =
+ DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
+ IfTrue, IfFalse);
+}
+
+// sign_extend (extract_vector_elt (v1i1 setcc)) ->
+// extract_vector_elt (v1iXX setcc)
+// (XX is the size of the compared operand type)
+static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue Vec = N0.getOperand(0);
+
+ if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Vec.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ EVT ResVT = N->getValueType(0);
+ EVT CmpVT = Vec.getOperand(0).getValueType();
+ // Only optimize when the result type is of the same size as the element
+ // type of the compared operand.
+ if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ SDValue Lane = N0.getOperand(1);
+ SDValue SetCC =
+ DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
+ Vec.getOperand(0), Vec.getOperand(1),
+ cast<CondCodeSDNode>(Vec.getOperand(2))->get());
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT,
+ SetCC, Lane);
+}
+
SDValue
AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -4269,6 +4356,9 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SRA:
case ISD::SRL:
return PerformShiftCombine(N, DCI, getSubtarget());
+ case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG);
+ case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
+ case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
case ISD::INTRINSIC_WO_CHAIN:
return PerformIntrinsicCombine(N, DCI.DAG);
case AArch64ISD::NEON_VDUPLANE:
diff --git a/test/CodeGen/AArch64/neon-v1i1-setcc.ll b/test/CodeGen/AArch64/neon-v1i1-setcc.ll
new file mode 100644
index 0000000000..6c7d009f08
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-v1i1-setcc.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+; This file test the DAG node like "v1i1 SETCC v1i64, v1i64". As the v1i1 type
+; is illegal in AArch64 backend, the legalizer tries to scalarize this node.
+; As the v1i64 operands of SETCC are legal types, they will not be scalarized.
+; Currently the type legalizer will have an assertion failure as it assumes all
+; operands of SETCC have been legalized.
+; FIXME: If the algorithm of type scalarization is improved and can legaize
+; "v1i1 SETCC" correctly, these test cases are not needed.
+
+define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_0:
+; CHECK: cmge d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = icmp sge <1 x i64> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ %vget_lane = sext i1 %2 to i64
+ ret i64 %vget_lane
+}
+
+define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_1:
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fcmp oeq <1 x double> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ %vget_lane = sext i1 %2 to i64
+ ret i64 %vget_lane
+}
+
+define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_0:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_1:
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = fcmp oeq <1 x double> %v1, %v2
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
+; CHECK-LABEL: test_select_v1i1_2:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
+ ret <1 x double> %res
+}
+
+define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_br_extr_cmp:
+; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ br i1 %2, label %if.end, label %if.then
+
+if.then:
+ ret i32 0;
+
+if.end:
+ ret i32 1;
+}