From 79c6a4f3478bd21558e2c779667bec7d69e94ccc Mon Sep 17 00:00:00 2001
From: Kevin Qin
Date: Wed, 29 Jan 2014 01:57:30 +0000
Subject: [AArch64 NEON] Lower SELECT_CC with vector operand.

When the scalar compare is between floating point and operands are
vector, we custom lower SELECT_CC to use NEON SIMD compare for
generating less instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200365 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (not part of the original commit message; ignored by git am):

 * This copy was rebuilt from a whitespace-collapsed export. Line breaks,
   indentation and blank lines were reconstructed so that every hunk matches
   its @@ line counts; exact column alignment of continuation lines is a
   best effort and may differ from the committed revision.
 * Template arguments that the export had stripped as markup were restored:
   cast<CondCodeSDNode>(...), cast<SrcValueSDNode>(...) and the parameter
   list of Neon_bitwise3V_patterns in the .td hunk header (the header text
   is informational and was restored from the upstream file layout).
 * Deviation from the committed revision: in LowerVectorSELECT_CC the second
   ("Alternative") AArch64ISD::SELECT_CC node is now created with result type
   SEVT instead of Op.getValueType(). Its value operands (AllOne and the
   first SELECT_CC) are scalars of type SEVT, and the result is only widened
   to the vector type by the following SCALAR_TO_VECTOR / NEON_VDUP.
 * NOTE(review): the non-f32/f64 path of LowerVectorSELECT_CC translates the
   condition with FPCCToA64CC, while the scalar integer path in
   LowerSELECT_CC uses getSelectableIntSetCC. The new tests only exercise
   "icmp eq"; confirm unsigned/other integer predicates and f128 operands
   are handled as intended.

 lib/Target/AArch64/AArch64ISelLowering.cpp | 234 ++++++++++++++++++++++-------
 lib/Target/AArch64/AArch64InstrNEON.td     |   4 +
 test/CodeGen/AArch64/neon-select_cc.ll     | 180 ++++++++++++++++++++++
 3 files changed, 362 insertions(+), 56 deletions(-)
 create mode 100644 test/CodeGen/AArch64/neon-select_cc.ll

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 327547c27c..7f101fffa1 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -458,6 +458,32 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
     setOperationAction(ISD::FREM, MVT::v1f64, Expand);
     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
 
+    setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
+    setOperationAction(ISD::SELECT, MVT::v16i8, Expand);
+    setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
+    setOperationAction(ISD::SELECT, MVT::v8i16, Expand);
+    setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+    setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
+    setOperationAction(ISD::SELECT, MVT::v2i64, Expand);
+    setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
+    setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+    setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
+    setOperationAction(ISD::SELECT, MVT::v2f64, Expand);
+
+    setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom);
+
     // Vector ExtLoad and TruncStore are expanded.
     for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE;
          I <= MVT::LAST_VECTOR_VALUETYPE; ++I) {
@@ -2661,62 +2687,6 @@ AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   }
 }
 
-// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
-SDValue
-AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-  SDLoc dl(Op);
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  SDValue IfTrue = Op.getOperand(2);
-  SDValue IfFalse = Op.getOperand(3);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-
-  if (LHS.getValueType() == MVT::f128) {
-    // f128 comparisons are lowered to libcalls, but slot in nicely here
-    // afterwards.
-    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
-
-    // If softenSetCCOperands returned a scalar, we need to compare the result
-    // against zero to select between true and false values.
-    if (RHS.getNode() == 0) {
-      RHS = DAG.getConstant(0, LHS.getValueType());
-      CC = ISD::SETNE;
-    }
-  }
-
-  if (LHS.getValueType().isInteger()) {
-    SDValue A64cc;
-
-    // Integers are handled in a separate function because the combinations of
-    // immediates and tests can get hairy and we may want to fiddle things.
-    SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
-
-    return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
-                       CmpOp, IfTrue, IfFalse, A64cc);
-  }
-
-  // Note that some LLVM floating-point CondCodes can't be lowered to a single
-  // conditional branch, hence FPCCToA64CC can set a second test, where either
-  // passing is sufficient.
-  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
-  CondCode = FPCCToA64CC(CC, Alternative);
-  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
-  SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
-                              DAG.getCondCode(CC));
-  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
-                                     Op.getValueType(),
-                                     SetCC, IfTrue, IfFalse, A64cc);
-
-  if (Alternative != A64CC::Invalid) {
-    A64cc = DAG.getConstant(Alternative, MVT::i32);
-    A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
-                               SetCC, IfTrue, A64SELECT_CC, A64cc);
-
-  }
-
-  return A64SELECT_CC;
-}
-
 // (SELECT testbit, iftrue, iffalse)
 SDValue
 AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@@ -3004,6 +2974,158 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   return A64SELECT_CC;
 }
 
+static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue IfTrue = Op.getOperand(2);
+  SDValue IfFalse = Op.getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+  // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we will
+  // use NEON compare.
+  if ((LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64)) {
+    EVT EltVT = LHS.getValueType();
+    unsigned EltNum = 128 / EltVT.getSizeInBits();
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum);
+    unsigned SubConstant =
+        (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 :AArch64::sub_64;
+    EVT CEltT = (LHS.getValueType() == MVT::f32) ? MVT::i32 : MVT::i64;
+    EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum);
+
+    LHS
+      = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+                  VT, DAG.getTargetConstant(0, MVT::i32), LHS,
+                  DAG.getTargetConstant(SubConstant, MVT::i32)), 0);
+    RHS
+      = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+                  VT, DAG.getTargetConstant(0, MVT::i32), RHS,
+                  DAG.getTargetConstant(SubConstant, MVT::i32)), 0);
+
+    SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC);
+    SDValue ResCC = LowerVectorSETCC(VSetCC, DAG);
+    EVT IfTrueVT = IfTrue.getValueType();
+    EVT CastEltT =
+      MVT::getIntegerVT(IfTrueVT.getVectorElementType().getSizeInBits());
+    EVT CastVT = EVT::getVectorVT(*DAG.getContext(), CastEltT,
+                                  IfTrueVT.getVectorNumElements());
+    if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) {
+      EVT DUPVT =
+        EVT::getVectorVT(*DAG.getContext(), CEltT,
+                         IfTrueVT.getSizeInBits() / CEltT.getSizeInBits());
+      ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC,
+                          DAG.getConstant(0, MVT::i64, false));
+
+      ResCC = DAG.getNode(ISD::BITCAST, dl, CastVT, ResCC);
+    } else {
+      // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function
+      // can't handle them and will hit this assert.
+      assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() &&
+             "Vector of IfTrue & IfFalse is too small.");
+
+      unsigned ExEltNum =
+        EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits();
+      EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum);
+      ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC,
+                          DAG.getConstant(0, MVT::i64, false));
+      ResCC = DAG.getNode(ISD::BITCAST, dl, CastVT, ResCC);
+    }
+    SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
+                                  ResCC, IfTrue, IfFalse);
+    return VSelect;
+  }
+
+  // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are
+  // vectors.
+  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+  CondCode = FPCCToA64CC(CC, Alternative);
+  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+                              DAG.getCondCode(CC));
+  EVT SEVT = MVT::i32;
+  if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32)
+    SEVT = MVT::i64;
+  SDValue AllOne = DAG.getConstant(-1, SEVT);
+  SDValue AllZero = DAG.getConstant(0, SEVT);
+  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC,
+                                     AllOne, AllZero, A64cc);
+
+  if (Alternative != A64CC::Invalid) {
+    A64cc = DAG.getConstant(Alternative, MVT::i32);
+    A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC,
+                               AllOne, A64SELECT_CC, A64cc);
+  }
+  SDValue VDup;
+  if (IfTrue.getValueType().getVectorNumElements() == 1)
+    VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, IfTrue.getValueType(),
+                       A64SELECT_CC);
+  else
+    VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, IfTrue.getValueType(),
+                       A64SELECT_CC);
+  SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
+                                VDup, IfTrue, IfFalse);
+  return VSelect;
+}
+
+// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
+SDValue
+AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue IfTrue = Op.getOperand(2);
+  SDValue IfFalse = Op.getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+  if (IfTrue.getValueType().isVector())
+    return LowerVectorSELECT_CC(Op, DAG);
+
+  if (LHS.getValueType() == MVT::f128) {
+    // f128 comparisons are lowered to libcalls, but slot in nicely here
+    // afterwards.
+    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+    // If softenSetCCOperands returned a scalar, we need to compare the result
+    // against zero to select between true and false values.
+    if (RHS.getNode() == 0) {
+      RHS = DAG.getConstant(0, LHS.getValueType());
+      CC = ISD::SETNE;
+    }
+  }
+
+  if (LHS.getValueType().isInteger()) {
+    SDValue A64cc;
+
+    // Integers are handled in a separate function because the combinations of
+    // immediates and tests can get hairy and we may want to fiddle things.
+    SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+    return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), CmpOp,
+                       IfTrue, IfFalse, A64cc);
+  }
+
+  // Note that some LLVM floating-point CondCodes can't be lowered to a single
+  // conditional branch, hence FPCCToA64CC can set a second test, where either
+  // passing is sufficient.
+  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+  CondCode = FPCCToA64CC(CC, Alternative);
+  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+                              DAG.getCondCode(CC));
+  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
+                                     Op.getValueType(),
+                                     SetCC, IfTrue, IfFalse, A64cc);
+
+  if (Alternative != A64CC::Invalid) {
+    A64cc = DAG.getConstant(Alternative, MVT::i32);
+    A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+                               SetCC, IfTrue, A64SELECT_CC, A64cc);
+
+  }
+
+  return A64SELECT_CC;
+}
+
 SDValue
 AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
   const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 3056343abb..1309bf12b5 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -461,10 +461,14 @@ multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
   def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+  def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
+            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
   def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
   def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+  def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
+            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
   def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
   def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
diff --git a/test/CodeGen/AArch64/neon-select_cc.ll b/test/CodeGen/AArch64/neon-select_cc.ll
new file mode 100644
index 0000000000..537ec97d36
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-select_cc.ll
@@ -0,0 +1,180 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
+; CHECK-LABEL: test_select_cc_v8i8_i8:
+; CHECK: and w0, w0, #0xff
+; CHECK-NEXT: cmp w0, w1, uxtb
+; CHECK-NEXT: csinv w0, wzr, wzr, ne
+; CHECK-NEXT: dup v{{[0-9]+}}.8b, w0
+; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
+  %cmp31 = icmp eq i8 %a, %b
+  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
+  ret <8x i8> %e
+}
+
+define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
+; CHECK-LABEL: test_select_cc_v8i8_f32:
+; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
+; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
+  %cmp31 = fcmp oeq float %a, %b
+  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
+  ret <8x i8> %e
+}
+
+define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) {
+; CHECK-LABEL: test_select_cc_v8i8_f64:
+; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
+  %cmp31 = fcmp oeq double %a, %b
+  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
+  ret <8x i8> %e
+}
+
+define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) {
+; CHECK-LABEL: test_select_cc_v16i8_i8:
+; CHECK: and w0, w0, #0xff
+; CHECK-NEXT: cmp w0, w1, uxtb
+; CHECK-NEXT: csinv w0, wzr, wzr, ne
+; CHECK-NEXT: dup v{{[0-9]+}}.16b, w0
+; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
+  %cmp31 = icmp eq i8 %a, %b
+  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
+  ret <16x i8> %e
+}
+
+define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
+; CHECK-LABEL: test_select_cc_v16i8_f32:
+; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
+; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
+  %cmp31 = fcmp oeq float %a, %b
+  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
+  ret <16x i8> %e
+}
+
+define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
+; CHECK-LABEL: test_select_cc_v16i8_f64:
+; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
+; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
+  %cmp31 = fcmp oeq double %a, %b
+  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
+  ret <16x i8> %e
+}
+
+define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) {
+; CHECK-LABEL: test_select_cc_v4i16:
+; CHECK: and w0, w0, #0xffff
+; CHECK-NEXT: cmp w0, w1, uxth
+; CHECK-NEXT: csinv w0, wzr, wzr, ne
+; CHECK-NEXT: dup v{{[0-9]+}}.4h, w0
+; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
+  %cmp31 = icmp eq i16 %a, %b
+  %e = select i1 %cmp31, <4x i16> %c, <4x i16> %d
+  ret <4x i16> %e
+}
+
+define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) {
+; CHECK-LABEL: test_select_cc_v8i16:
+; CHECK: and w0, w0, #0xffff
+; CHECK-NEXT: cmp w0, w1, uxth
+; CHECK-NEXT: csinv w0, wzr, wzr, ne
+; CHECK-NEXT: dup v{{[0-9]+}}.8h, w0
+; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
+  %cmp31 = icmp eq i16 %a, %b
+  %e = select i1 %cmp31, <8x i16> %c, <8x i16> %d
+  ret <8x i16> %e
+}
+
+define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
+; CHECK-LABEL: test_select_cc_v2i32:
+; CHECK: cmp w0, w1, uxtw
+; CHECK-NEXT: csinv w0, wzr, wzr, ne
+; CHECK-NEXT: dup v{{[0-9]+}}.2s, w0
+; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
+  %cmp31 = icmp eq i32 %a, %b
+  %e = select i1 %cmp31, <2x i32> %c, <2x i32> %d
+  ret <2x i32> %e
+}
+
+define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
+; CHECK-LABEL: test_select_cc_v4i32:
+; CHECK: cmp w0, w1, uxtw
+; CHECK-NEXT: csinv w0, wzr, wzr, ne
+; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0
+; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
+  %cmp31 = icmp eq i32 %a, %b
+  %e = select i1 %cmp31, <4x i32> %c, <4x i32> %d
+  ret <4x i32> %e
+}
+
+define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) {
+; CHECK-LABEL: test_select_cc_v1i64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csinv x0, xzr, xzr, ne
+; CHECK-NEXT: fmov d{{[0-9]+}}, x0
+; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
+  %cmp31 = icmp eq i64 %a, %b
+  %e = select i1 %cmp31, <1x i64> %c, <1x i64> %d
+  ret <1x i64> %e
+}
+
+define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
+; CHECK-LABEL: test_select_cc_v2i64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csinv x0, xzr, xzr, ne
+; CHECK-NEXT: dup v{{[0-9]+}}.2d, x0
+; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
+  %cmp31 = icmp eq i64 %a, %b
+  %e = select i1 %cmp31, <2x i64> %c, <2x i64> %d
+  ret <2x i64> %e
+}
+
+define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
+; CHECK-LABEL: test_select_cc_v1f32:
+; CHECK: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, eq
+  %cmp31 = fcmp oeq float %a, %b
+  %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
+  ret <1 x float> %e
+}
+
+define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
+; CHECK-LABEL: test_select_cc_v2f32:
+; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
+; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
+  %cmp31 = fcmp oeq float %a, %b
+  %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
+  ret <2 x float> %e
+}
+
+define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
+; CHECK-LABEL: test_select_cc_v4f32:
+; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
+; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
+  %cmp31 = fcmp oeq float %a, %b
+  %e = select i1 %cmp31, <4x float> %c, <4x float> %d
+  ret <4x float> %e
+}
+
+define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
+; CHECK-LABEL: test_select_cc_v1f64:
+; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
+  %cmp31 = fcmp oeq double %a, %b
+  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
+  ret <1 x double> %e
+}
+
+define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
+; CHECK-LABEL: test_select_cc_v2f64:
+; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
+; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
+  %cmp31 = fcmp oeq double %a, %b
+  %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
+  ret <2 x double> %e
+}
-- 
cgit v1.2.3