diff options
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 88 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.h | 3 | ||||
-rw-r--r-- | test/CodeGen/AArch64/i128-shift.ll | 43 |
3 files changed, 134 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 86fa341bc2..388973a758 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -269,6 +269,11 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + // i128 shift operation support + setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); + // This prevents LLVM trying to compress double constants into a floating // constant-pool entry and trying to load from there. It's of doubtful benefit // for A64: we'd need LDR followed by FCVT, I believe. @@ -3296,6 +3301,10 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); @@ -4524,6 +4533,85 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, return true; } +// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which returns two +/// i64 values and take a 2 x i64 value to shift plus a shift amount. +SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a quad-shift!"); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; + + assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, + DAG.getConstant(VTBits, MVT::i64), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, + DAG.getConstant(VTBits, MVT::i64)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + SDValue Tmp3 = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + + SDValue A64cc; + SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt, + DAG.getConstant(0, MVT::i64), + ISD::SETGE, A64cc, + DAG, dl); + + SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, + DAG.getConstant(0, Tmp3.getValueType()), Tmp3, + A64cc); + SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, + TrueVal, FalseVal, A64cc); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, 2, dl); +} + +/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two +/// i64 values and take a 2 x i64 value to shift plus a shift amount. +SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a quad-shift!"); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + + assert(Op.getOpcode() == ISD::SHL_PARTS); + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, + DAG.getConstant(VTBits, MVT::i64), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, + DAG.getConstant(VTBits, MVT::i64)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue Tmp4 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + + SDValue A64cc; + SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt, + DAG.getConstant(0, MVT::i64), + ISD::SETGE, A64cc, + DAG, dl); + + SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, + DAG.getConstant(0, Tmp4.getValueType()), Tmp4, + A64cc); + SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, + Tmp3, FalseVal, A64cc); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, 2, dl); +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. SDValue diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 85e25ad6c4..e946b250e3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -232,6 +232,9 @@ public: SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1, const int *Mask, SDValue &Res) const; diff --git a/test/CodeGen/AArch64/i128-shift.ll b/test/CodeGen/AArch64/i128-shift.ll new file mode 100644 index 0000000000..d786d44896 --- /dev/null +++ b/test/CodeGen/AArch64/i128-shift.ll @@ -0,0 +1,43 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i128 @test_i128_lsl(i128 %a, i32 %shift) { +; CHECK-LABEL: test_i128_lsl: + + %sh_prom = zext i32 %shift to i128 + %shl = shl i128 %a, %sh_prom + +; CHECK: movz [[SIXTYFOUR:x[0-9]+]], #64 +; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw +; CHECK-NEXT: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[REVSHAMT]] +; CHECK: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[SHAMT:x[0-9]+]] +; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]] +; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64 +; CHECK-NEXT: lsl [[TMP3:x[0-9]+]], [[LO]], [[EXTRASHAMT]] +; CHECK-NEXT: cmp [[EXTRASHAMT]], #0 +; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], [[TMP3]], [[FALSEVAL]], ge +; CHECK-NEXT: lsl [[TMP4:x[0-9]+]], [[LO]], [[SHAMT]] +; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], xzr, [[TMP4]], ge + + ret i128 %shl +} + +define i128 @test_i128_shr(i128 %a, i32 %shift) { +; CHECK-LABEL: test_i128_shr: + + %sh_prom = zext i32 %shift to i128 + %shr = lshr i128 %a, %sh_prom + +; CHECK: movz [[SIXTYFOUR]], #64 +; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw +; CHECK-NEXT: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[REVSHAMT]] +; CHECK: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[SHAMT:x[0-9]+]] +; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]] +; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64 +; CHECK-NEXT: lsr [[TRUEVAL:x[0-9]+]], [[HI]], [[EXTRASHAMT]] +; CHECK-NEXT: cmp [[EXTRASHAMT]], #0 +; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], [[TRUEVAL]], [[FALSEVAL]], ge +; CHECK-NEXT: lsr [[TMP3:x[0-9]+]], [[HI]], [[SHAMT]] +; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], xzr, [[TMP3]], ge + + ret i128 %shr +} |