diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-03-17 18:58:11 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-03-17 18:58:11 +0000 |
commit | 2683baa8acbcfc44b94a7af781c43674d28d9a2e (patch) | |
tree | 4b2490eb3097989f32c98c52612b51b5674878fe /lib | |
parent | 94bdb453a40e53e4318380b5a262ce3c324d10ce (diff) | |
download | llvm-2683baa8acbcfc44b94a7af781c43674d28d9a2e.tar.gz llvm-2683baa8acbcfc44b94a7af781c43674d28d9a2e.tar.bz2 llvm-2683baa8acbcfc44b94a7af781c43674d28d9a2e.tar.xz |
R600: Match sign_extend_inreg to BFE instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204072 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 111 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 6 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUInstrInfo.td | 4 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.h | 9 | ||||
-rw-r--r-- | lib/Target/R600/AMDILISelLowering.cpp | 35 | ||||
-rw-r--r-- | lib/Target/R600/AMDILIntrinsics.td | 4 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 5 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 17 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 10 |
9 files changed, 154 insertions, 47 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 4e4b12eacc..ddf251f38b 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -211,6 +211,20 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FSUB, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); } + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); } //===----------------------------------------------------------------------===// @@ -927,6 +941,101 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, } +SDValue AMDGPUTargetLowering::ExpandSIGN_EXTEND_INREG(SDValue Op, + unsigned BitsDiff, + SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + SDLoc DL(Op); + SDValue Shift = DAG.getConstant(BitsDiff, VT); + // Shift left by 'Shift' bits. + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Op.getOperand(0), Shift); + // Signed shift Right by 'Shift' bits. + return DAG.getNode(ISD::SRA, DL, VT, Shl, Shift); +} + +SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, + SelectionDAG &DAG) const { + EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + MVT VT = Op.getSimpleValueType(); + MVT ScalarVT = VT.getScalarType(); + + unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits(); + unsigned DestBits = ScalarVT.getSizeInBits(); + unsigned BitsDiff = DestBits - SrcBits; + + if (!Subtarget->hasBFE()) + return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + + SDValue Src = Op.getOperand(0); + if (VT.isVector()) { + SDLoc DL(Op); + // Need to scalarize this, and revisit each of the scalars later. + // TODO: Don't scalarize on Evergreen? + unsigned NElts = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Args; + ExtractVectorElements(Src, DAG, Args, 0, NElts); + + SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); + for (unsigned I = 0; I < NElts; ++I) + Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size()); + } + + if (SrcBits == 32) { + SDLoc DL(Op); + + // If the source is 32-bits, this is really half of a 2-register pair, and + // we need to discard the unused half of the pair. + SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc); + } + + unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1; + + // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it + // might not be worth the effort, and will need to expand to shifts when + // fixing SGPR copies. + if (SrcBits < 32 && DestBits <= 32) { + SDLoc DL(Op); + MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); + + if (DestBits != 32) + Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src); + + // FIXME: This should use TargetConstant, but that hits assertions for + // Evergreen. + SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT, + Op.getOperand(0), // Operand + DAG.getConstant(0, ExtVT), // Offset + DAG.getConstant(SrcBits, ExtVT)); // Width + + // Truncate to the original type if necessary. + if (ScalarVT == MVT::i32) + return Ext; + return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext); + } + + // For small types, extend to 32-bits first. + if (SrcBits < 32) { + SDLoc DL(Op); + MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); + + SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src); + SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32, + DL, + ExtVT, + TruncSrc, // Operand + DAG.getConstant(0, ExtVT), // Offset + DAG.getConstant(SrcBits, ExtVT)); // Width + + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32); + } + + // For everything else, use the standard bitshift expansion. + return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); +} + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// @@ -1019,6 +1128,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FMIN) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(BFE_U32) + NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(URECIP) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(EXPORT) diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 2efb9c78a3..2595c51d16 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -142,6 +142,10 @@ private: SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; + + SDValue ExpandSIGN_EXTEND_INREG(SDValue Op, + unsigned BitsDiff, + SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -171,6 +175,8 @@ enum { UMIN, URECIP, DOT4, + BFE_U32, // Extract range of bits with zero extension to 32-bits. + BFE_I32, // Extract range of bits with sign extension to 32-bits. TEXTURE_FETCH, EXPORT, CONST_ADDRESS, diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index fccede01ab..2138bd23a3 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -86,3 +86,7 @@ def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR", def AMDGPUround : SDNode<"ISD::FROUND", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>; + +def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>; +def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; + diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 7e7f4d0c00..8874d14c18 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -68,6 +68,15 @@ public: enum Generation getGeneration() const; bool hasHWFP64() const; bool hasCaymanISA() const; + + bool hasBFE() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBFM() const { + return hasBFE(); + } + bool IsIRStructurizerEnabled() const; bool isIfCvtEnabled() const; unsigned getWavefrontSize() const; diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index 970787ef31..5dfaad4c1c 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -94,9 +94,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() { for (unsigned int x = 0; x < NumTypes; ++x) { MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; - //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types - // We cannot sextinreg, expand to shifts - setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); setOperationAction(ISD::SUBE, VT, Expand); setOperationAction(ISD::SUBC, VT, Expand); setOperationAction(ISD::ADDE, VT, Expand); @@ -191,14 +188,12 @@ void AMDGPUTargetLowering::InitAMDILLowering() { setOperationAction(ISD::UDIV, MVT::v4i8, Expand); setOperationAction(ISD::UDIV, MVT::v2i16, Expand); setOperationAction(ISD::UDIV, MVT::v4i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); setOperationAction(ISD::SUBC, MVT::Other, Expand); setOperationAction(ISD::ADDE, MVT::Other, Expand); setOperationAction(ISD::ADDC, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); // Use the default implementation. @@ -322,36 +317,6 @@ AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { return DST; } -SDValue -AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { - SDValue Data = Op.getOperand(0); - VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); - SDLoc DL(Op); - EVT DVT = Data.getValueType(); - EVT BVT = BaseType->getVT(); - unsigned baseBits = BVT.getScalarType().getSizeInBits(); - unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; - unsigned shiftBits = srcBits - baseBits; - if (srcBits < 32) { - // If the op is less than 32 bits, then it needs to extend to 32bits - // so it can properly keep the upper bits valid. - EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); - Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); - shiftBits = 32 - baseBits; - DVT = IVT; - } - SDValue Shift = DAG.getConstant(shiftBits, DVT); - // Shift left by 'Shift' bits. - Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); - // Signed shift Right by 'Shift' bits. - Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); - if (srcBits < 32) { - // Once the sign extension is done, the op needs to be converted to - // its original type. - Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); - } - return Data; -} EVT AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const { int iSize = (size * numEle); diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td index 6ec3559af2..658deb5bc0 100644 --- a/lib/Target/R600/AMDILIntrinsics.td +++ b/lib/Target/R600/AMDILIntrinsics.td @@ -68,10 +68,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in { let TargetPrefix = "AMDIL", isTarget = 1 in { def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt; - def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">, - TernaryIntInt; - def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">, - TernaryIntInt; def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">, UnaryIntInt; def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 8c737125c8..4d15321fd0 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1383,6 +1383,11 @@ SDValue R600TargetLowering::LowerFormalArguments( PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), AMDGPUAS::CONSTANT_BUFFER_0); + // i64 isn't a legal type, so the register type used ends up as i32, which + // isn't expected here. It attempts to create this sextload, but it ends up + // being invalid. Somehow this seems to work with i64 arguments, but breaks + // for <1 x i64>. + // The first 36 bytes of the input buffer contains information about // thread group and global sizes. SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 698ad4afe6..ae3d8747a4 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1517,15 +1517,20 @@ let Predicates = [isEGorCayman] in { // Example Usage: // (Offset, Width) // - // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 - // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 - // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 - // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 + // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 + // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 + // (16, 8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 + // (24, 8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", - [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1, - i32:$src2))], + [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))], VecALU >; + + def BFE_INT_eg : R600_3OP <0x4, "BFE_INT", + [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))], + VecALU + >; + // XXX: This pattern is broken, disabling for now. See comment in // AMDGPUInstructions.td for more info. // def : BFEPattern <BFE_UINT_eg>; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9a18f7bc35..68b89a8c35 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1074,8 +1074,14 @@ def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; -def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; -def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; + +let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in { +def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", + [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>; +def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", + [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>; +} + def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; defm : BFIPatterns <V_BFI_B32>; def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", |