diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-04-22 03:49:30 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-04-22 03:49:30 +0000 |
commit | 3ddf868b04ce4344952482b1383651f810677fce (patch) | |
tree | cbba8d795b2f6084702399469b31ae7744be0b17 | |
parent | 0240286c23735cbfd2e0507bb74a2688a9208b65 (diff) | |
download | llvm-3ddf868b04ce4344952482b1383651f810677fce.tar.gz llvm-3ddf868b04ce4344952482b1383651f810677fce.tar.bz2 llvm-3ddf868b04ce4344952482b1383651f810677fce.tar.xz |
R600: Make sign_extend_inreg legal.
Don't know why I didn't just do this in the first place.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206862 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 81 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 9 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 49 | ||||
-rw-r--r-- | test/CodeGen/R600/sext-in-reg.ll | 13 |
4 files changed, 61 insertions, 91 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index c1a607d25e..2990d0b91a 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1017,81 +1017,22 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, MVT VT = Op.getSimpleValueType(); MVT ScalarVT = VT.getScalarType(); - unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits(); - unsigned DestBits = ScalarVT.getSizeInBits(); - unsigned BitsDiff = DestBits - SrcBits; - - if (!Subtarget->hasBFE()) - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + if (!VT.isVector()) + return SDValue(); SDValue Src = Op.getOperand(0); - if (VT.isVector()) { - SDLoc DL(Op); - // Need to scalarize this, and revisit each of the scalars later. - // TODO: Don't scalarize on Evergreen? - unsigned NElts = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Args; - DAG.ExtractVectorElements(Src, Args); - - SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); - for (unsigned I = 0; I < NElts; ++I) - Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); - - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size()); - } - - if (SrcBits == 32) { - SDLoc DL(Op); - - // If the source is 32-bits, this is really half of a 2-register pair, and - // we need to discard the unused half of the pair. - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src); - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc); - } - - unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1; - - // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it - // might not be worth the effort, and will need to expand to shifts when - // fixing SGPR copies. - if (SrcBits < 32 && DestBits <= 32) { - SDLoc DL(Op); - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); - - if (DestBits != 32) - Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src); - - // FIXME: This should use TargetConstant, but that hits assertions for - // Evergreen. - SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT, - Op.getOperand(0), // Operand - DAG.getConstant(0, ExtVT), // Offset - DAG.getConstant(SrcBits, ExtVT)); // Width - - // Truncate to the original type if necessary. - if (ScalarVT == MVT::i32) - return Ext; - return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext); - } - - // For small types, extend to 32-bits first. - if (SrcBits < 32) { - SDLoc DL(Op); - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); + SDLoc DL(Op); - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src); - SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32, - DL, - ExtVT, - TruncSrc, // Operand - DAG.getConstant(0, ExtVT), // Offset - DAG.getConstant(SrcBits, ExtVT)); // Width + // TODO: Don't scalarize on Evergreen? + unsigned NElts = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Args; + DAG.ExtractVectorElements(Src, Args, 0, NElts); - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32); - } + SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); + for (unsigned I = 0; I < NElts; ++I) + Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); - // For everything else, use the standard bitshift expansion. - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size()); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 6c2d3139a6..b7c35d71aa 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -115,15 +115,15 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); @@ -165,9 +165,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index a4ffd48287..a1d2cf43f7 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -74,6 +74,7 @@ def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", [(set i32:$dst, (sext_inreg i32:$src0, i16))] >; + ////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>; ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; @@ -128,21 +129,6 @@ def S_CMPK_EQ_I32 : SOPK < >; */ -// Handle sext_inreg in i64 -def : Pat < - (i64 (sext_inreg i64:$src, i8)), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0), - (S_MOV_B32 -1), sub1) ->; - -def : Pat < - (i64 (sext_inreg i64:$src, i16)), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0), - (S_MOV_B32 -1), sub1) ->; - let isCompare = 1 in { def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; @@ -2253,6 +2239,39 @@ def : Pat< >; //===----------------------------------------------------------------------===// +// Conversion Patterns +//===----------------------------------------------------------------------===// + +def : Pat<(i32 (sext_inreg i32:$src, i1)), + (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16 + +// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it +// might not be worth the effort, and will need to expand to shifts when +// fixing SGPR copies. + +// Handle sext_inreg in i64 +def : Pat < + (i64 (sext_inreg i64:$src, i1)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0), // 0 | 1 << 16 + (S_MOV_B32 -1), sub1) +>; + +def : Pat < + (i64 (sext_inreg i64:$src, i8)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0), + (S_MOV_B32 -1), sub1) +>; + +def : Pat < + (i64 (sext_inreg i64:$src, i16)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0), + (S_MOV_B32 -1), sub1) +>; + +//===----------------------------------------------------------------------===// // Miscellaneous Patterns //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll index b722959aad..6ba7011b59 100644 --- a/test/CodeGen/R600/sext-in-reg.ll +++ b/test/CodeGen/R600/sext-in-reg.ll @@ -74,6 +74,19 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, ret void } +; FUNC-LABEL: @sext_in_reg_i1_to_i64 +; SI: S_ADD_I32 [[VAL:s[0-9]+]], +; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000 +; SI: S_MOV_B32 {{s[0-9]+}}, -1 +; SI: BUFFER_STORE_DWORDX2 +define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %c = add i64 %a, %b + %shl = shl i64 %c, 63 + %ashr = ashr i64 %shl, 63 + store i64 %ashr, i64 addrspace(1)* %out, align 8 + ret void +} + ; FUNC-LABEL: @sext_in_reg_i8_to_i64 ; SI: S_ADD_I32 [[VAL:s[0-9]+]], ; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]] |