diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-10 19:18:24 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-10 19:18:24 +0000 |
commit | 69891c0115542d191a43023f60eca4e0dfd8dbcb (patch) | |
tree | a711f08bf4cb55ac9815ab00cb00dada1913223e /lib | |
parent | ee9772d9ddbc62ca9f965ed2c0b40a3326bcd2b0 (diff) | |
download | llvm-69891c0115542d191a43023f60eca4e0dfd8dbcb.tar.gz llvm-69891c0115542d191a43023f60eca4e0dfd8dbcb.tar.bz2 llvm-69891c0115542d191a43023f60eca4e0dfd8dbcb.tar.xz |
R600/SI: Implement i64 ctpop
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210568 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 1 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.cpp | 45 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.td | 6 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 10 |
5 files changed, 64 insertions, 1 deletion
// Patch overview (R600/SI: Implement i64 ctpop). The hunks below are kept
// exactly as captured; most line breaks inside them were lost in extraction.
//
// SIISelLowering.cpp: marks ISD::CTPOP on MVT::i64 as Legal, next to the
//   existing i32 entry.
// SIInstrInfo.cpp: moveToVALU gains a case for AMDGPU::S_BCNT1_I32_B64 that
//   calls splitScalar64BitBCNT and then calls Inst->eraseFromParent().
//   splitScalar64BitBCNT(Worklist, Inst):
//     - reads operand 0 as the destination and operand 1 as the source;
//     - uses the source's register class when the source is a register,
//       otherwise falls back to SGPR_32RegClass;
//     - extracts the sub0 and sub1 halves with buildExtractSubRegOrImm;
//     - builds V_BCNT_U32_B32_e32 on sub0 with immediate 0 into MidReg, then
//       V_BCNT_U32_B32_e32 on sub1 with MidReg into ResultReg
//       (NOTE(review): presumably the second operand is added to the bit
//       count, so the chain sums both halves -- confirm against the ISA doc);
//     - calls MRI.replaceRegWith to substitute ResultReg for the destination
//       register and pushes both new instructions onto Worklist.
// SIInstrInfo.h: declares splitScalar64BitBCNT as a private const member.
// SIInstrInfo.td: adds class SOP1_32_64 (outs SReg_32:$dst, ins SSrc_64:$src0).
// SIInstructions.td: replaces the commented-out S_BCNT1_I32_B64 line with a
//   SOP1_32_64 def (op 0x00000010, empty pattern list) and adds a Pat that
//   selects (i64 (ctpop i64:$src)) to S_BCNT1_I32_B64 placed in sub0 with
//   S_MOV_B32 0 placed in sub1.
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 48ce7d8b7e..d05d71add8 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -212,6 +212,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : } setOperationAction(ISD::CTPOP, MVT::i32, Legal); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index fdebb2ffb4..5436fc0e84 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -1178,6 +1178,11 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { Inst->eraseFromParent(); continue; + case AMDGPU::S_BCNT1_I32_B64: + splitScalar64BitBCNT(Worklist, Inst); + Inst->eraseFromParent(); + continue; + case AMDGPU::S_BFE_U64: case AMDGPU::S_BFE_I64: case AMDGPU::S_BFM_B64: @@ -1419,6 +1424,46 @@ void SIInstrInfo::splitScalar64BitBinaryOp( Worklist.push_back(HiHalf); } +void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst) const { + MachineBasicBlock &MBB = *Inst->getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + MachineBasicBlock::iterator MII = Inst; + DebugLoc DL = Inst->getDebugLoc(); + + MachineOperand &Dest = Inst->getOperand(0); + MachineOperand &Src = Inst->getOperand(1); + + const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32); + const TargetRegisterClass *SrcRC = Src.isReg() ? 
+ MRI.getRegClass(Src.getReg()) : + &AMDGPU::SGPR_32RegClass; + + unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); + + MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, + AMDGPU::sub0, SrcSubRC); + MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, + AMDGPU::sub1, SrcSubRC); + + MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg) + .addOperand(SrcRegSub0) + .addImm(0); + + MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg) + .addOperand(SrcRegSub1) + .addReg(MidReg); + + MRI.replaceRegWith(Dest.getReg(), ResultReg); + + Worklist.push_back(First); + Worklist.push_back(Second); +} + void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, MachineInstr *Inst) const { // Add the implict and explicit register definitions. diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index a9b014f880..a9de2d7635 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -50,6 +50,9 @@ private: void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr *Inst, unsigned Opcode) const; + void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst) const; + void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const; public: diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 3368d49ab4..77ef19084d 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -187,6 +187,12 @@ class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 < opName#" $dst, $src0", pattern >; +// 64-bit input, 32-bit output. 
+class SOP1_32_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 < + op, (outs SReg_32:$dst), (ins SSrc_64:$src0), + opName#" $dst, $src0", pattern +>; + class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 < op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 6525b49f3a..06b09fedfc 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -110,7 +110,8 @@ def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32", [(set i32:$dst, (ctpop i32:$src0))] >; -////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>; +def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>; + ////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>; ////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; ////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>; @@ -2515,6 +2516,13 @@ def : Pat < (V_BCNT_U32_B32_e32 $popcnt, $val) >; +def : Pat < + (i64 (ctpop i64:$src)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_BCNT1_I32_B64 $src), sub0), + (S_MOV_B32 0), sub1) +>; + //============================================================================// // Miscellaneous Optimization Patterns //============================================================================// |