diff options
author | Justin Holewinski <justin.holewinski@gmail.com> | 2011-09-26 16:20:25 +0000 |
---|---|---|
committer | Justin Holewinski <justin.holewinski@gmail.com> | 2011-09-26 16:20:25 +0000 |
commit | 6b8990df42c3e9814cc60c3072f85b5a38bbb410 (patch) | |
tree | b01eebab47c39b73748f8a2b64e693d8d77cb93a /lib/Target | |
parent | 6b97870746c3704427ce33e24336ceb6415d4698 (diff) | |
download | llvm-6b8990df42c3e9814cc60c3072f85b5a38bbb410.tar.gz llvm-6b8990df42c3e9814cc60c3072f85b5a38bbb410.tar.bz2 llvm-6b8990df42c3e9814cc60c3072f85b5a38bbb410.tar.xz |
PTX: Handle FrameIndex nodes
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140532 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/PTX/PTXAsmPrinter.cpp | 4 | ||||
-rw-r--r-- | lib/Target/PTX/PTXISelDAGToDAG.cpp | 58 | ||||
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.td | 137 | ||||
-rw-r--r-- | lib/Target/PTX/PTXRegisterInfo.cpp | 25 | ||||
-rw-r--r-- | lib/Target/PTX/PTXRegisterInfo.h | 6 | ||||
-rw-r--r-- | lib/Target/PTX/PTXTargetMachine.cpp | 4 |
6 files changed, 204 insertions, 30 deletions
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index e57d5dad8e..beabd77005 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -295,9 +295,9 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); if (FrameInfo->getObjectSize(i) > 0) { - std::string def = "\t.reg .b"; + std::string def = "\t.local .b"; def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits - def += " s"; + def += " __local_"; def += utostr(i); def += ";"; OutStreamer.EmitRawText(Twine(def)); diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp index 685b24ecfc..383ba44f3f 100644 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -37,6 +37,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel { bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset); // Include the pieces auto'gened from the target description #include "PTXGenDAGISel.inc" @@ -48,6 +49,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel { SDNode *SelectREADPARAM(SDNode *Node); SDNode *SelectWRITEPARAM(SDNode *Node); + SDNode *SelectFrameIndex(SDNode *Node); bool isImm(const SDValue &operand); bool SelectImm(const SDValue &operand, SDValue &imm); @@ -75,6 +77,8 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { return SelectREADPARAM(Node); case PTXISD::WRITE_PARAM: return SelectWRITEPARAM(Node); + case ISD::FrameIndex: + return SelectFrameIndex(Node); default: return SelectCode(Node); } @@ -173,6 +177,25 @@ SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) { return Ret; } +SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) { + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + //dbgs() << "Selecting FrameIndex at index " << FI << "\n"; + SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0)); + + //unsigned OpCode = PTX::LOAD_LOCAL_F32; + + //for (SDNode::use_iterator i = Node->use_begin(), e = Node->use_end(); + // i != e; ++i) { + // SDNode *Use = *i; + // dbgs() << "USE: "; + // Use->dumpr(CurDAG); + //} + + return Node; + //return CurDAG->getMachineNode(OpCode, Node->getDebugLoc(), + // Node->getValueType(0), TFI); +} + // Match memory operand of the form [reg+reg] bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 || @@ -243,6 +266,41 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, return false; } +// Match memory operand of the form [reg], [imm+reg], and [reg+imm] +bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() != ISD::ADD) { + // let SelectADDRii handle the [imm] case + if (isImm(Addr)) + return false; + // it is [reg] + + assert(Addr.getValueType().isSimple() && "Type must be simple"); + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + return true; + } + + if (Addr.getNumOperands() < 2) + return false; + + // let SelectADDRii handle the [imm+imm] case + if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1))) + return false; + + // try [reg+imm] and [imm+reg] + for (int i = 0; i < 2; i ++) + if (SelectImm(Addr.getOperand(1-i), Offset)) { + Base = Addr.getOperand(i); + return true; + } + + // neither [reg+imm] nor [imm+reg] + return false; +} + bool PTXDAGToDAGISel::isImm(const SDValue &operand) { return ConstantSDNode::classof(operand.getNode()); } diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index f43d1a17df..50499a56be 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -50,7 +50,9 @@ def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">; def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ const Value *Src; const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + const SDValue &MemOp = N->getOperand(1); + if ((MemOp.getOpcode() != ISD::FrameIndex) && + (Src = cast<LoadSDNode>(N)->getSrcValue()) && (PT = dyn_cast<PointerType>(Src->getType()))) return PT->getAddressSpace() == PTX::GLOBAL; return false; @@ -66,12 +68,8 @@ def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ }]>; def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::LOCAL; - return false; + const SDValue &MemOp = N->getOperand(1); + return MemOp.getOpcode() == ISD::FrameIndex; }]>; def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{ @@ -96,7 +94,9 @@ def store_global : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ const Value *Src; const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + const SDValue &MemOp = N->getOperand(2); + if ((MemOp.getOpcode() != ISD::FrameIndex) && + (Src = cast<StoreSDNode>(N)->getSrcValue()) && (PT = dyn_cast<PointerType>(Src->getType()))) return PT->getAddressSpace() == PTX::GLOBAL; return false; @@ -104,12 +104,8 @@ def store_global def store_local : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::LOCAL; - return false; + const SDValue &MemOp = N->getOperand(2); + return MemOp.getOpcode() == ISD::FrameIndex; }]>; def store_parameter @@ -133,12 +129,14 @@ def store_shared }]>; // Addressing modes. -def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; -def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; -def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; -def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; -def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; +def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; +def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; +def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; +def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; +def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; +def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; +def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>; +def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>; // Address operands def MEMri32 : Operand<i32> { @@ -903,7 +901,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Loads defm LDg : PTX_LD_ALL<"ld.global", load_global>; defm LDc : PTX_LD_ALL<"ld.const", load_constant>; -defm LDl : PTX_LD_ALL<"ld.local", load_local>; +//defm LDl : PTX_LD_ALL<"ld.local", load_local>; defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; // These instructions are used to load/store from the .param space for @@ -949,11 +947,101 @@ let hasSideEffects = 1 in { [(PTXstoreparam timm:$d, RegF64:$a)]>; } +/* + def ri64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri64:$a))]>, + Requires<[Use64BitAddresses]>; + + def ri64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri64:$a)]>, + Requires<[Use64BitAddresses]>; + */ +let hasSideEffects = 1 in { + def LDLOCALpiPred : InstPTX<(outs RegPred:$d), (ins MEMri32:$a), + "ld.local.pred\t$d, [__local_$a]", + [(set RegPred:$d, (load_local ADDRlocal32:$a))]>; + def LDLOCALpiU16 : InstPTX<(outs RegI16:$d), (ins MEMri32:$a), + "ld.local.u16\t$d, [__local_$a]", + [(set RegI16:$d, (load_local ADDRlocal32:$a))]>; + def LDLOCALpiU32 : InstPTX<(outs RegI32:$d), (ins MEMri32:$a), + "ld.local.u32\t$d, [__local_$a]", + [(set RegI32:$d, (load_local ADDRlocal32:$a))]>; + def LDLOCALpiU64 : InstPTX<(outs RegI64:$d), (ins MEMri32:$a), + "ld.local.u64\t$d, [__local_$a]", + [(set RegI64:$d, (load_local ADDRlocal32:$a))]>; + def LDLOCALpiF32 : InstPTX<(outs RegF32:$d), (ins MEMri32:$a), + "ld.local.f32\t$d, [__local_$a]", + [(set RegF32:$d, (load_local ADDRlocal32:$a))]>; + def LDLOCALpiF64 : InstPTX<(outs RegF64:$d), (ins MEMri32:$a), + "ld.local.f64\t$d, [__local_$a]", + [(set RegF64:$d, (load_local ADDRlocal32:$a))]>; + + def STLOCALpiPred : InstPTX<(outs), (ins RegPred:$d, MEMri32:$a), + "st.local.pred\t[__local_$a], $d", + [(store_local RegPred:$d, ADDRlocal32:$a)]>; + def STLOCALpiU16 : InstPTX<(outs), (ins RegI16:$d, MEMri32:$a), + "st.local.u16\t[__local_$a], $d", + [(store_local RegI16:$d, ADDRlocal32:$a)]>; + def STLOCALpiU32 : InstPTX<(outs), (ins RegI32:$d, MEMri32:$a), + "st.local.u32\t[__local_$a], $d", + [(store_local RegI32:$d, ADDRlocal32:$a)]>; + def STLOCALpiU64 : InstPTX<(outs), (ins RegI64:$d, MEMri32:$a), + "st.local.u64\t[__local_$a], $d", + [(store_local RegI64:$d, ADDRlocal32:$a)]>; + def STLOCALpiF32 : InstPTX<(outs), (ins RegF32:$d, MEMri32:$a), + "st.local.f32\t[__local_$a], $d", + [(store_local RegF32:$d, ADDRlocal32:$a)]>; + def STLOCALpiF64 : InstPTX<(outs), (ins RegF64:$d, MEMri32:$a), + "st.local.f64\t[__local_$a], $d", + [(store_local RegF64:$d, ADDRlocal32:$a)]>; + + /*def LDLOCALpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), + "ld.param.u16\t$d, [$a]", + [(set RegI16:$d, (PTXloadparam timm:$a))]>; + def LDLOCALpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), + "ld.param.u32\t$d, [$a]", + [(set RegI32:$d, (PTXloadparam timm:$a))]>; + def LDLOCALpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), + "ld.param.u64\t$d, [$a]", + [(set RegI64:$d, (PTXloadparam timm:$a))]>; + def LDLOCALpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), + "ld.param.f32\t$d, [$a]", + [(set RegF32:$d, (PTXloadparam timm:$a))]>; + def LDLOCALpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), + "ld.param.f64\t$d, [$a]", + [(set RegF64:$d, (PTXloadparam timm:$a))]>; + + def STLOCALpiPred : InstPTX<(outs), (ins MEMpi:$d, RegPred:$a), + "st.param.pred\t[$d], $a", + [(PTXstoreparam timm:$d, RegPred:$a)]>; + def STLOCALpiU16 : InstPTX<(outs), (ins MEMpi:$d, RegI16:$a), + "st.param.u16\t[$d], $a", + [(PTXstoreparam timm:$d, RegI16:$a)]>; + def STLOCALpiU32 : InstPTX<(outs), (ins MEMpi:$d, RegI32:$a), + "st.param.u32\t[$d], $a", + [(PTXstoreparam timm:$d, RegI32:$a)]>; + def STLOCALpiU64 : InstPTX<(outs), (ins MEMpi:$d, RegI64:$a), + "st.param.u64\t[$d], $a", + [(PTXstoreparam timm:$d, RegI64:$a)]>; + def STLOCALpiF32 : InstPTX<(outs), (ins MEMpi:$d, RegF32:$a), + "st.param.f32\t[$d], $a", + [(PTXstoreparam timm:$d, RegF32:$a)]>; + def STLOCALpiF64 : InstPTX<(outs), (ins MEMpi:$d, RegF64:$a), + "st.param.f64\t[$d], $a", + [(PTXstoreparam timm:$d, RegF64:$a)]>;*/ +} + // Stores defm STg : PTX_ST_ALL<"st.global", store_global>; -defm STl : PTX_ST_ALL<"st.local", store_local>; +//defm STl : PTX_ST_ALL<"st.local", store_local>; defm STs : PTX_ST_ALL<"st.shared", store_shared>; + + // defm STp : PTX_ST_ALL<"st.param", store_parameter>; // defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; // TODO: Do something with st.param if/when it is needed. @@ -1199,6 +1287,11 @@ def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>; def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>; def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>; +///===- Stack Variable Loads/Stores ---------------------------------------===// + +def LOAD_LOCAL_F32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), + "ld.local.f32\t$d, [%a]", []>; + // Call handling // def ADJCALLSTACKUP : // InstPTX<(outs), (ins i32imm:$amt1, i32imm:$amt2), "", diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp index d19b63e317..6f2e8762f6 100644 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -14,6 +14,9 @@ #include "PTX.h" #include "PTXRegisterInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -23,16 +26,21 @@ using namespace llvm; PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII) + const TargetInstrInfo &tii) // PTX does not have a return address register. - : PTXGenRegisterInfo(0) { + : PTXGenRegisterInfo(0), TII(tii) { } void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { unsigned Index; - MachineInstr& MI = *II; + MachineInstr &MI = *II; + //MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + + //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass); Index = 0; while (!MI.getOperand(Index).isFI()) { @@ -47,6 +55,15 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n"); DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n"); + //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32)) + //.addReg(Reg, RegState::Define).addImm(FrameIndex); + //if (MI2->findFirstPredOperandIdx() == -1) { + // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); + // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + //} + //MI2->dump(); + // This frame index is post stack slot re-use assignments - MI.getOperand(Index).ChangeToImmediate(FrameIndex); + //MI.getOperand(Index).ChangeToRegister(Reg, false); + MI.getOperand(Index).ChangeToImmediate(0); } diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h index c3f1196462..55fafe47bf 100644 --- a/lib/Target/PTX/PTXRegisterInfo.h +++ b/lib/Target/PTX/PTXRegisterInfo.h @@ -25,8 +25,12 @@ class PTXTargetMachine; class MachineFunction; struct PTXRegisterInfo : public PTXGenRegisterInfo { +private: + const TargetInstrInfo &TII; + +public: PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII); + const TargetInstrInfo &tii); virtual const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const { diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 4811dd2d49..a6db0b7f2e 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -118,7 +118,7 @@ bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // PTXMFInfoExtract must after register allocation! - PM.add(createPTXMFInfoExtract(*this, OptLevel)); + //PM.add(createPTXMFInfoExtract(*this, OptLevel)); return false; } @@ -365,5 +365,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (addPreEmitPass(PM, OptLevel)) printNoVerify(PM, "After PreEmit passes"); + PM.add(createPTXMFInfoExtract(*this, OptLevel)); + return false; } |