summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJustin Holewinski <justin.holewinski@gmail.com>2011-06-20 15:56:20 +0000
committerJustin Holewinski <justin.holewinski@gmail.com>2011-06-20 15:56:20 +0000
commitdf1c8d837d2aa1405a5523f6293ccbed38f9c446 (patch)
tree6911213343b8212f2c4a7c020b4871bff9ce13c1 /lib
parent8e9d6720c3b62ad45bb97d43b47867a3097b433a (diff)
downloadllvm-df1c8d837d2aa1405a5523f6293ccbed38f9c446.tar.gz
llvm-df1c8d837d2aa1405a5523f6293ccbed38f9c446.tar.bz2
llvm-df1c8d837d2aa1405a5523f6293ccbed38f9c446.tar.xz
PTX: Add basic register spilling code
The current implementation generates stack loads/stores, which are really just mov instructions from/to "special" registers. This may not be the most efficient implementation, compared to an approach where the stack registers are directly folded into instructions, but this is easier to implement and I have yet to see a case where ptxas is unable to see through this kind of register usage and know what is really going on. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133443 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp13
-rw-r--r--lib/Target/PTX/PTXInstrInfo.cpp75
-rw-r--r--lib/Target/PTX/PTXInstrInfo.h23
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td24
4 files changed, 135 insertions, 0 deletions
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index df973fdeb7..6f6fc29673 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -194,6 +195,18 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
def += ';';
OutStreamer.EmitRawText(Twine(def));
}
+
+ const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+ DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() << " frame object(s)\n");
+ for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+ DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+ std::string def = "\t.reg .b";
+ def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
+ def += " s";
+ def += utostr(i);
+ def += ";";
+ OutStreamer.EmitRawText(Twine(def));
+ }
}
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index c305c05e10..c8278197f0 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -288,6 +288,81 @@ InsertBranch(MachineBasicBlock &MBB,
}
}
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKSTOREI16;
+ }
+ else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKSTOREI32;
+ }
+ else if (RC == PTX::RegI64RegisterClass) {
+ OpCode = PTX::STACKSTOREI32;
+ }
+ else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKSTOREF32;
+ }
+ else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKSTOREF64;
+ }
+
+ // Build the store instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addImm(FrameIdx);
+ MIB.addReg(SrcReg);
+
+ AddDefaultPredicate(MIB);
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "loadRegToStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKLOADI16;
+ }
+ else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKLOADI32;
+ }
+ else if (RC == PTX::RegI64RegisterClass) {
+ OpCode = PTX::STACKLOADI32;
+ }
+ else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKLOADF32;
+ }
+ else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKLOADF64;
+ }
+
+ // Build the load instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addReg(DestReg);
+ MIB.addImm(FrameIdx);
+
+ AddDefaultPredicate(MIB);
+}
+
// static helper routines
MachineSDNode *PTXInstrInfo::
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index a04be7728f..b08c159741 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -84,6 +84,29 @@ public:
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ // Memory operand folding for spills
+ // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+ // loadRegFromStackSlot. Doing so will get rid of the "stack" registers
+ // we currently use to spill, though I doubt the overall effect on ptxas
+ // output will be large. I have yet to see a case where ptxas is unable
+ // to see through the "stack" register usage and hence generates
+ // efficient code anyway.
+ // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ // MachineInstr* MI,
+ // const SmallVectorImpl<unsigned> &Ops,
+ // int FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass* RC,
+ const TargetRegisterInfo* TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
// static helper routines
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 8477cd71d3..d497283b72 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -977,6 +977,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
}
+///===- Spill Instructions ------------------------------------------------===//
+// Special instructions used for stack spilling
+def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
+ "mov.u16\ts$d, $a", []>;
+def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
+ "mov.u32\ts$d, $a", []>;
+def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
+ "mov.u64\ts$d, $a", []>;
+def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
+ "mov.f32\ts$d, $a", []>;
+def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
+ "mov.f64\ts$d, $a", []>;
+
+def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
+ "mov.u16\t$d, s$a", []>;
+def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
+ "mov.u32\t$d, s$a", []>;
+def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
+ "mov.u64\t$d, s$a", []>;
+def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
+ "mov.f32\t$d, s$a", []>;
+def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
+ "mov.f64\t$d, s$a", []>;
+
///===- Intrinsic Instructions --------------------------------------------===//
include "PTXIntrinsicInstrInfo.td"