summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--lib/Target/NVPTX/NVPTX.h1
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp103
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h26
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp436
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td30
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp8
7 files changed, 525 insertions, 80 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 7da2fed4cd..735ca9b6b7 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -23,6 +23,7 @@ set(NVPTXCodeGen_sources
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
NVVMReflect.cpp
+ NVPTXGenericToNVVM.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 6a53a443bf..fa29de8e56 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -62,6 +62,7 @@ createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
+ModulePass *createGenericToNVVMPass();
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index e2832b02b5..88b9aa06e5 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -68,11 +68,12 @@ InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.
-void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+void DiscoverDependentGlobals(const Value *V,
+ DenseSet<const GlobalVariable *> &Globals) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
Globals.insert(GV);
else {
- if (User *U = dyn_cast<User>(V)) {
+ if (const User *U = dyn_cast<User>(V)) {
for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
DiscoverDependentGlobals(U->getOperand(i), Globals);
}
@@ -84,8 +85,9 @@ void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
/// instances to be emitted, but only after any dependents have been added
/// first.
void VisitGlobalVariableForEmission(
- GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
- DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
+ const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
+ DenseSet<const GlobalVariable *> &Visited,
+ DenseSet<const GlobalVariable *> &Visiting) {
// Have we already visited this one?
if (Visited.count(GV))
return;
@@ -98,12 +100,12 @@ void VisitGlobalVariableForEmission(
Visiting.insert(GV);
// Make sure we visit all dependents first
- DenseSet<GlobalVariable *> Others;
+ DenseSet<const GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
- for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
- E = Others.end();
+ for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
+ E = Others.end();
I != E; ++I)
VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
@@ -405,6 +407,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
+ if (!GlobalsEmitted) {
+ emitGlobals(*MF->getFunction()->getParent());
+ GlobalsEmitted = true;
+ }
+
// Set up
MRI = &MF->getRegInfo();
F = MF->getFunction();
@@ -795,7 +802,7 @@ static bool useFuncSeen(const Constant *C,
return false;
}
-void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = FI;
@@ -921,6 +928,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
recordAndEmitFilenames(M);
+ GlobalsEmitted = false;
+
+ return false; // success
+}
+
+void NVPTXAsmPrinter::emitGlobals(const Module &M) {
SmallString<128> Str2;
raw_svector_ostream OS2(Str2);
@@ -931,13 +944,13 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// global variable in order, and ensure that we emit it *after* its dependent
// globals. We use a little extra memory maintaining both a set and a list to
// have fast searches while maintaining a strict ordering.
- SmallVector<GlobalVariable *, 8> Globals;
- DenseSet<GlobalVariable *> GVVisited;
- DenseSet<GlobalVariable *> GVVisiting;
+ SmallVector<const GlobalVariable *, 8> Globals;
+ DenseSet<const GlobalVariable *> GVVisited;
+ DenseSet<const GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
- ++I)
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
assert(GVVisited.size() == M.getGlobalList().size() &&
@@ -951,7 +964,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
OS2 << '\n';
OutStreamer.EmitRawText(OS2.str());
- return false; // success
}
void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
@@ -989,6 +1001,14 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
}
bool NVPTXAsmPrinter::doFinalization(Module &M) {
+
+ // If we did not emit any functions, then the global declarations have not
+ // yet been emitted.
+ if (!GlobalsEmitted) {
+ emitGlobals(M);
+ GlobalsEmitted = true;
+ }
+
// XXX Temproarily remove global variables so that doFinalization() will not
// emit them again (global variables are emitted at beginning).
@@ -1063,7 +1083,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
}
}
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
+ raw_ostream &O,
bool processDemoted) {
// Skip meta data
@@ -1107,10 +1128,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (llvm::isSampler(*GVar)) {
O << ".global .samplerref " << llvm::getSamplerName(*GVar);
- Constant *Initializer = NULL;
+ const Constant *Initializer = NULL;
if (GVar->hasInitializer())
Initializer = GVar->getInitializer();
- ConstantInt *CI = NULL;
+ const ConstantInt *CI = NULL;
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
@@ -1183,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (localDecls.find(demotedFunc) != localDecls.end())
localDecls[demotedFunc].push_back(GVar);
else {
- std::vector<GlobalVariable *> temp;
+ std::vector<const GlobalVariable *> temp;
temp.push_back(GVar);
localDecls[demotedFunc] = temp;
}
@@ -1213,7 +1234,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
GVar->hasInitializer()) {
- Constant *Initializer = GVar->getInitializer();
+ const Constant *Initializer = GVar->getInitializer();
if (!Initializer->isNullValue()) {
O << " = ";
printScalarConstant(Initializer, O);
@@ -1237,7 +1258,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
GVar->hasInitializer()) {
- Constant *Initializer = GVar->getInitializer();
+ const Constant *Initializer = GVar->getInitializer();
if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
@@ -1287,7 +1308,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
if (localDecls.find(f) == localDecls.end())
return;
- std::vector<GlobalVariable *> &gvars = localDecls[f];
+ std::vector<const GlobalVariable *> &gvars = localDecls[f];
for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
O << "\t// demoted variable\n\t";
@@ -1761,12 +1782,12 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
O << utohexstr(API.getZExtValue());
}
-void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
O << CI->getValue();
return;
}
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
printFPConstant(CFP, O);
return;
}
@@ -1774,13 +1795,13 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
O << "0";
return;
}
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
O << *Mang->getSymbol(GVar);
return;
}
- if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- Value *v = Cexpr->stripPointerCasts();
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ const Value *v = Cexpr->stripPointerCasts();
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
O << *Mang->getSymbol(GVar);
return;
} else {
@@ -1791,7 +1812,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
-void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
+void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
const DataLayout *TD = TM.getDataLayout();
@@ -1819,13 +1840,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
@@ -1841,13 +1862,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
llvm_unreachable("unsupported integer const type");
} else if (ETy == Type::getInt64Ty(CPV->getContext())) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
@@ -1868,7 +1889,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
case Type::FloatTyID:
case Type::DoubleTyID: {
- ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+ const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
@@ -1884,10 +1905,10 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
break;
}
case Type::PointerTyID: {
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
aggBuffer->addSymbol(GVar);
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- Value *v = Cexpr->stripPointerCasts();
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ const Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v);
}
unsigned int s = TD->getTypeAllocSize(CPV->getType());
@@ -1916,7 +1937,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
}
-void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
+void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
const DataLayout *TD = TM.getDataLayout();
int Bytes;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 6dc9fc0ffe..7faa6b265b 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
SmallVector<unsigned, 4> symbolPosInBuffer;
- SmallVector<Value *, 4> Symbols;
+ SmallVector<const Value *, 4> Symbols;
private:
unsigned curpos;
@@ -128,7 +128,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
}
return curpos;
}
- void addSymbol(Value *GVar) {
+ void addSymbol(const Value *GVar) {
symbolPosInBuffer.push_back(curpos);
Symbols.push_back(GVar);
numSymbols++;
@@ -153,11 +153,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
- Value *v = Symbols[nSym];
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ const Value *v = Symbols[nSym];
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.Mang->getSymbol(GVar);
O << *Name;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
llvm_unreachable("symbol type unknown");
@@ -205,10 +205,12 @@ private:
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
// definition autogenerated.
void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
+ void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
+ bool = false);
void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
+ void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
@@ -234,6 +236,8 @@ protected:
private:
std::string CurrentBankselLabelInBasicBlock;
+ bool GlobalsEmitted;
+
// This is specific per MachineFunction.
const MachineRegisterInfo *MRI;
// The contents are specific for each
@@ -247,7 +251,7 @@ private:
std::map<const Type *, std::string> TypeNameMap;
// List of variables demoted to a function scope.
- std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
+ std::map<const Function *, std::vector<const GlobalVariable *> > localDecls;
// To record filename to ID mapping
std::map<std::string, unsigned> filenameMap;
@@ -256,15 +260,15 @@ private:
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
- void printScalarConstant(Constant *CPV, raw_ostream &O);
+ void printScalarConstant(const Constant *CPV, raw_ostream &O);
void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
- void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
- void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
+ void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+ void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
void printOperandProper(const MachineOperand &MO);
void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
- void emitDeclarations(Module &, raw_ostream &O);
+ void emitDeclarations(const Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
new file mode 100644
index 0000000000..1077c46fb4
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -0,0 +1,436 @@
+//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Convert generic global variables into either .global or .const access based
+// on the variable's "constant" qualifier.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+
+#include "llvm/PassManager.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeGenericToNVVMPass(PassRegistry &);
+}
+
+namespace {
+class GenericToNVVM : public ModulePass {
+public:
+ static char ID;
+
+ GenericToNVVM() : ModulePass(ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+private:
+ Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
+ IRBuilder<> &Builder);
+ Value *remapConstant(Module *M, Function *F, Constant *C,
+ IRBuilder<> &Builder);
+ Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F,
+ Constant *C,
+ IRBuilder<> &Builder);
+ Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
+ IRBuilder<> &Builder);
+ void remapNamedMDNode(Module *M, NamedMDNode *N);
+ MDNode *remapMDNode(Module *M, MDNode *N);
+
+ typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
+ typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
+ GVMapTy GVMap;
+ ConstantToValueMapTy ConstantToValueMap;
+};
+}
+
+char GenericToNVVM::ID = 0;
+
+ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); }
+
+INITIALIZE_PASS(
+ GenericToNVVM, "generic-to-nvvm",
+ "Ensure that the global variables are in the global address space", false,
+ false)
+
+bool GenericToNVVM::runOnModule(Module &M) {
+ // Create a clone of each global variable that has the default address space.
+ // The clone is created with the global address space specifier, and the pair
+ // of original global variable and its clone is placed in the GVMap for later
+ // use.
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E;) {
+ GlobalVariable *GV = I++;
+ if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
+ !llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
+ !GV->getName().startswith("llvm.")) {
+ GlobalVariable *NewGV = new GlobalVariable(
+ M, GV->getType()->getElementType(), GV->isConstant(),
+ GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL,
+ "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
+ NewGV->copyAttributesFrom(GV);
+ GVMap[GV] = NewGV;
+ }
+ }
+
+ // Return immediately, if every global variable has a specific address space
+ // specifier.
+ if (GVMap.empty()) {
+ return false;
+ }
+
+ // Walk through the instructions in function defitinions, and replace any use
+ // of original global variables in GVMap with a use of the corresponding
+ // copies in GVMap. If necessary, promote constants to instructions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->isDeclaration()) {
+ continue;
+ }
+ IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg());
+ for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE;
+ ++BBI) {
+ for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE;
+ ++II) {
+ for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) {
+ Value *Operand = II->getOperand(i);
+ if (isa<Constant>(Operand)) {
+ II->setOperand(
+ i, remapConstant(&M, I, cast<Constant>(Operand), Builder));
+ }
+ }
+ }
+ }
+ ConstantToValueMap.clear();
+ }
+
+ // Walk through the metadata section and update the debug information
+ // associated with the global variables in the default address space.
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end();
+ I != E; I++) {
+ remapNamedMDNode(&M, I);
+ }
+
+ // Walk through the global variable initializers, and replace any use of
+ // original global variables in GVMap with a use of the corresponding copies
+ // in GVMap. The copies need to be bitcast to the original global variable
+ // types, as we cannot use cvta in global variable initializers.
+ for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
+ GlobalVariable *GV = I->first;
+ GlobalVariable *NewGV = I->second;
+ ++I;
+ Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType());
+ // At this point, the remaining uses of GV should be found only in global
+ // variable initializers, as other uses have been already been removed
+ // while walking through the instructions in function definitions.
+ for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end();
+ UI != UE;) {
+ Use &U = (UI++).getUse();
+ U.set(BitCastNewGV);
+ }
+ std::string Name = GV->getName();
+ GV->removeDeadConstantUsers();
+ GV->eraseFromParent();
+ NewGV->setName(Name);
+ }
+ GVMap.clear();
+
+ return true;
+}
+
+Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
+ GlobalVariable *GV,
+ IRBuilder<> &Builder) {
+ PointerType *GVType = GV->getType();
+ Value *CVTA = NULL;
+
+ // See if the address space conversion requires the operand to be bitcast
+ // to i8 addrspace(n)* first.
+ EVT ExtendedGVType = EVT::getEVT(GVType->getElementType(), true);
+ if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) {
+ // A bitcast to i8 addrspace(n)* on the operand is needed.
+ LLVMContext &Context = M->getContext();
+ unsigned int AddrSpace = GVType->getAddressSpace();
+ Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace);
+ CVTA = Builder.CreateBitCast(GV, DestTy, "cvta");
+ // Insert the address space conversion.
+ Type *ResultType =
+ PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC);
+ SmallVector<Type *, 2> ParamTypes;
+ ParamTypes.push_back(ResultType);
+ ParamTypes.push_back(DestTy);
+ Function *CVTAFunction = Intrinsic::getDeclaration(
+ M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
+ CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta");
+ // Another bitcast from i8 * to <the element type of GVType> * is
+ // required.
+ DestTy =
+ PointerType::get(GVType->getElementType(), llvm::ADDRESS_SPACE_GENERIC);
+ CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta");
+ } else {
+ // A simple CVTA is enough.
+ SmallVector<Type *, 2> ParamTypes;
+ ParamTypes.push_back(PointerType::get(GVType->getElementType(),
+ llvm::ADDRESS_SPACE_GENERIC));
+ ParamTypes.push_back(GVType);
+ Function *CVTAFunction = Intrinsic::getDeclaration(
+ M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
+ CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta");
+ }
+
+ return CVTA;
+}
+
+Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
+ IRBuilder<> &Builder) {
+ // If the constant C has been converted already in the given function F, just
+ // return the converted value.
+ ConstantToValueMapTy::iterator CTII = ConstantToValueMap.find(C);
+ if (CTII != ConstantToValueMap.end()) {
+ return CTII->second;
+ }
+
+ Value *NewValue = C;
+ if (isa<GlobalVariable>(C)) {
+ // If the constant C is a global variable and is found in GVMap, generate a
+ // set set of instructions that convert the clone of C with the global
+ // address space specifier to a generic pointer.
+ // The constant C cannot be used here, as it will be erased from the
+ // module eventually. And the clone of C with the global address space
+ // specifier cannot be used here either, as it will affect the types of
+ // other instructions in the function. Hence, this address space conversion
+ // is required.
+ GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C));
+ if (I != GVMap.end()) {
+ NewValue = getOrInsertCVTA(M, F, I->second, Builder);
+ }
+ } else if (isa<ConstantVector>(C) || isa<ConstantArray>(C) ||
+ isa<ConstantStruct>(C)) {
+ // If any element in the constant vector or aggregate C is or uses a global
+ // variable in GVMap, the constant C needs to be reconstructed, using a set
+ // of instructions.
+ NewValue = remapConstantVectorOrConstantAggregate(M, F, C, Builder);
+ } else if (isa<ConstantExpr>(C)) {
+ // If any operand in the constant expression C is or uses a global variable
+ // in GVMap, the constant expression C needs to be reconstructed, using a
+ // set of instructions.
+ NewValue = remapConstantExpr(M, F, cast<ConstantExpr>(C), Builder);
+ }
+
+ ConstantToValueMap[C] = NewValue;
+ return NewValue;
+}
+
+Value *GenericToNVVM::remapConstantVectorOrConstantAggregate(
+ Module *M, Function *F, Constant *C, IRBuilder<> &Builder) {
+ bool OperandChanged = false;
+ SmallVector<Value *, 4> NewOperands;
+ unsigned NumOperands = C->getNumOperands();
+
+ // Check if any element is or uses a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = C->getOperand(i);
+ Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the elements has been modified, return C as it is.
+ if (!OperandChanged) {
+ return C;
+ }
+
+ // If any of the elements has been modified, construct the equivalent
+ // vector or aggregate value with a set instructions and the converted
+ // elements.
+ Value *NewValue = UndefValue::get(C->getType());
+ if (isa<ConstantVector>(C)) {
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i);
+ NewValue = Builder.CreateInsertElement(NewValue, NewOperands[i], Idx);
+ }
+ } else {
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ NewValue =
+ Builder.CreateInsertValue(NewValue, NewOperands[i], makeArrayRef(i));
+ }
+ }
+
+ return NewValue;
+}
+
+Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
+ IRBuilder<> &Builder) {
+ bool OperandChanged = false;
+ SmallVector<Value *, 4> NewOperands;
+ unsigned NumOperands = C->getNumOperands();
+
+ // Check if any operand is or uses a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = C->getOperand(i);
+ Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return C as it is.
+ if (!OperandChanged) {
+ return C;
+ }
+
+ // If any of the operands has been modified, construct the instruction with
+ // the converted operands.
+ unsigned Opcode = C->getOpcode();
+ switch (Opcode) {
+ case Instruction::ICmp:
+ // CompareConstantExpr (icmp)
+ return Builder.CreateICmp(CmpInst::Predicate(C->getPredicate()),
+ NewOperands[0], NewOperands[1]);
+ case Instruction::FCmp:
+ // CompareConstantExpr (fcmp)
+ assert(false && "Address space conversion should have no effect "
+ "on float point CompareConstantExpr (fcmp)!");
+ return C;
+ case Instruction::ExtractElement:
+ // ExtractElementConstantExpr
+ return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
+ case Instruction::InsertElement:
+ // InsertElementConstantExpr
+ return Builder.CreateInsertElement(NewOperands[0], NewOperands[1],
+ NewOperands[2]);
+ case Instruction::ShuffleVector:
+ // ShuffleVector
+ return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1],
+ NewOperands[2]);
+ case Instruction::ExtractValue:
+ // ExtractValueConstantExpr
+ return Builder.CreateExtractValue(NewOperands[0], C->getIndices());
+ case Instruction::InsertValue:
+ // InsertValueConstantExpr
+ return Builder.CreateInsertValue(NewOperands[0], NewOperands[1],
+ C->getIndices());
+ case Instruction::GetElementPtr:
+ // GetElementPtrConstantExpr
+ return cast<GEPOperator>(C)->isInBounds()
+ ? Builder.CreateGEP(
+ NewOperands[0],
+ makeArrayRef(&NewOperands[1], NumOperands - 1))
+ : Builder.CreateInBoundsGEP(
+ NewOperands[0],
+ makeArrayRef(&NewOperands[1], NumOperands - 1));
+ case Instruction::Select:
+ // SelectConstantExpr
+ return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]);
+ default:
+ // BinaryConstantExpr
+ if (Instruction::isBinaryOp(Opcode)) {
+ return Builder.CreateBinOp(Instruction::BinaryOps(C->getOpcode()),
+ NewOperands[0], NewOperands[1]);
+ }
+ // UnaryConstantExpr
+ if (Instruction::isCast(Opcode)) {
+ return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
+ NewOperands[0], C->getType());
+ }
+ assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
+ return C;
+ }
+}
+
+void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
+
+ bool OperandChanged = false;
+ SmallVector<MDNode *, 16> NewOperands;
+ unsigned NumOperands = N->getNumOperands();
+
+ // Check if any operand is or contains a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ MDNode *Operand = N->getOperand(i);
+ MDNode *NewOperand = remapMDNode(M, Operand);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return immediately.
+ if (!OperandChanged) {
+ return;
+ }
+
+ // Replace the old operands with the new operands.
+ N->dropAllReferences();
+ for (SmallVector<MDNode *, 16>::iterator I = NewOperands.begin(),
+ E = NewOperands.end();
+ I != E; ++I) {
+ N->addOperand(*I);
+ }
+}
+
+MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
+
+ bool OperandChanged = false;
+ SmallVector<Value *, 8> NewOperands;
+ unsigned NumOperands = N->getNumOperands();
+
+ // Check if any operand is or contains a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = N->getOperand(i);
+ Value *NewOperand = Operand;
+ if (Operand) {
+ if (isa<GlobalVariable>(Operand)) {
+ GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand));
+ if (I != GVMap.end()) {
+ NewOperand = I->second;
+ if (++i < NumOperands) {
+ NewOperands.push_back(NewOperand);
+ // Address space of the global variable follows the global variable
+ // in the global variable debug info (see createGlobalVariable in
+ // lib/Analysis/DIBuilder.cpp).
+ NewOperand =
+ ConstantInt::get(Type::getInt32Ty(M->getContext()),
+ I->second->getType()->getAddressSpace());
+ }
+ }
+ } else if (isa<MDNode>(Operand)) {
+ NewOperand = remapMDNode(M, cast<MDNode>(Operand));
+ }
+ }
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return N as it is.
+ if (!OperandChanged) {
+ return N;
+ }
+
+ // If any of the operands has been modified, create a new MDNode with the new
+ // operands.
+ return MDNode::get(M->getContext(), makeArrayRef(NewOperands));
+}
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 49e2568dfa..2780ef4036 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1510,38 +1510,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> {
defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
+defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
-
-def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>;
-def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>;
-
-
-
-// @TODO: Revisit this. There is a type
-// contradiction between iPTRAny and iPTR for the def.
-/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen
- (Wrapper tglobaladdr:$src)))]>;
-def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen
- (Wrapper tglobaladdr:$src)))]>;*/
-
-
-def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>;
-def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>;
+defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
// nvvm.ptr.gen.to.param
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 67ca6b58e5..1ae2a7cc86 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -49,6 +49,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
+void initializeGenericToNVVMPass(PassRegistry&);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -62,6 +63,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
+ initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
}
NVPTXTargetMachine::NVPTXTargetMachine(
@@ -100,6 +102,7 @@ public:
return getTM<NVPTXTargetMachine>();
}
+ virtual void addIRPasses();
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
};
@@ -110,6 +113,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
+void NVPTXPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ addPass(createGenericToNVVMPass());
+}
+
bool NVPTXPassConfig::addInstSelector() {
addPass(createLowerAggrCopies());
addPass(createSplitBBatBarPass());