summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPawel Wodnicki <pawel@32bitmicro.com>2012-11-19 22:25:44 +0000
committerPawel Wodnicki <pawel@32bitmicro.com>2012-11-19 22:25:44 +0000
commitc8a344e16acf6df8d402729da2e1dff7c5187633 (patch)
treec5cfb4f0ece5077d4c0629e216e2e7388e2104ba
parent97b07299fad0d019224912afe63fa916c4a0c507 (diff)
downloadllvm-c8a344e16acf6df8d402729da2e1dff7c5187633.tar.gz
llvm-c8a344e16acf6df8d402729da2e1dff7c5187633.tar.bz2
llvm-c8a344e16acf6df8d402729da2e1dff7c5187633.tar.xz
Merging r167948, r168198: into the 3.2 release branch
r168198 [NVPTX] Order global variables in def-use order before emiting them in the final assembly r167948 [NVPTX] Implement custom lowering of loads/stores for i1 Loads from i1 become loads from i8 followed by trunc Stores to i1 become zext to i8 followed by store to i8 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_32@168335 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp70
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp60
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h3
-rw-r--r--test/CodeGen/NVPTX/global-ordering.ll20
-rw-r--r--test/CodeGen/NVPTX/pr13291-i1-store.ll26
5 files changed, 174 insertions, 5 deletions
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3dd9bf5613..0a885ce1c4 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -68,7 +68,54 @@ static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
cl::location(llvm::InterleaveSrcInPtx));
+namespace {
+/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
+/// depends.
+void DiscoverDependentGlobals(Value *V,
+ DenseSet<GlobalVariable*> &Globals) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ Globals.insert(GV);
+ else {
+ if (User *U = dyn_cast<User>(V)) {
+ for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
+ DiscoverDependentGlobals(U->getOperand(i), Globals);
+ }
+ }
+ }
+}
+/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
+/// instances to be emitted, but only after any dependents have been added
+/// first.
+void VisitGlobalVariableForEmission(GlobalVariable *GV,
+ SmallVectorImpl<GlobalVariable*> &Order,
+ DenseSet<GlobalVariable*> &Visited,
+ DenseSet<GlobalVariable*> &Visiting) {
+ // Have we already visited this one?
+ if (Visited.count(GV)) return;
+
+ // Do we have a circular dependency?
+ if (Visiting.count(GV))
+ report_fatal_error("Circular dependency found in global variable set");
+
+ // Start visiting this global
+ Visiting.insert(GV);
+
+ // Make sure we visit all dependents first
+ DenseSet<GlobalVariable*> Others;
+ for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
+ DiscoverDependentGlobals(GV->getOperand(i), Others);
+
+ for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
+ E = Others.end(); I != E; ++I)
+ VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
+
+ // Now we can visit ourself
+ Order.push_back(GV);
+ Visited.insert(GV);
+ Visiting.erase(GV);
+}
+}
// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
// cannot just link to the existing version.
@@ -893,10 +940,27 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
emitDeclarations(M, OS2);
- // Print out module-level global variables here.
+ // As ptxas does not support forward references of globals, we need to first
+ // sort the list of module-level globals in def-use order. We visit each
+ // global variable in order, and ensure that we emit it *after* its dependent
+ // globals. We use a little extra memory maintaining both a set and a list to
+ // have fast searches while maintaining a strict ordering.
+ SmallVector<GlobalVariable*,8> Globals;
+ DenseSet<GlobalVariable*> GVVisited;
+ DenseSet<GlobalVariable*> GVVisiting;
+
+ // Visit each global variable, in order
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- printModuleLevelGV(I, OS2);
+ I != E; ++I)
+ VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
+
+ assert(GVVisited.size() == M.getGlobalList().size() &&
+ "Missed a global variable");
+ assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
+
+ // Print out module-level global variables in proper order
+ for (unsigned i = 0, e = Globals.size(); i != e; ++i)
+ printModuleLevelGV(Globals[i], OS2);
OS2 << '\n';
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index be771e3567..8430e21204 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PTX does not support load / store predicate registers
- setOperationAction(ISD::LOAD, MVT::i1, Expand);
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setOperationAction(ISD::STORE, MVT::i1, Expand);
setTruncStoreAction(MVT::i64, MVT::i1, Expand);
setTruncStoreAction(MVT::i32, MVT::i1, Expand);
setTruncStoreAction(MVT::i16, MVT::i1, Expand);
@@ -856,11 +857,66 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_SUBVECTOR:
return Op;
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+
+// v = ld i1* addr
+// =>
+// v1 = ld i8* addr
+// v = trunc v1 to i1
+SDValue NVPTXTargetLowering::
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ DebugLoc dl = Node->getDebugLoc();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ assert(ExtType == ISD::NON_EXTLOAD) ;
+ EVT VT = Node->getValueType(0);
+ assert(VT == MVT::i1 && "Custom lowering for i1 load only");
+ SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(),
+ LD->getAlignment());
+ SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
+ // The legalizer (the caller) is expecting two values from the legalized
+ // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
+ // in LegalizeDAG.cpp which also uses MergeValues.
+ SDValue Ops[] = {result, LD->getChain()};
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+// st i1 v, addr
+// =>
+// v1 = zxt v to i8
+// st i8, addr
+SDValue NVPTXTargetLowering::
+LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Tmp1 = ST->getChain();
+ SDValue Tmp2 = ST->getBasePtr();
+ SDValue Tmp3 = ST->getValue();
+ EVT VT = Tmp3.getValueType();
+ assert(VT == MVT::i1 && "Custom lowering for i1 store only");
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i8, Tmp3);
+ SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ return Result;
+}
+
+
SDValue
NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
EVT v) const {
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 86246e6449..94a177ceb0 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -138,6 +138,9 @@ private:
SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
};
} // namespace llvm
diff --git a/test/CodeGen/NVPTX/global-ordering.ll b/test/CodeGen/NVPTX/global-ordering.ll
new file mode 100644
index 0000000000..43394a79e9
--- /dev/null
+++ b/test/CodeGen/NVPTX/global-ordering.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+; Make sure we emit these globals in def-use order
+
+
+; PTX32: .visible .global .align 1 .u8 a = 2;
+; PTX32-NEXT: .visible .global .align 4 .u32 a2 = a;
+; PTX64: .visible .global .align 1 .u8 a = 2;
+; PTX64-NEXT: .visible .global .align 8 .u64 a2 = a;
+@a2 = addrspace(1) global i8 addrspace(1)* @a
+@a = addrspace(1) global i8 2
+
+
+; PTX32: .visible .global .align 1 .u8 b = 1;
+; PTX32-NEXT: .visible .global .align 4 .u32 b2[2] = {b, b};
+; PTX64: .visible .global .align 1 .u8 b = 1;
+; PTX64-NEXT: .visible .global .align 8 .u64 b2[2] = {b, b};
+@b2 = addrspace(1) global [2 x i8 addrspace(1)*] [i8 addrspace(1)* @b, i8 addrspace(1)* @b]
+@b = addrspace(1) global i8 1
diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll
new file mode 100644
index 0000000000..779f7798d8
--- /dev/null
+++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+define ptx_kernel void @t1(i1* %a) {
+; PTX32: mov.u16 %rc{{[0-9]+}}, 0;
+; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}};
+; PTX64: mov.u16 %rc{{[0-9]+}}, 0;
+; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}};
+ store i1 false, i1* %a
+ ret void
+}
+
+
+define ptx_kernel void @t2(i1* %a, i8* %b) {
+; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1;
+; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1;
+; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1;
+; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1;
+
+ %t1 = load i1* %a
+ %t2 = select i1 %t1, i8 1, i8 2
+ store i8 %t2, i8* %b
+ ret void
+}