diff options
author | Pawel Wodnicki <pawel@32bitmicro.com> | 2012-11-19 22:25:44 +0000 |
---|---|---|
committer | Pawel Wodnicki <pawel@32bitmicro.com> | 2012-11-19 22:25:44 +0000 |
commit | c8a344e16acf6df8d402729da2e1dff7c5187633 (patch) | |
tree | c5cfb4f0ece5077d4c0629e216e2e7388e2104ba | |
parent | 97b07299fad0d019224912afe63fa916c4a0c507 (diff) | |
download | llvm-c8a344e16acf6df8d402729da2e1dff7c5187633.tar.gz llvm-c8a344e16acf6df8d402729da2e1dff7c5187633.tar.bz2 llvm-c8a344e16acf6df8d402729da2e1dff7c5187633.tar.xz |
Merging r167948, r168198: into the 3.2 release branch
r168198
[NVPTX] Order global variables in def-use order before emiting them in the final assembly
r167948
[NVPTX] Implement custom lowering of loads/stores for i1
Loads from i1 become loads from i8 followed by trunc
Stores to i1 become zext to i8 followed by store to i8
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_32@168335 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 70 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelLowering.cpp | 60 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelLowering.h | 3 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/global-ordering.ll | 20 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/pr13291-i1-store.ll | 26 |
5 files changed, 174 insertions, 5 deletions
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 3dd9bf5613..0a885ce1c4 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -68,7 +68,54 @@ static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", cl::location(llvm::InterleaveSrcInPtx)); +namespace { +/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V +/// depends. +void DiscoverDependentGlobals(Value *V, + DenseSet<GlobalVariable*> &Globals) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + Globals.insert(GV); + else { + if (User *U = dyn_cast<User>(V)) { + for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { + DiscoverDependentGlobals(U->getOperand(i), Globals); + } + } + } +} +/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable +/// instances to be emitted, but only after any dependents have been added +/// first. +void VisitGlobalVariableForEmission(GlobalVariable *GV, + SmallVectorImpl<GlobalVariable*> &Order, + DenseSet<GlobalVariable*> &Visited, + DenseSet<GlobalVariable*> &Visiting) { + // Have we already visited this one? + if (Visited.count(GV)) return; + + // Do we have a circular dependency? + if (Visiting.count(GV)) + report_fatal_error("Circular dependency found in global variable set"); + + // Start visiting this global + Visiting.insert(GV); + + // Make sure we visit all dependents first + DenseSet<GlobalVariable*> Others; + for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) + DiscoverDependentGlobals(GV->getOperand(i), Others); + + for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), + E = Others.end(); I != E; ++I) + VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); + + // Now we can visit ourself + Order.push_back(GV); + Visited.insert(GV); + Visiting.erase(GV); +} +} // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we // cannot just link to the existing version. @@ -893,10 +940,27 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitDeclarations(M, OS2); - // Print out module-level global variables here. + // As ptxas does not support forward references of globals, we need to first + // sort the list of module-level globals in def-use order. We visit each + // global variable in order, and ensure that we emit it *after* its dependent + // globals. We use a little extra memory maintaining both a set and a list to + // have fast searches while maintaining a strict ordering. + SmallVector<GlobalVariable*,8> Globals; + DenseSet<GlobalVariable*> GVVisited; + DenseSet<GlobalVariable*> GVVisiting; + + // Visit each global variable, in order for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I, OS2); + I != E; ++I) + VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); + + assert(GVVisited.size() == M.getGlobalList().size() && + "Missed a global variable"); + assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); + + // Print out module-level global variables in proper order + for (unsigned i = 0, e = Globals.size(); i != e; ++i) + printModuleLevelGV(Globals[i], OS2); OS2 << '\n'; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index be771e3567..8430e21204 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PTX does not support load / store predicate registers - setOperationAction(ISD::LOAD, MVT::i1, Expand); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setOperationAction(ISD::STORE, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i32, MVT::i1, Expand); setTruncStoreAction(MVT::i16, MVT::i1, Expand); @@ -856,11 +857,66 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_SUBVECTOR: return Op; case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } + +// v = ld i1* addr +// => +// v1 = ld i8* addr +// v = trunc v1 to i1 +SDValue NVPTXTargetLowering:: +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(Node); + DebugLoc dl = Node->getDebugLoc(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + assert(ExtType == ISD::NON_EXTLOAD) ; + EVT VT = Node->getValueType(0); + assert(VT == MVT::i1 && "Custom lowering for i1 load only"); + SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), + LD->getAlignment()); + SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); + // The legalizer (the caller) is expecting two values from the legalized + // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() + // in LegalizeDAG.cpp which also uses MergeValues. + SDValue Ops[] = {result, LD->getChain()}; + return DAG.getMergeValues(Ops, 2, dl); +} + +// st i1 v, addr +// => +// v1 = zxt v to i8 +// st i8, addr +SDValue NVPTXTargetLowering:: +LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3 = ST->getValue(); + EVT VT = Tmp3.getValueType(); + assert(VT == MVT::i1 && "Custom lowering for i1 store only"); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, + MVT::i8, Tmp3); + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + return Result; +} + + SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, EVT v) const { diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 86246e6449..94a177ceb0 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -138,6 +138,9 @@ private: SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/test/CodeGen/NVPTX/global-ordering.ll b/test/CodeGen/NVPTX/global-ordering.ll new file mode 100644 index 0000000000..43394a79e9 --- /dev/null +++ b/test/CodeGen/NVPTX/global-ordering.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 + +; Make sure we emit these globals in def-use order + + +; PTX32: .visible .global .align 1 .u8 a = 2; +; PTX32-NEXT: .visible .global .align 4 .u32 a2 = a; +; PTX64: .visible .global .align 1 .u8 a = 2; +; PTX64-NEXT: .visible .global .align 8 .u64 a2 = a; +@a2 = addrspace(1) global i8 addrspace(1)* @a +@a = addrspace(1) global i8 2 + + +; PTX32: .visible .global .align 1 .u8 b = 1; +; PTX32-NEXT: .visible .global .align 4 .u32 b2[2] = {b, b}; +; PTX64: .visible .global .align 1 .u8 b = 1; +; PTX64-NEXT: .visible .global .align 8 .u64 b2[2] = {b, b}; +@b2 = addrspace(1) global [2 x i8 addrspace(1)*] [i8 addrspace(1)* @b, i8 addrspace(1)* @b] +@b = addrspace(1) global i8 1 diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll new file mode 100644 index 0000000000..779f7798d8 --- /dev/null +++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 + +define ptx_kernel void @t1(i1* %a) { +; PTX32: mov.u16 %rc{{[0-9]+}}, 0; +; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}; +; PTX64: mov.u16 %rc{{[0-9]+}}, 0; +; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}; + store i1 false, i1* %a + ret void +} + + +define ptx_kernel void @t2(i1* %a, i8* %b) { +; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1; +; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1; +; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1; +; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1; + + %t1 = load i1* %a + %t2 = select i1 %t1, i8 1, i8 2 + store i8 %t2, i8* %b + ret void +} |