summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-08-26 15:06:04 +0000
committerTom Stellard <thomas.stellard@amd.com>2013-08-26 15:06:04 +0000
commitd08a9303614355cfdcac5f2c27c09ce809565423 (patch)
tree4e2da5d0223dc6249c4f7105cb69864fb4162388
parenta01cdea9c660dc8b295782a4ab560d0039ff7571 (diff)
downloadllvm-d08a9303614355cfdcac5f2c27c09ce809565423.tar.gz
llvm-d08a9303614355cfdcac5f2c27c09ce809565423.tar.bz2
llvm-d08a9303614355cfdcac5f2c27c09ce809565423.tar.xz
R600: Add support for vector local memory loads
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189226 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp23
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h2
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp8
-rw-r--r--lib/Target/R600/SIISelLowering.cpp17
-rw-r--r--test/CodeGen/R600/load.ll14
5 files changed, 64 insertions, 0 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 88867b6e9d..1237323ee8 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -424,6 +424,29 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
return Op;
}
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+ EVT EltVT = Op.getValueType().getVectorElementType();
+ EVT PtrVT = Load->getBasePtr().getValueType();
+ unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
+ SmallVector<SDValue, 8> Loads;
+ SDLoc SL(Op);
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
+ DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
+ Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+ Load->getChain(), Ptr,
+ MachinePointerInfo(Load->getMemOperand()->getValue()),
+ MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->getAlignment()));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
+ Loads.size());
+}
+
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index f739aed978..75ac4c2f18 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -50,6 +50,8 @@ protected:
unsigned Reg, EVT VT) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
+ /// \brief Split a vector load into multiple scalar loads.
+ SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 9bc8e8a818..f0242b86c0 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1155,6 +1155,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Ptr = Op.getOperand(1);
SDValue LoweredLoad;
+ if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
+ SDValue MergedValues[2] = {
+ SplitVectorLoad(Op, DAG),
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
if (ConstantBlock > -1) {
SDValue Result;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 9cbba6c54e..f196059b89 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -66,6 +66,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BITCAST, MVT::i128, Legal);
+ // We need to custom lower vector stores from local memory
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -368,6 +372,19 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::LOAD: {
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ Op.getValueType().isVector()) {
+ SDValue MergedValues[2] = {
+ SplitVectorLoad(Op, DAG),
+ Load->getChain()
+ };
+ return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
+ } else {
+ return SDValue();
+ }
+ }
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index ba8250650f..6cf1af71ae 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -516,3 +516,17 @@ entry:
store float %0, float addrspace(1)* %out
ret void
}
+
+; load a v2f32 value from the local address space
+; R600-CHECK: @load_v2f32_local
+; R600-CHECK: LDS_READ_RET
+; R600-CHECK: LDS_READ_RET
+; SI-CHECK: @load_v2f32_local
+; SI-CHECK: DS_READ_B32
+; SI-CHECK: DS_READ_B32
+define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
+entry:
+ %0 = load <2 x float> addrspace(3)* %in
+ store <2 x float> %0, <2 x float> addrspace(1)* %out
+ ret void
+}