diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-08-14 23:24:53 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-08-14 23:24:53 +0000 |
commit | e8e33f448e8830590c498ac5101ef8b27446ca3b (patch) | |
tree | 2aada1408e415c2de01557c54d4804568f3bdd09 | |
parent | 68db37b952be497c94c7aa98cf26f3baadb5afd3 (diff) | |
download | llvm-e8e33f448e8830590c498ac5101ef8b27446ca3b.tar.gz llvm-e8e33f448e8830590c498ac5101ef8b27446ca3b.tar.bz2 llvm-e8e33f448e8830590c498ac5101ef8b27446ca3b.tar.xz |
R600/SI: Replace v1i32 type with i32 in imageload and sample intrinsics
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188430 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 2 |
-rw-r--r-- | lib/Target/R600/SIInstrInfo.td | 2 |
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 2 |
-rw-r--r-- | lib/Target/R600/SITypeRewriter.cpp | 16 |
-rw-r--r-- | test/CodeGen/R600/llvm.SI.sample.ll | 17 |
5 files changed, 35 insertions, 4 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 40f082723a..30a510de91 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -43,8 +43,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass); addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass); - addRegisterClass(MVT::v1i32, &AMDGPU::VSrc_32RegClass); - addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass); addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass); addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index b7419782d3..2639456223 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -27,7 +27,7 @@ def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", >; class SDSample<string opcode> : SDNode <opcode, - SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVec<1>, SDTCisVT<2, v32i8>, + SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>, SDTCisVT<3, i128>, SDTCisVT<4, i32>]> >; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 4704217489..e719cb3270 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1326,7 +1326,7 @@ def : Pat < /* SIsample for simple 1D texture lookup */ def : Pat < - (SIsample v1i32:$addr, v32i8:$rsrc, i128:$sampler, imm), + (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm), (IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp index 9da11e88eb..f194d8b56d 100644 --- a/lib/Target/R600/SITypeRewriter.cpp +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -16,6 +16,9 @@ /// legal for some compute APIs, and we don't want to declare it as legal /// in the backend, because we want the legalizer to expand all v16i8 /// operations. 
+/// v1* => * +/// - Having v1* types complicates the legalizer and we can easily replace +/// - them with the element type. //===----------------------------------------------------------------------===// #include "AMDGPU.h" @@ -109,6 +112,19 @@ void SITypeRewriter::visitCallInst(CallInst &I) { Types.push_back(i128); NeedToReplace = true; Name = Name + ".i128"; + } else if (Arg->getType()->isVectorTy() && + Arg->getType()->getVectorNumElements() == 1 && + Arg->getType()->getVectorElementType() == + Type::getInt32Ty(I.getContext())){ + Type *ElementTy = Arg->getType()->getVectorElementType(); + std::string TypeName = "i32"; + InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg); + assert(Def); + Args.push_back(Def->getOperand(1)); + Types.push_back(ElementTy); + std::string VecTypeName = "v1" + TypeName; + Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName); + NeedToReplace = true; } else { Args.push_back(Arg); Types.push_back(Arg->getType()); diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll index 1c830a9919..2651b99a42 100644 --- a/test/CodeGen/R600/llvm.SI.sample.ll +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -135,6 +135,23 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { ret void } +; CHECK: @v1 +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 15 +define void @v1(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 1 + %4 = extractelement <4 x float> %1, i32 2 + %5 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5) + ret void +} + + +declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone + declare <4 x float> 
@llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) |