diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-04-17 21:00:11 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-04-17 21:00:11 +0000 |
commit | 93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221 (patch) | |
tree | 55dc16980d35cbffb1a7136f2ebde2c9f17fe2d9 | |
parent | ec6e62ec98afe0397c006a4036dff57cccee163e (diff) | |
download | llvm-93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221.tar.gz llvm-93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221.tar.bz2 llvm-93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221.tar.xz |
R600/SI: Stop using i128 as the resource descriptor type
Having i128 as a legal type complicates the legalization phase. v4i32
is already a legal type, so we will use that instead.
This fixes several piglit tests.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206500 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 31 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.td | 8 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 40 | ||||
-rw-r--r-- | lib/Target/R600/SIRegisterInfo.td | 12 | ||||
-rw-r--r-- | lib/Target/R600/SITypeRewriter.cpp | 18 | ||||
-rw-r--r-- | test/CodeGen/R600/store.ll | 26 |
7 files changed, 70 insertions, 66 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 1c7f5f00c4..c3ad46a4fa 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -42,9 +42,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass); addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::VSrc_128RegClass); + addRegisterClass(MVT::v4f32, &AMDGPU::VSrc_128RegClass); addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); @@ -78,8 +77,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADDC, MVT::i32, Legal); setOperationAction(ISD::ADDE, MVT::i32, Legal); - setOperationAction(ISD::BITCAST, MVT::i128, Legal); - // We need to custom lower vector stores from local memory setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); @@ -99,7 +96,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); - setOperationAction(ISD::STORE, MVT::i128, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); @@ -164,7 +160,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTruncStoreAction(MVT::i32, MVT::i16, Custom); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); - setTruncStoreAction(MVT::i128, MVT::i64, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); @@ -595,7 +590,7 @@ SDValue 
SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { AMDGPU::VGPR2, VT); case AMDGPUIntrinsic::SI_load_const: { SDValue Ops [] = { - ResourceDescriptorToi128(Op.getOperand(1), DAG), + Op.getOperand(1), Op.getOperand(2) }; @@ -616,7 +611,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG); case AMDGPUIntrinsic::SI_vs_load_input: return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT, - ResourceDescriptorToi128(Op.getOperand(1), DAG), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } @@ -631,7 +626,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Ops [] = { Chain, - ResourceDescriptorToi128(Op.getOperand(2), DAG), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), Op.getOperand(5), @@ -799,26 +794,12 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } -SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op, - SelectionDAG &DAG) const { - - if (Op.getValueType() == MVT::i128) { - return Op; - } - - assert(Op.getOpcode() == ISD::UNDEF); - - return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128, - DAG.getConstant(0, MVT::i64), - DAG.getConstant(0, MVT::i64)); -} - SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const { return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), - ResourceDescriptorToi128(Op.getOperand(3), DAG), + Op.getOperand(3), Op.getOperand(4)); } diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index f3a52cb7f9..ff90831067 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -33,7 +33,6 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue 
ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const; bool foldImm(SDValue &Operand, int32_t &Immediate, bool &ScalarSlotUsed) const; const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG, diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ac72739904..93de9aa460 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -17,13 +17,13 @@ def SIadd64bit32bit : SDNode<"ISD::ADD", >; def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", - SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>, + SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>, [SDNPMayLoad, SDNPMemOperand] >; def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTypeProfile<0, 13, - [SDTCisVT<0, i128>, // rsrc(SGPR) + [SDTCisVT<0, v4i32>, // rsrc(SGPR) SDTCisVT<1, iAny>, // vdata(VGPR) SDTCisVT<2, i32>, // num_channels(imm) SDTCisVT<3, i32>, // vaddr(VGPR) @@ -41,13 +41,13 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", >; def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", - SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>, + SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>, SDTCisVT<3, i32>]> >; class SDSample<string opcode> : SDNode <opcode, SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>, - SDTCisVT<3, i128>, SDTCisVT<4, i32>]> + SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]> >; def SIsample : SDSample<"AMDGPUISD::SAMPLE">; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e872bd8cc6..036b5aa1d7 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1456,7 +1456,7 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< - (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), + (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, 
$buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) >; @@ -1479,34 +1479,34 @@ def : Pat < /* SIsample for simple 1D texture lookup */ def : Pat < - (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm), + (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT), (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; @@ -1695,8 +1695,6 @@ def : BitConvert <i64, v2i32, VReg_64>; def : BitConvert <v4f32, v4i32, VReg_128>; def : BitConvert <v4i32, v4f32, VReg_128>; -def : BitConvert <v4i32, i128, VReg_128>; -def : BitConvert <i128, v4i32, VReg_128>; def : BitConvert <v8f32, v8i32, SReg_256>; def : BitConvert <v8i32, v8f32, SReg_256>; @@ -1865,19 +1863,19 @@ def : Ext32Pat <anyext>; // 1. 
Offset as 8bit DWORD immediate def : Pat < - (SIload_constant i128:$sbase, IMM8bitDWORD:$offset), + (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset), (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)) >; // 2. Offset loaded in an 32bit SGPR def : Pat < - (SIload_constant i128:$sbase, imm:$offset), + (SIload_constant v4i32:$sbase, imm:$offset), (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; // 3. Offset in an 32Bit VGPR def : Pat < - (SIload_constant i128:$sbase, i32:$voff), + (SIload_constant v4i32:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0) >; @@ -1979,7 +1977,6 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>; defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>; defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>; -defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>; defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>; @@ -2071,7 +2068,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe MUBUF bothen> { def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 0, imm:$glc, imm:$slc, imm:$tfe)), (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), @@ -2079,7 +2076,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe >; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm, 1, 0, imm:$glc, imm:$slc, imm:$tfe)), (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), @@ -2087,7 +2084,7 @@ multiclass MUBUF_Load_Dword <ValueType 
vt, MUBUF offset, MUBUF offen, MUBUF idxe >; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 1, imm:$glc, imm:$slc, imm:$tfe)), (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), @@ -2095,7 +2092,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe >; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset, imm, 1, 1, imm:$glc, imm:$slc, imm:$tfe)), (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), @@ -2116,7 +2113,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_ // TBUFFER_STORE_FORMAT_*, addr64=0 class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat< - (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr, + (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr, i32:$soffset, imm:$inst_offset, imm:$dfmt, imm:$nfmt, imm:$offen, imm:$idxen, imm:$glc, imm:$slc, imm:$tfe), @@ -2241,13 +2238,6 @@ def : Pat< //===----------------------------------------------------------------------===// def : Pat < - (i64 (trunc i128:$x)), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 (EXTRACT_SUBREG $x, sub0)), sub0), - (i32 (EXTRACT_SUBREG $x, sub1)), sub1) ->; - -def : Pat < (i32 (trunc i64:$a)), (EXTRACT_SUBREG $a, sub0) >; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 65cf311dd9..6d6d8b9bd8 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64Regs, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>; def SReg_256 : 
RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; @@ -183,14 +183,14 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> { let Size = 96; } -def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>; def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; //===----------------------------------------------------------------------===// -// [SV]Src_* register classes, can have either an immediate or an register +// [SV]Src_(32|64) register classes, can have either an immediate or an register //===----------------------------------------------------------------------===// def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; @@ -201,3 +201,9 @@ def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; +//===----------------------------------------------------------------------===// +// SGPR and VGPR register classes +//===----------------------------------------------------------------------===// + +def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, + (add VReg_128, SReg_128)>; diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp index 9bf2caf217..fb374ca42d 100644 --- a/lib/Target/R600/SITypeRewriter.cpp +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -35,7 +35,7 @@ class SITypeRewriter : public FunctionPass, static char ID; Module *Mod; Type *v16i8; - Type *i128; + Type *v4i32; public: SITypeRewriter() : FunctionPass(ID) { } @@ -56,7 +56,7 @@ char SITypeRewriter::ID = 0; bool SITypeRewriter::doInitialization(Module &M) { Mod = &M; v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16); - i128 = Type::getIntNTy(M.getContext(), 128); + v4i32 = 
VectorType::get(Type::getInt32Ty(M.getContext()), 4); return false; } @@ -84,7 +84,8 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) { Type *ElemTy = PtrTy->getPointerElementType(); IRBuilder<> Builder(&I); if (ElemTy == v16i8) { - Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2)); + Value *BitCast = Builder.CreateBitCast(Ptr, + PointerType::get(v4i32,PtrTy->getPointerAddressSpace())); LoadInst *Load = Builder.CreateLoad(BitCast); SmallVector <std::pair<unsigned, MDNode*>, 8> MD; I.getAllMetadataOtherThanDebugLoc(MD); @@ -99,6 +100,7 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) { void SITypeRewriter::visitCallInst(CallInst &I) { IRBuilder<> Builder(&I); + SmallVector <Value*, 8> Args; SmallVector <Type*, 8> Types; bool NeedToReplace = false; @@ -107,10 +109,10 @@ void SITypeRewriter::visitCallInst(CallInst &I) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Value *Arg = I.getArgOperand(i); if (Arg->getType() == v16i8) { - Args.push_back(Builder.CreateBitCast(Arg, i128)); - Types.push_back(i128); + Args.push_back(Builder.CreateBitCast(Arg, v4i32)); + Types.push_back(v4i32); NeedToReplace = true; - Name = Name + ".i128"; + Name = Name + ".v4i32"; } else if (Arg->getType()->isVectorTy() && Arg->getType()->getVectorNumElements() == 1 && Arg->getType()->getVectorElementType() == @@ -144,12 +146,12 @@ void SITypeRewriter::visitCallInst(CallInst &I) { void SITypeRewriter::visitBitCast(BitCastInst &I) { IRBuilder<> Builder(&I); - if (I.getDestTy() != i128) { + if (I.getDestTy() != v4i32) { return; } if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) { - if (Op->getSrcTy() == i128) { + if (Op->getSrcTy() == v4i32) { I.replaceAllUsesWith(Op->getOperand(0)); I.eraseFromParent(); } diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll index a3c5331f3f..b29ad7e3ab 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/R600/store.ll @@ -297,3 +297,29 @@ entry: } attributes #0 = { 
nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +; When i128 was a legal type this program generated cannot select errors: + +; FUNC-LABEL: @i128-const-store +; FIXME: We should be able to do this with one store instruction +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; SI: BUFFER_STORE_DWORDX2 +; SI: BUFFER_STORE_DWORDX2 +define void @i128-const-store(i32 addrspace(1)* %out) { +entry: + store i32 1, i32 addrspace(1)* %out, align 4 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1 + store i32 1, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2 + store i32 2, i32 addrspace(1)* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3 + store i32 2, i32 addrspace(1)* %arrayidx6, align 4 + ret void +} |