summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-04-17 21:00:11 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-04-17 21:00:11 +0000
commit93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221 (patch)
tree55dc16980d35cbffb1a7136f2ebde2c9f17fe2d9
parentec6e62ec98afe0397c006a4036dff57cccee163e (diff)
downloadllvm-93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221.tar.gz
llvm-93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221.tar.bz2
llvm-93ea1378d22929c3e0e49b68f7f8b9bf3f2ad221.tar.xz
R600/SI: Stop using i128 as the resource descriptor type
Having i128 as a legal type complicates the legalization phase. v4i32 is already a legal type, so we will use that instead. This fixes several piglit tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206500 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/SIISelLowering.cpp31
-rw-r--r--lib/Target/R600/SIISelLowering.h1
-rw-r--r--lib/Target/R600/SIInstrInfo.td8
-rw-r--r--lib/Target/R600/SIInstructions.td40
-rw-r--r--lib/Target/R600/SIRegisterInfo.td12
-rw-r--r--lib/Target/R600/SITypeRewriter.cpp18
-rw-r--r--test/CodeGen/R600/store.ll26
7 files changed, 70 insertions, 66 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 1c7f5f00c4..c3ad46a4fa 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -42,9 +42,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::VSrc_128RegClass);
+ addRegisterClass(MVT::v4f32, &AMDGPU::VSrc_128RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
@@ -78,8 +77,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i128, Legal);
-
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -99,7 +96,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
- setOperationAction(ISD::STORE, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
@@ -164,7 +160,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
@@ -595,7 +590,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
AMDGPU::VGPR2, VT);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops [] = {
- ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(1),
Op.getOperand(2)
};
@@ -616,7 +611,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
}
@@ -631,7 +626,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Ops [] = {
Chain,
- ResourceDescriptorToi128(Op.getOperand(2), DAG),
+ Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(4),
Op.getOperand(5),
@@ -799,26 +794,12 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
-SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
- SelectionDAG &DAG) const {
-
- if (Op.getValueType() == MVT::i128) {
- return Op;
- }
-
- assert(Op.getOpcode() == ISD::UNDEF);
-
- return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
- DAG.getConstant(0, MVT::i64),
- DAG.getConstant(0, MVT::i64));
-}
-
SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
const SDValue &Op,
SelectionDAG &DAG) const {
return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
Op.getOperand(2),
- ResourceDescriptorToi128(Op.getOperand(3), DAG),
+ Op.getOperand(3),
Op.getOperand(4));
}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index f3a52cb7f9..ff90831067 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -33,7 +33,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
bool &ScalarSlotUsed) const;
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index ac72739904..93de9aa460 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -17,13 +17,13 @@ def SIadd64bit32bit : SDNode<"ISD::ADD",
>;
def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
- SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
+ SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
[SDNPMayLoad, SDNPMemOperand]
>;
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
SDTypeProfile<0, 13,
- [SDTCisVT<0, i128>, // rsrc(SGPR)
+ [SDTCisVT<0, v4i32>, // rsrc(SGPR)
SDTCisVT<1, iAny>, // vdata(VGPR)
SDTCisVT<2, i32>, // num_channels(imm)
SDTCisVT<3, i32>, // vaddr(VGPR)
@@ -41,13 +41,13 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
>;
def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
SDTCisVT<3, i32>]>
>;
class SDSample<string opcode> : SDNode <opcode,
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
- SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
+ SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
>;
def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index e872bd8cc6..036b5aa1d7 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1456,7 +1456,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
- (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
+ (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
@@ -1479,34 +1479,34 @@ def : Pat <
/* SIsample for simple 1D texture lookup */
def : Pat <
- (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
+ (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
(IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowArrayPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
@@ -1695,8 +1695,6 @@ def : BitConvert <i64, v2i32, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
-def : BitConvert <v4i32, i128, VReg_128>;
-def : BitConvert <i128, v4i32, VReg_128>;
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, SReg_256>;
@@ -1865,19 +1863,19 @@ def : Ext32Pat <anyext>;
// 1. Offset as 8bit DWORD immediate
def : Pat <
- (SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
+ (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
>;
// 2. Offset loaded in an 32bit SGPR
def : Pat <
- (SIload_constant i128:$sbase, imm:$offset),
+ (SIload_constant v4i32:$sbase, imm:$offset),
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
// 3. Offset in an 32Bit VGPR
def : Pat <
- (SIload_constant i128:$sbase, i32:$voff),
+ (SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
>;
@@ -1979,7 +1977,6 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
@@ -2071,7 +2068,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
MUBUF bothen> {
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
(offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
@@ -2079,7 +2076,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
(offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
@@ -2087,7 +2084,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 1, imm:$glc, imm:$slc,
imm:$tfe)),
(idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
@@ -2095,7 +2092,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
imm, 1, 1, imm:$glc, imm:$slc,
imm:$tfe)),
(bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
@@ -2116,7 +2113,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_
// TBUFFER_STORE_FORMAT_*, addr64=0
class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
- (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
+ (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
i32:$soffset, imm:$inst_offset, imm:$dfmt,
imm:$nfmt, imm:$offen, imm:$idxen,
imm:$glc, imm:$slc, imm:$tfe),
@@ -2241,13 +2238,6 @@ def : Pat<
//===----------------------------------------------------------------------===//
def : Pat <
- (i64 (trunc i128:$x)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 (EXTRACT_SUBREG $x, sub0)), sub0),
- (i32 (EXTRACT_SUBREG $x, sub1)), sub1)
->;
-
-def : Pat <
(i32 (trunc i64:$a)),
(EXTRACT_SUBREG $a, sub0)
>;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 65cf311dd9..6d6d8b9bd8 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
(add SGPR_64Regs, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
@@ -183,14 +183,14 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
let Size = 96;
}
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
-// [SV]Src_* register classes, can have either an immediate or an register
+// [SV]Src_(32|64) register classes, can have either an immediate or an register
//===----------------------------------------------------------------------===//
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
@@ -201,3 +201,9 @@ def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+//===----------------------------------------------------------------------===//
+// SGPR and VGPR register classes
+//===----------------------------------------------------------------------===//
+
+def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
+ (add VReg_128, SReg_128)>;
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index 9bf2caf217..fb374ca42d 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -35,7 +35,7 @@ class SITypeRewriter : public FunctionPass,
static char ID;
Module *Mod;
Type *v16i8;
- Type *i128;
+ Type *v4i32;
public:
SITypeRewriter() : FunctionPass(ID) { }
@@ -56,7 +56,7 @@ char SITypeRewriter::ID = 0;
bool SITypeRewriter::doInitialization(Module &M) {
Mod = &M;
v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
- i128 = Type::getIntNTy(M.getContext(), 128);
+ v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
return false;
}
@@ -84,7 +84,8 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) {
Type *ElemTy = PtrTy->getPointerElementType();
IRBuilder<> Builder(&I);
if (ElemTy == v16i8) {
- Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2));
+ Value *BitCast = Builder.CreateBitCast(Ptr,
+ PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
LoadInst *Load = Builder.CreateLoad(BitCast);
SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
I.getAllMetadataOtherThanDebugLoc(MD);
@@ -99,6 +100,7 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) {
void SITypeRewriter::visitCallInst(CallInst &I) {
IRBuilder<> Builder(&I);
+
SmallVector <Value*, 8> Args;
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
@@ -107,10 +109,10 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
if (Arg->getType() == v16i8) {
- Args.push_back(Builder.CreateBitCast(Arg, i128));
- Types.push_back(i128);
+ Args.push_back(Builder.CreateBitCast(Arg, v4i32));
+ Types.push_back(v4i32);
NeedToReplace = true;
- Name = Name + ".i128";
+ Name = Name + ".v4i32";
} else if (Arg->getType()->isVectorTy() &&
Arg->getType()->getVectorNumElements() == 1 &&
Arg->getType()->getVectorElementType() ==
@@ -144,12 +146,12 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
void SITypeRewriter::visitBitCast(BitCastInst &I) {
IRBuilder<> Builder(&I);
- if (I.getDestTy() != i128) {
+ if (I.getDestTy() != v4i32) {
return;
}
if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
- if (Op->getSrcTy() == i128) {
+ if (Op->getSrcTy() == v4i32) {
I.replaceAllUsesWith(Op->getOperand(0));
I.eraseFromParent();
}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index a3c5331f3f..b29ad7e3ab 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -297,3 +297,29 @@ entry:
}
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; When i128 was a legal type this program generated cannot select errors:
+
+; FUNC-LABEL: @i128-const-store
+; FIXME: We should be able to to this with one store instruction
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; SI: BUFFER_STORE_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+define void @i128-const-store(i32 addrspace(1)* %out) {
+entry:
+ store i32 1, i32 addrspace(1)* %out, align 4
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+ store i32 1, i32 addrspace(1)* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2
+ store i32 2, i32 addrspace(1)* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3
+ store i32 2, i32 addrspace(1)* %arrayidx6, align 4
+ ret void
+}