summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2014-02-02 00:05:35 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2014-02-02 00:05:35 +0000
commitb2abb9752eaba1395b5b6773fbf645036eb992f2 (patch)
tree698fdb39539e3c8775dca90288f7502e185fa054
parenteb97c0499bda650d84bba2079d4007a4d3ec0246 (diff)
downloadllvm-b2abb9752eaba1395b5b6773fbf645036eb992f2.tar.gz
llvm-b2abb9752eaba1395b5b6773fbf645036eb992f2.tar.bz2
llvm-b2abb9752eaba1395b5b6773fbf645036eb992f2.tar.xz
R600/SI: Fix insertelement with dynamic indices.
This didn't work for any integer vectors, and didn't work with some sizes of float vectors. This should now work with all sizes of float and i32 vectors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200619 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/SIInstructions.td24
-rw-r--r--test/CodeGen/R600/insert_vector_elt.ll180
2 files changed, 186 insertions, 18 deletions
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 912b59a9e7..a5a0dbb711 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1671,8 +1671,13 @@ def : BitConvert <i128, v4i32, VReg_128>;
def : BitConvert <v8i32, v32i8, SReg_256>;
def : BitConvert <v32i8, v8i32, SReg_256>;
def : BitConvert <v8i32, v32i8, VReg_256>;
+// BitConvert entries for i32 <-> f32 8-element vectors in VReg_256.
+def : BitConvert <v8i32, v8f32, VReg_256>;
+def : BitConvert <v8f32, v8i32, VReg_256>;
def : BitConvert <v32i8, v8i32, VReg_256>;
+// BitConvert entries for i32 <-> f32 16-element vectors in VReg_512.
+def : BitConvert <v16i32, v16f32, VReg_512>;
+def : BitConvert <v16f32, v16i32, VReg_512>;
+
/********** =================== **********/
/********** Src & Dst modifiers **********/
/********** =================== **********/
@@ -2064,7 +2069,7 @@ def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
/********** Indirect adressing **********/
/********** ====================== **********/
-multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
+multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
// 1. Extract with offset
def : Pat<
@@ -2080,21 +2085,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
// 3. Insert with offset
def : Pat<
- (vector_insert vt:$vec, f32:$val, (add i32:$idx, imm:$off)),
+ (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
(IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
- (vector_insert vt:$vec, f32:$val, i32:$idx),
+ (vector_insert vt:$vec, eltvt:$val, i32:$idx),
(IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
>;
}
-defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
+// Float vectors: the element type passed to the multiclass is f32.
+defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
+
+// i32 vectors pass the same SI_INDIRECT_DST_V* argument as the float vector
+// of the same length; only the vector and element types differ.
+defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
/********** =============== **********/
/********** Conditions **********/
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
index 7e04a1f4bc..530d1ccbbc 100644
--- a/test/CodeGen/R600/insert_vector_elt.ll
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -1,17 +1,175 @@
-; REQUIRES: asserts
-; XFAIL: *
-; RUN: llc < %s -march=r600 -mcpu=redwood -o %t
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
-define void @var_insert(<4 x i32> addrspace(1)* %out, <4 x i32> %x, i32 %val, i32 %idx) nounwind {
-entry:
- %tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx ; <<4 x i32>> [#uses=1]
- store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out
+; FIXME: Broken on evergreen
+; FIXME: For some reason the 8 and 16 vectors are being stored as
+; individual elements instead of 128-bit stores.
+
+
+; FIXME: Why is the constant moved into the intermediate register and
+; not just directly into the vector component?
+
+; Inserts the constant 5.0 into element 0 of the <4 x float> argument %a and
+; stores the result to %out.
+; NOTE(review): only the label line below is active. The RUN line passes
+; -check-prefix=SI to FileCheck, and the five instruction lines that follow
+; the label carry no check prefix, so they are not matched.
+; NOTE(review): "{{[}}" looks like an unescaped bracket; the dynamic tests in
+; this file write "{{v\[}}" instead. Presumably this needs escaping before the
+; lines are enabled -- confirm.
+; SI-LABEL: @insertelement_v4f32_0:
+; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
+; V_MOV_B32_e32
+; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
+; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
+; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
+define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
+ %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
+ store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; Inserts the constant 5.0 at constant index 1 of %a and stores the vector to
+; %out. Only the function label is checked.
+; SI-LABEL: @insertelement_v4f32_1:
+define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
+ %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
+ store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; Inserts the constant 5.0 at constant index 2 of %a and stores the vector to
+; %out. Only the function label is checked.
+; SI-LABEL: @insertelement_v4f32_2:
+define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
+ %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
+ store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; Inserts the constant 5.0 at constant index 3 (the last element) of %a and
+; stores the vector to %out. Only the function label is checked.
+; SI-LABEL: @insertelement_v4f32_3:
+define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
+ %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
+ store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; Integer counterpart of the constant-index tests: inserts i32 999 at index 0
+; of %a and stores the vector to %out. Only the function label is checked.
+; SI-LABEL: @insertelement_v4i32_0:
+define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
+ %vecins = insertelement <4 x i32> %a, i32 999, i32 0
+ store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
+ ret void
+}
+
+; Inserts the constant 5.0 into <2 x float> %a at the run-time index %b.
+; The checks expect the constant to be moved into a v register, written with
+; V_MOVRELD_B32_e32, and the DWORDX2 store operand to begin at the register
+; number captured from the V_MOVRELD destination.
+; SI-LABEL: @dynamic_insertelement_v2f32:
+; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
+; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
+; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
+define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
+ %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
+ store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
+ ret void
+}
+
+; Inserts the constant 5.0 into <4 x float> %a at the run-time index %b.
+; Same check shape as the <2 x float> test, but the result is expected to be
+; written with a single DWORDX4 store starting at the V_MOVRELD destination
+; register number.
+; SI-LABEL: @dynamic_insertelement_v4f32:
+; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
+; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
+; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
+define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
+ %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
+ store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; Inserts the constant 5.0 into <8 x float> %a at the run-time index %b.
+; Only the function label is checked. The FIXMESI lines use a prefix that
+; FileCheck is not asked to match; they record the two DWORDX4 stores wanted
+; once the element-wise stores described in the FIXME at the top of this file
+; are resolved.
+; SI-LABEL: @dynamic_insertelement_v8f32:
+; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
+ %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
+ store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
+ ret void
+}
+
+; Inserts the constant 5.0 into <16 x float> %a at the run-time index %b.
+; Only the function label is checked. The FIXMESI lines use a prefix that
+; FileCheck is not asked to match; they record the four DWORDX4 stores wanted
+; once the element-wise stores described in the FIXME at the top of this file
+; are resolved.
+; SI-LABEL: @dynamic_insertelement_v16f32:
+; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
+ %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
+ store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
+ ret void
+}
+
+; Inserts i32 5 into <2 x i32> %a at the run-time index %b and stores the
+; vector to %out. The only instruction checked is a DWORDX2 store; unlike the
+; float tests, no V_MOVRELD check is present.
+; SI-LABEL: @dynamic_insertelement_v2i32:
+; SI: BUFFER_STORE_DWORDX2
+define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
+ %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
+ store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
+; Inserts i32 5 into <4 x i32> %a at the run-time index %b and stores the
+; vector to %out. The only instruction checked is a DWORDX4 store.
+; SI-LABEL: @dynamic_insertelement_v4i32:
+; SI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
+ %vecins = insertelement <4 x i32> %a, i32 5, i32 %b
+ store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
+ ret void
+}
+
+; Inserts i32 5 into <8 x i32> %a at the run-time index %b.
+; Only the function label is checked; the FIXMESI lines are not matched by
+; FileCheck and record the two DWORDX4 stores that are wanted (see the FIXME
+; at the top of this file about 8- and 16-element vectors).
+; SI-LABEL: @dynamic_insertelement_v8i32:
+; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
+ %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
+ store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
+ ret void
+}
+
+; Inserts i32 5 into <16 x i32> %a at the run-time index %b.
+; Only the function label is checked; the FIXMESI lines are not matched by
+; FileCheck and record the four DWORDX4 stores that are wanted (see the FIXME
+; at the top of this file about 8- and 16-element vectors).
+; SI-LABEL: @dynamic_insertelement_v16i32:
+; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
+ %vecins = insertelement <16 x i32> %a, i32 5, i32 %b
+ store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
+ ret void
+}
+
+
+; Inserts i16 5 into <2 x i16> %a at the run-time index %b.
+; Only the function label is checked; the FIXMESI line is not matched.
+; NOTE(review): <2 x i16> is 32 bits wide, whereas DWORDX2 is what the 64-bit
+; <2 x i32> test in this file expects. The intended instruction is presumably
+; BUFFER_STORE_DWORD -- confirm before enabling this check.
+; NOTE(review): the store claims align 8 for a 4-byte vector -- TODO confirm
+; this is intended.
+; SI-LABEL: @dynamic_insertelement_v2i16:
+; FIXMESI: BUFFER_STORE_DWORDX2
+define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
+ %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
+ store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
+ ret void
+}
+
+; Inserts i16 5 into <4 x i16> %a at the run-time index %b.
+; Only the function label is checked; the FIXMESI line is not matched.
+; NOTE(review): <4 x i16> is 64 bits wide, whereas DWORDX4 is what the
+; 128-bit <4 x i32> test in this file expects. The intended instruction is
+; presumably BUFFER_STORE_DWORDX2 -- confirm before enabling this check.
+; NOTE(review): the store claims align 16 for an 8-byte vector -- TODO
+; confirm this is intended.
+; SI-LABEL: @dynamic_insertelement_v4i16:
+; FIXMESI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
+ %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
+ store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
+ ret void
+}
+
+
+; Inserts i8 5 into <2 x i8> %a at the run-time index %b.
+; Only the function label is checked; the FIXMESI line is not matched and
+; records the single 16-bit store that is wanted.
+; NOTE(review): the store claims align 8 for a 2-byte vector -- TODO confirm
+; this is intended.
+; SI-LABEL: @dynamic_insertelement_v2i8:
+; FIXMESI: BUFFER_STORE_USHORT
+define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
+ %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
+ store <2 x i8> %vecins, <2 x i8> addrspace(1)* %out, align 8
+ ret void
+}
+
+; Inserts i8 5 into <4 x i8> %a at the run-time index %b.
+; Only the function label is checked; the FIXMESI line is not matched and
+; records the single dword store that is wanted.
+; NOTE(review): the store claims align 16 for a 4-byte vector -- TODO confirm
+; this is intended.
+; SI-LABEL: @dynamic_insertelement_v4i8:
+; FIXMESI: BUFFER_STORE_DWORD
+define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
+ %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
+ store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
+ ret void
+}
+
+; Inserts i8 5 into <8 x i8> %a at the run-time index %b.
+; Only the function label is checked; the FIXMESI line is not matched and
+; records the single DWORDX2 store that is wanted.
+; NOTE(review): the store claims align 16 for an 8-byte vector -- TODO
+; confirm this is intended.
+; SI-LABEL: @dynamic_insertelement_v8i8:
+; FIXMESI: BUFFER_STORE_DWORDX2
+define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
+ %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
+ store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
ret void
}
-define void @var_extract(i32 addrspace(1)* %out, <4 x i32> %x, i32 %idx) nounwind {
-entry:
- %tmp3 = extractelement <4 x i32> %x, i32 %idx ; <<i32>> [#uses=1]
- store i32 %tmp3, i32 addrspace(1)* %out
+; Inserts i8 5 into <16 x i8> %a at the run-time index %b.
+; Only the function label is checked; the FIXMESI line is not matched and
+; records the single DWORDX4 store that is wanted for the 16-byte vector.
+; SI-LABEL: @dynamic_insertelement_v16i8:
+; FIXMESI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
+ %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
+ store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
ret void
}