summaryrefslogtreecommitdiff
path: root/test/CodeGen/R600
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2013-10-23 00:44:32 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2013-10-23 00:44:32 +0000
commit f95b1621887e3409ceec2db47e1b44271d934735 (patch)
tree 8b2655eb7bf40d51eedf9649d0a929e1fc2c6cb1 /test/CodeGen/R600
parent d0716b064744598ba7df33b8b47de0375c450570 (diff)
download llvm-f95b1621887e3409ceec2db47e1b44271d934735.tar.gz
llvm-f95b1621887e3409ceec2db47e1b44271d934735.tar.bz2
llvm-f95b1621887e3409ceec2db47e1b44271d934735.tar.xz
R600: Fix handling of vector kernel arguments
The SelectionDAGBuilder was promoting vector kernel arguments to legal types, but this won't work for R600 and SI since kernel arguments are stored in memory and can't be promoted. In order to handle vector arguments correctly, we need to look at the original types from the LLVM IR function.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193215 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r-- test/CodeGen/R600/kernel-args.ll | 455
-rw-r--r-- test/CodeGen/R600/or.ll | 2
-rw-r--r-- test/CodeGen/R600/short-args.ll | 69
-rw-r--r-- test/CodeGen/R600/store.ll | 90
-rw-r--r-- test/CodeGen/R600/trunc.ll | 1
5 files changed, 502 insertions, 115 deletions
diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll
new file mode 100644
index 0000000000..8b80e26011
--- /dev/null
+++ b/test/CodeGen/R600/kernel-args.ll
@@ -0,0 +1,455 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; EG-CHECK-LABEL: @i8_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+
+define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
+entry:
+ %0 = zext i8 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @i8_zext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_zext_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+
+define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
+entry:
+ %0 = zext i8 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @i8_sext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_sext_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+
+define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
+entry:
+ %0 = sext i8 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @i16_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+
+define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
+entry:
+ %0 = zext i16 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @i16_zext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_zext_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+
+define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
+entry:
+ %0 = zext i16 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @i16_sext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_sext_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+
+define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
+entry:
+ %0 = sext i16 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @i32_arg
+; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i32_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
+entry:
+ store i32 %in, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @f32_arg
+; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @f32_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
+entry:
+ store float %in, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v2i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v2i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
+entry:
+ store <2 x i8> %in, <2 x i8> addrspace(1)* %out
+ ret void
+}
+
+; EG-CHECK-LABEL: @v2i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v2i16_arg
+; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
+entry:
+ store <2 x i16> %in, <2 x i16> addrspace(1)* %out
+ ret void
+}
+
+; EG-CHECK-LABEL: @v2i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; SI-CHECK-LABEL: @v2i32_arg
+; SI-CHECK: S_LOAD_DWORDX2 SGPR{{[0-9]}}_SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
+entry:
+ store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v2f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; SI-CHECK-LABEL: @v2f32_arg
+; SI-CHECK: S_LOAD_DWORDX2 SGPR{{[0-9]}}_SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
+entry:
+ store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v3i8_arg
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
+; SI-CHECK-LABEL: @v3i8_arg
+define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
+entry:
+ store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v3i16_arg
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
+; SI-CHECK-LABEL: @v3i16_arg
+define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
+entry:
+ store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
+ ret void
+}
+; EG-CHECK-LABEL: @v3i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; SI-CHECK-LABEL: @v3i32_arg
+; SI-CHECK: S_LOAD_DWORDX4 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 13
+define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
+entry:
+ store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v3f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; SI-CHECK-LABEL: @v3f32_arg
+; SI-CHECK: S_LOAD_DWORDX4 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 13
+define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
+entry:
+ store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v4i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v4i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
+entry:
+ store <4 x i8> %in, <4 x i8> addrspace(1)* %out
+ ret void
+}
+
+; EG-CHECK-LABEL: @v4i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v4i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
+entry:
+ store <4 x i16> %in, <4 x i16> addrspace(1)* %out
+ ret void
+}
+
+; EG-CHECK-LABEL: @v4i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
+; SI-CHECK-LABEL: @v4i32_arg
+; SI-CHECK: S_LOAD_DWORDX4 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 13
+define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
+entry:
+ store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v4f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
+; SI-CHECK-LABEL: @v4f32_arg
+; SI-CHECK: S_LOAD_DWORDX4 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 13
+define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
+entry:
+ store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v8i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v8i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
+entry:
+ store <8 x i8> %in, <8 x i8> addrspace(1)* %out
+ ret void
+}
+
+; EG-CHECK-LABEL: @v8i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v8i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
+entry:
+ store <8 x i16> %in, <8 x i16> addrspace(1)* %out
+ ret void
+}
+
+; EG-CHECK-LABEL: @v8i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+; SI-CHECK-LABEL: @v8i32_arg
+; SI-CHECK: S_LOAD_DWORDX8 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 17
+define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
+entry:
+ store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v8f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+; SI-CHECK-LABEL: @v8f32_arg
+; SI-CHECK: S_LOAD_DWORDX8 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 17
+define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
+entry:
+ store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v16i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v16i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
+entry:
+ store <16 x i8> %in, <16 x i8> addrspace(1)* %out
+ ret void
+}
+
+; EG-CHECK-LABEL: @v16i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v16i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
+entry:
+ store <16 x i16> %in, <16 x i16> addrspace(1)* %out
+ ret void
+}
+
+; EG-CHECK-LABEL: @v16i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
+; SI-CHECK-LABEL: @v16i32_arg
+; SI-CHECK: S_LOAD_DWORDX16 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 25
+define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
+entry:
+ store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-CHECK-LABEL: @v16f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
+; SI-CHECK-LABEL: @v16f32_arg
+; SI-CHECK: S_LOAD_DWORDX16 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 25
+define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
+entry:
+ store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 6950ed0dfb..6c70469e8d 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -41,7 +41,7 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in)
; EG-CHECK-LABEL: @or_i64
; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
-; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[3].X
+; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; SI-CHECK-LABEL: @or_i64
; SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]}}
; SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]}}
diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll
deleted file mode 100644
index 8882978d75..0000000000
--- a/test/CodeGen/R600/short-args.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-
-; EG-CHECK: @i8_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: BUFFER_LOAD_UBYTE
-
-define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
-entry:
- %0 = zext i8 %in to i32
- store i32 %0, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; EG-CHECK: @i8_zext_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
-
-define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
-entry:
- %0 = zext i8 %in to i32
- store i32 %0, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; EG-CHECK: @i8_sext_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
-
-define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
-entry:
- %0 = sext i8 %in to i32
- store i32 %0, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; EG-CHECK: @i16_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: BUFFER_LOAD_USHORT
-
-define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
-entry:
- %0 = zext i16 %in to i32
- store i32 %0, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; EG-CHECK: @i16_zext_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
-
-define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
-entry:
- %0 = zext i16 %in to i32
- store i32 %0, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; EG-CHECK: @i16_sext_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
-
-define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
-entry:
- %0 = sext i16 %in to i32
- store i32 %0, i32 addrspace(1)* %out, align 4
- ret void
-}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index a4c025ab42..5e51d56917 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -7,7 +7,7 @@
;===------------------------------------------------------------------------===;
; i8 store
-; EG-CHECK: @store_i8
+; EG-CHECK-LABEL: @store_i8
; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]]
; IG 0: Get the byte index and truncate the value
@@ -26,7 +26,7 @@
; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
-; SI-CHECK: @store_i8
+; SI-CHECK-LABEL: @store_i8
; SI-CHECK: BUFFER_STORE_BYTE
define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
@@ -36,7 +36,7 @@ entry:
}
; i16 store
-; EG-CHECK: @store_i16
+; EG-CHECK-LABEL: @store_i16
; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]]
; IG 0: Get the byte index and truncate the value
@@ -55,7 +55,7 @@ entry:
; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
-; SI-CHECK: @store_i16
+; SI-CHECK-LABEL: @store_i16
; SI-CHECK: BUFFER_STORE_SHORT
define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
@@ -63,10 +63,10 @@ entry:
ret void
}
-; EG-CHECK: @store_v2i8
+; EG-CHECK-LABEL: @store_v2i8
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK-NOT: MEM_RAT MSKOR
-; SI-CHECK: @store_v2i8
+; SI-CHECK-LABEL: @store_v2i8
; SI-CHECK: BUFFER_STORE_BYTE
; SI-CHECK: BUFFER_STORE_BYTE
define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
@@ -77,12 +77,13 @@ entry:
}
-; EG-CHECK: @store_v2i16
+; EG-CHECK-LABEL: @store_v2i16
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @store_v2i16
+; CM-CHECK-LABEL: @store_v2i16
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @store_v2i16
-; SI-CHECK: BUFFER_STORE_DWORD
+; SI-CHECK-LABEL: @store_v2i16
+; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: BUFFER_STORE_SHORT
define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
@@ -90,11 +91,11 @@ entry:
ret void
}
-; EG-CHECK: @store_v4i8
+; EG-CHECK-LABEL: @store_v4i8
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @store_v4i8
+; CM-CHECK-LABEL: @store_v4i8
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @store_v4i8
+; SI-CHECK-LABEL: @store_v4i8
; SI-CHECK: BUFFER_STORE_BYTE
; SI-CHECK: BUFFER_STORE_BYTE
; SI-CHECK: BUFFER_STORE_BYTE
@@ -107,11 +108,11 @@ entry:
}
; floating-point store
-; EG-CHECK: @store_f32
+; EG-CHECK-LABEL: @store_f32
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
-; CM-CHECK: @store_f32
+; CM-CHECK-LABEL: @store_f32
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: @store_f32
+; SI-CHECK-LABEL: @store_f32
; SI-CHECK: BUFFER_STORE_DWORD
define void @store_f32(float addrspace(1)* %out, float %in) {
@@ -119,13 +120,13 @@ define void @store_f32(float addrspace(1)* %out, float %in) {
ret void
}
-; EG-CHECK: @store_v4i16
+; EG-CHECK-LABEL: @store_v4i16
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK-NOT: MEM_RAT MSKOR
-; SI-CHECK: @store_v4i16
+; SI-CHECK-LABEL: @store_v4i16
; SI-CHECK: BUFFER_STORE_SHORT
; SI-CHECK: BUFFER_STORE_SHORT
; SI-CHECK: BUFFER_STORE_SHORT
@@ -139,11 +140,11 @@ entry:
}
; vec2 floating-point stores
-; EG-CHECK: @store_v2f32
+; EG-CHECK-LABEL: @store_v2f32
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @store_v2f32
+; CM-CHECK-LABEL: @store_v2f32
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @store_v2f32
+; SI-CHECK-LABEL: @store_v2f32
; SI-CHECK: BUFFER_STORE_DWORDX2
define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
@@ -154,13 +155,13 @@ entry:
ret void
}
-; EG-CHECK: @store_v4i32
+; EG-CHECK-LABEL: @store_v4i32
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @store_v4i32
+; CM-CHECK-LABEL: @store_v4i32
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @store_v4i32
+; SI-CHECK-LABEL: @store_v4i32
; SI-CHECK: BUFFER_STORE_DWORDX4
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
@@ -172,41 +173,42 @@ entry:
; Local Address Space
;===------------------------------------------------------------------------===;
-; EG-CHECK: @store_local_i8
+; EG-CHECK-LABEL: @store_local_i8
; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK: @store_local_i8
+; SI-CHECK-LABEL: @store_local_i8
; SI-CHECK: DS_WRITE_B8
define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
store i8 %in, i8 addrspace(3)* %out
ret void
}
-; EG-CHECK: @store_local_i16
+; EG-CHECK-LABEL: @store_local_i16
; EG-CHECK: LDS_SHORT_WRITE
-; SI-CHECK: @store_local_i16
+; SI-CHECK-LABEL: @store_local_i16
; SI-CHECK: DS_WRITE_B16
define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
store i16 %in, i16 addrspace(3)* %out
ret void
}
-; EG-CHECK: @store_local_v2i16
+; EG-CHECK-LABEL: @store_local_v2i16
; EG-CHECK: LDS_WRITE
-; CM-CHECK: @store_local_v2i16
+; CM-CHECK-LABEL: @store_local_v2i16
; CM-CHECK: LDS_WRITE
-; SI-CHECK: @store_local_v2i16
-; SI-CHECK: DS_WRITE_B32
+; SI-CHECK-LABEL: @store_local_v2i16
+; SI-CHECK: DS_WRITE_B16
+; SI-CHECK: DS_WRITE_B16
define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(3)* %out
ret void
}
-; EG-CHECK: @store_local_v4i8
+; EG-CHECK-LABEL: @store_local_v4i8
; EG-CHECK: LDS_WRITE
-; CM-CHECK: @store_local_v4i8
+; CM-CHECK-LABEL: @store_local_v4i8
; CM-CHECK: LDS_WRITE
-; SI-CHECK: @store_local_v4i8
+; SI-CHECK-LABEL: @store_local_v4i8
; SI-CHECK: DS_WRITE_B8
; SI-CHECK: DS_WRITE_B8
; SI-CHECK: DS_WRITE_B8
@@ -217,13 +219,13 @@ entry:
ret void
}
-; EG-CHECK: @store_local_v2i32
+; EG-CHECK-LABEL: @store_local_v2i32
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
-; CM-CHECK: @store_local_v2i32
+; CM-CHECK-LABEL: @store_local_v2i32
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
-; SI-CHECK: @store_local_v2i32
+; SI-CHECK-LABEL: @store_local_v2i32
; SI-CHECK: DS_WRITE_B32
; SI-CHECK: DS_WRITE_B32
define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
@@ -232,17 +234,17 @@ entry:
ret void
}
-; EG-CHECK: @store_local_v4i32
+; EG-CHECK-LABEL: @store_local_v4i32
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
-; CM-CHECK: @store_local_v4i32
+; CM-CHECK-LABEL: @store_local_v4i32
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
-; SI-CHECK: @store_local_v4i32
+; SI-CHECK-LABEL: @store_local_v4i32
; SI-CHECK: DS_WRITE_B32
; SI-CHECK: DS_WRITE_B32
; SI-CHECK: DS_WRITE_B32
@@ -260,11 +262,11 @@ entry:
; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
; be two 32-bit stores.
-; EG-CHECK: @vecload2
+; EG-CHECK-LABEL: @vecload2
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @vecload2
+; CM-CHECK-LABEL: @vecload2
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @vecload2
+; SI-CHECK-LABEL: @vecload2
; SI-CHECK: BUFFER_STORE_DWORDX2
define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index be7a430885..58709dffc9 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -21,7 +21,6 @@ define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
; SI: S_LOAD_DWORDX2
; SI: S_LOAD_DWORDX2 [[SREG:SGPR[0-9]+_SGPR[0-9]+]]
; SI: V_LSHL_B64 [[LO_VREG:VGPR[0-9]+]]_VGPR{{[0-9]+}}, [[SREG]], 2
-; SI-NOT: [[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[LO_VREG]],
define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
%b = shl i64 %a, 2