summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Vincent Lejeune <vljn@ovi.com> 2013-06-17 20:16:26 +0000
committer: Vincent Lejeune <vljn@ovi.com> 2013-06-17 20:16:26 +0000
commit: 98f5cf8000bd67ab97605f3454ae374fff5389c6 (patch)
tree: f467f97c5ba47bbdf8361ec055ebe013b67056b4
parent: 4b548ecb012ce27feff9f58aad27775df679b159 (diff)
download: llvm-98f5cf8000bd67ab97605f3454ae374fff5389c6.tar.gz
llvm-98f5cf8000bd67ab97605f3454ae374fff5389c6.tar.bz2
llvm-98f5cf8000bd67ab97605f3454ae374fff5389c6.tar.xz
R600: Properly set COUNT_3 bit in TEX clause initiating inst for pre EG gen.
Fixes rv7x0 bug in Heaven reported here: https://bugs.freedesktop.org/show_bug.cgi?id=64257

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184116 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/R600/R600Instructions.td 30
-rw-r--r-- test/CodeGen/R600/rv7x0_count3.ll 44
2 files changed, 60 insertions, 14 deletions
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 27b0214b00..83d735f28c 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -575,14 +575,16 @@ class CF_WORD0_R600 {
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
field bits<64> Inst;
+ bits<4> CNT;
let CF_INST = inst;
let BARRIER = 1;
let CF_CONST = 0;
let VALID_PIXEL_MODE = 0;
let COND = 0;
+ let COUNT = CNT{2-0};
let CALL_COUNT = 0;
- let COUNT_3 = 0;
+ let COUNT_3 = CNT{3};
let END_OF_PROGRAM = 0;
let WHOLE_QUAD_MODE = 0;
@@ -1162,52 +1164,52 @@ let Predicates = [isR600] in {
}
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
- def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
- "TEX $COUNT @$ADDR"> {
+ def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
+ "TEX $CNT @$ADDR"> {
let POP_COUNT = 0;
}
- def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
- "VTX $COUNT @$ADDR"> {
+ def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
+ "VTX $CNT @$ADDR"> {
let POP_COUNT = 0;
}
def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
"LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
- let COUNT = 0;
+ let CNT = 0;
}
def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
- let COUNT = 0;
+ let CNT = 0;
}
def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
"LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
- let COUNT = 0;
+ let CNT = 0;
}
def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
"CONTINUE @$ADDR"> {
let POP_COUNT = 0;
- let COUNT = 0;
+ let CNT = 0;
}
def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"JUMP @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
+ let CNT = 0;
}
def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
+ let CNT = 0;
}
def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
let ADDR = 0;
- let COUNT = 0;
+ let CNT = 0;
let POP_COUNT = 0;
}
def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"POP @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
+ let CNT = 0;
}
def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
- let COUNT = 0;
+ let CNT = 0;
let POP_COUNT = 0;
let ADDR = 0;
let END_OF_PROGRAM = 1;
diff --git a/test/CodeGen/R600/rv7x0_count3.ll b/test/CodeGen/R600/rv7x0_count3.ll
new file mode 100644
index 0000000000..474d6ba902
--- /dev/null
+++ b/test/CodeGen/R600/rv7x0_count3.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s
+
+; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %1 = call float @llvm.R600.load.input(i32 4)
+ %2 = call float @llvm.R600.load.input(i32 5)
+ %3 = call float @llvm.R600.load.input(i32 6)
+ %4 = call float @llvm.R600.load.input(i32 7)
+ %5 = insertelement <4 x float> undef, float %1, i32 0
+ %6 = insertelement <4 x float> %5, float %2, i32 1
+ %7 = insertelement <4 x float> %6, float %3, i32 2
+ %8 = insertelement <4 x float> %7, float %4, i32 3
+ %9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)
+ %10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 1, i32 0, i32 1)
+ %11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 2, i32 0, i32 1)
+ %12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 3, i32 0, i32 1)
+ %13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 4, i32 0, i32 1)
+ %14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 5, i32 0, i32 1)
+ %15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 6, i32 0, i32 1)
+ %16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 7, i32 0, i32 1)
+ %17 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 8, i32 0, i32 1)
+ %18 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 9, i32 0, i32 1)
+ %19 = fadd <4 x float> %9, %10
+ %20 = fadd <4 x float> %19, %11
+ %21 = fadd <4 x float> %20, %12
+ %22 = fadd <4 x float> %21, %13
+ %23 = fadd <4 x float> %22, %14
+ %24 = fadd <4 x float> %23, %15
+ %25 = fadd <4 x float> %24, %16
+ %26 = fadd <4 x float> %25, %17
+ %27 = fadd <4 x float> %26, %18
+ call void @llvm.R600.store.swizzle(<4 x float> %27, i32 0, i32 2)
+ ret void
+}
+
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
+
+; Function Attrs: readnone
+declare float @llvm.R600.load.input(i32) #1
+
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+attributes #1 = { readnone }