From 7a28d8afa77ac3afce265f2b61fb321e4e0d84d7 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 23 Apr 2013 17:34:00 +0000 Subject: R600: Add CF_END git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180123 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 6 +- lib/Target/R600/R600ControlFlowFinalizer.cpp | 91 ++++++++++++---------- lib/Target/R600/R600Instructions.td | 24 +++++- test/CodeGen/R600/sdiv.ll | 2 +- test/CodeGen/R600/udiv.ll | 2 +- test/CodeGen/R600/urem.ll | 2 +- 6 files changed, 80 insertions(+), 47 deletions(-) diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 416d71064d..4864b3e1b7 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -281,7 +281,11 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportSwz: case AMDGPU::EG_ExportBuf: - case AMDGPU::R600_ExportBuf: { + case AMDGPU::R600_ExportBuf: + case AMDGPU::PAD: + case AMDGPU::CF_END_R600: + case AMDGPU::CF_END_EG: + case AMDGPU::CF_END_CM: { uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); EmitByte(INSTR_NATIVE, OS); Emit(Inst, OS); diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index bc1ca58b86..e683d7594b 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -39,7 +39,8 @@ private: CF_LOOP_CONTINUE, CF_JUMP, CF_ELSE, - CF_POP + CF_POP, + CF_END }; static char ID; @@ -91,49 +92,46 @@ private: } const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { - if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) { - switch (CFI) { - case CF_TC: - return TII->get(AMDGPU::CF_TC_R600); - case CF_CALL_FS: - return TII->get(AMDGPU::CF_CALL_FS_R600); - case CF_WHILE_LOOP: - return TII->get(AMDGPU::WHILE_LOOP_R600); - case CF_END_LOOP: - return TII->get(AMDGPU::END_LOOP_R600); - case CF_LOOP_BREAK: - return TII->get(AMDGPU::LOOP_BREAK_R600); - case CF_LOOP_CONTINUE: - return TII->get(AMDGPU::CF_CONTINUE_R600); - case CF_JUMP: - return TII->get(AMDGPU::CF_JUMP_R600); - case CF_ELSE: - return TII->get(AMDGPU::CF_ELSE_R600); - case CF_POP: - return TII->get(AMDGPU::POP_R600); - } - } else { - switch (CFI) { - case CF_TC: - return TII->get(AMDGPU::CF_TC_EG); - case CF_CALL_FS: - return TII->get(AMDGPU::CF_CALL_FS_EG); - case CF_WHILE_LOOP: - return TII->get(AMDGPU::WHILE_LOOP_EG); - case CF_END_LOOP: - return TII->get(AMDGPU::END_LOOP_EG); - case CF_LOOP_BREAK: - return TII->get(AMDGPU::LOOP_BREAK_EG); - case CF_LOOP_CONTINUE: - return TII->get(AMDGPU::CF_CONTINUE_EG); - case CF_JUMP: - return TII->get(AMDGPU::CF_JUMP_EG); - case CF_ELSE: - return TII->get(AMDGPU::CF_ELSE_EG); - case CF_POP: - return TII->get(AMDGPU::POP_EG); + unsigned Opcode = 0; + bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX); + switch (CFI) { + case CF_TC: + Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; + break; + case CF_CALL_FS: + Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600; + break; + case CF_WHILE_LOOP: + Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600; + break; + case CF_END_LOOP: + Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600; + break; + case CF_LOOP_BREAK: + Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600; + break; + case CF_LOOP_CONTINUE: + Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600; + break; + case CF_JUMP: + Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600; + break; + case CF_ELSE: + Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600; + break; + case CF_POP: + Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; + break; + case CF_END: + if (ST.device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX) { + Opcode = AMDGPU::CF_END_CM; + break; } + Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600; + break; } + assert (Opcode && "No opcode selected"); + return TII->get(Opcode); } MachineBasicBlock::iterator @@ -310,6 +308,15 @@ public: CfCount++; break; } + case AMDGPU::RETURN: { + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END)); + CfCount++; + MI->eraseFromParent(); + if (CfCount % 2) { + BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD)); + CfCount++; + } + } default: break; } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 361fc9816b..e0b2a8c3ff 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -897,6 +897,7 @@ class CF_WORD1_EG { bits<2> COND; bits<6> COUNT; bits<1> VALID_PIXEL_MODE; + bits<1> END_OF_PROGRAM; bits<8> CF_INST; bits<1> BARRIER; @@ -919,6 +920,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { let CF_CONST = 0; let VALID_PIXEL_MODE = 0; let COND = 0; + let END_OF_PROGRAM = 0; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -934,6 +936,10 @@ def STACK_SIZE : AMDGPUInst <(outs), let Inst = num; } +def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { + field bits<64> Inst; +} + let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// @@ -1486,6 +1492,12 @@ let Predicates = [isR600] in { "POP @$ADDR POP:$POP_COUNT"> { let COUNT = 0; } + def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> { + let COUNT = 0; + let POP_COUNT = 0; + let ADDR = 0; + let END_OF_PROGRAM = 1; + } } @@ -1690,7 +1702,12 @@ let hasSideEffects = 1 in { "POP @$ADDR POP:$POP_COUNT"> { let COUNT = 0; } - + def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> { + let COUNT = 0; + let POP_COUNT = 0; + let ADDR = 0; + let END_OF_PROGRAM = 1; + } //===----------------------------------------------------------------------===// // Memory read/write instructions @@ -1935,6 +1952,11 @@ def : Pat < (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) >; + def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { + let ADDR = 0; + let POP_COUNT = 0; + let COUNT = 0; + } def : Pat<(fsqrt R600_Reg32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>; diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll index 3556facfba..3dd10c8a61 100644 --- a/test/CodeGen/R600/sdiv.ll +++ b/test/CodeGen/R600/sdiv.ll @@ -9,7 +9,7 @@ ; This was fixed by adding an additional pattern in R600Instructions.td to ; match this pattern with a CNDGE_INT. -; CHECK: RETURN +; CHECK: CF_END define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll index 47657a6be7..b81e3667ce 100644 --- a/test/CodeGen/R600/udiv.ll +++ b/test/CodeGen/R600/udiv.ll @@ -3,7 +3,7 @@ ;The code generated by udiv is long and complex and may frequently change. ;The goal of this test is to make sure the ISel doesn't fail when it gets ;a v4i32 udiv -;CHECK: RETURN +;CHECK: CF_END define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll index 2e7388caa6..a2cc0bd2e8 100644 --- a/test/CodeGen/R600/urem.ll +++ b/test/CodeGen/R600/urem.ll @@ -3,7 +3,7 @@ ;The code generated by urem is long and complex and may frequently change. ;The goal of this test is to make sure the ISel doesn't fail when it gets ;a v4i32 urem -;CHECK: RETURN +;CHECK: CF_END define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 -- cgit v1.2.3