From 2c836f84dba99e7b041909160c739db779760b79 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 30 Apr 2013 00:14:38 +0000 Subject: R600: use native for alu git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180761 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 7 +- lib/Target/R600/R600ControlFlowFinalizer.cpp | 110 ++++++++++++++++++++- lib/Target/R600/R600Instructions.td | 17 ++++ lib/Target/R600/R600RegisterInfo.td | 5 +- test/CodeGen/R600/alu-split.ll | 1 + .../CodeGen/R600/disconnected-predset-break-bug.ll | 2 +- test/CodeGen/R600/predicates.ll | 8 +- 7 files changed, 141 insertions(+), 9 deletions(-) diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index bc5c9d8e97..7c83d86cb2 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -143,6 +143,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitFCInstr(MI, OS); } else if (MI.getOpcode() == AMDGPU::RETURN || MI.getOpcode() == AMDGPU::FETCH_CLAUSE || + MI.getOpcode() == AMDGPU::ALU_CLAUSE || MI.getOpcode() == AMDGPU::BUNDLE || MI.getOpcode() == AMDGPU::KILL) { return; @@ -255,7 +256,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, case AMDGPU::CF_ALU: case AMDGPU::CF_ALU_PUSH_BEFORE: { uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_CFALU, OS); + EmitByte(INSTR_NATIVE, OS); Emit(Inst, OS); break; } @@ -294,7 +295,9 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, break; } default: - EmitALUInstr(MI, Fixups, OS); + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_NATIVE, OS); + Emit(Inst, OS); break; } } diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index e637641e68..0995795e0a 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -165,6 +165,97 @@ private: return ClauseFile(MIb, ClauseContent); } + void getLiteral(MachineInstr *MI, std::vector &Lits) const { + unsigned LiteralRegs[] = { + AMDGPU::ALU_LITERAL_X, + AMDGPU::ALU_LITERAL_Y, + AMDGPU::ALU_LITERAL_Z, + AMDGPU::ALU_LITERAL_W + }; + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.getReg() != AMDGPU::ALU_LITERAL_X) + continue; + unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM); + int64_t Imm = MI->getOperand(ImmIdx).getImm(); + std::vector::iterator It = + std::find(Lits.begin(), Lits.end(), Imm); + if (It != Lits.end()) { + unsigned Index = It - Lits.begin(); + MO.setReg(LiteralRegs[Index]); + } else { + assert(Lits.size() < 4 && "Too many literals in Instruction Group"); + MO.setReg(LiteralRegs[Lits.size()]); + Lits.push_back(Imm); + } + } + } + + MachineBasicBlock::iterator insertLiterals( + MachineBasicBlock::iterator InsertPos, + const std::vector &Literals) const { + MachineBasicBlock *MBB = InsertPos->getParent(); + for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { + unsigned LiteralPair0 = Literals[i]; + unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; + InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), + TII->get(AMDGPU::LITERALS)) + .addImm(LiteralPair0) + .addImm(LiteralPair1); + } + return InsertPos; + } + + ClauseFile + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) + const { + MachineBasicBlock::iterator ClauseHead = I; + std::vector ClauseContent; + I++; + for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { + if (IsTrivialInst(I)) { + ++I; + continue; + } + if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) + break; + std::vector Literals; + if (I->isBundle()) { + MachineInstr *DeleteMI = I; + MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); + while (++BI != E && BI->isBundledWithPred()) { + BI->unbundleFromPred(); + for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = BI->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + getLiteral(BI, Literals); + ClauseContent.push_back(BI); + } + I = BI; + DeleteMI->eraseFromParent(); + } else { + getLiteral(I, Literals); + ClauseContent.push_back(I); + I++; + } + for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { + unsigned literal0 = Literals[i]; + unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0; + MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(), + TII->get(AMDGPU::LITERALS)) + .addImm(literal0) + .addImm(literal2); + ClauseContent.push_back(MILit); + } + } + ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1); + return ClauseFile(ClauseHead, ClauseContent); + } + void EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, unsigned &CfCount) { @@ -178,6 +269,19 @@ private: CfCount += 2 * Clause.second.size(); } + void + EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, + unsigned &CfCount) { + CounterPropagateAddr(Clause.first, CfCount); + MachineBasicBlock *BB = Clause.first->getParent(); + BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE)) + .addImm(CfCount); + for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { + BB->splice(InsertPos, BB, Clause.second[i]); + } + CfCount += Clause.second.size(); + } + void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const { MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm()); } @@ -234,7 +338,7 @@ public: getHWInstrDesc(CF_CALL_FS)); CfCount++; } - std::vector FetchClauses; + std::vector FetchClauses, AluClauses; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { @@ -252,6 +356,8 @@ public: MaxStack = std::max(MaxStack, CurrentStack); hasPush = true; case AMDGPU::CF_ALU: + I = MI; + AluClauses.push_back(MakeALUClause(MBB, I)); case AMDGPU::EG_ExportBuf: case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportBuf: @@ -362,6 +468,8 @@ public: } for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) EmitFetchClause(I, FetchClauses[i], CfCount); + for (unsigned i = 0, e = AluClauses.size(); i < e; i++) + EmitALUClause(I, AluClauses[i], CfCount); } default: break; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 3426831a63..1d25da3622 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -944,6 +944,23 @@ def FETCH_CLAUSE : AMDGPUInst <(outs), let Inst = num; } +def ALU_CLAUSE : AMDGPUInst <(outs), +(ins i32imm:$addr), "ALU clause starting at $addr:", [] > { + field bits<8> Inst; + bits<8> num; + let Inst = num; +} + +def LITERALS : AMDGPUInst <(outs), +(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > { + field bits<64> Inst; + bits<32> literal1; + bits<32> literal2; + + let Inst{31-0} = literal1; + let Inst{63-32} = literal2; +} + def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { field bits<64> Inst; } diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 694431921b..5a2e65c87e 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -88,7 +88,10 @@ def NEG_ONE : R600Reg<"-1.0", 249>; def ONE_INT : R600Reg<"1", 250>; def HALF : R600Reg<"0.5", 252>; def NEG_HALF : R600Reg<"-0.5", 252>; -def ALU_LITERAL_X : R600Reg<"literal.x", 253>; +def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">; +def ALU_LITERAL_Y : R600RegWithChan<"literal.x", 253, "Y">; +def ALU_LITERAL_Z : R600RegWithChan<"literal.x", 253, "Z">; +def ALU_LITERAL_W : R600RegWithChan<"literal.x", 253, "W">; def PV_X : R600RegWithChan<"PV.x", 254, "X">; def PV_Y : R600RegWithChan<"PV.y", 254, "Y">; def PV_Z : R600RegWithChan<"PV.z", 254, "Z">; diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll index afefcd9f78..48496f6feb 100644 --- a/test/CodeGen/R600/alu-split.ll +++ b/test/CodeGen/R600/alu-split.ll @@ -4,6 +4,7 @@ ;CHECK: ALU ;CHECK: ALU ;CHECK-NOT: ALU +;CHECK: CF_END define void @main() #0 { main_body: diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll index 09baee7a1d..012c17b8fe 100644 --- a/test/CodeGen/R600/disconnected-predset-break-bug.ll +++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll @@ -6,7 +6,7 @@ ; CHECK: @loop_ge ; CHECK: LOOP_START_DX10 -; CHECK: PRED_SET +; CHECK: ALU_PUSH_BEFORE ; CHECK-NEXT: JUMP ; CHECK-NEXT: LOOP_BREAK define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind { diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll index eb8b052b6f..fb093ed322 100644 --- a/test/CodeGen/R600/predicates.ll +++ b/test/CodeGen/R600/predicates.ll @@ -46,11 +46,11 @@ ENDIF: ; CHECK: @nested_if ; CHECK: ALU_PUSH_BEFORE -; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec ; CHECK: JUMP +; CHECK: POP +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec ; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, ; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel -; CHECK: POP define void @nested_if(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 @@ -73,12 +73,12 @@ ENDIF: ; CHECK: @nested_if_else ; CHECK: ALU_PUSH_BEFORE -; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec ; CHECK: JUMP +; CHECK: POP +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec ; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, ; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel ; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel -; CHECK: POP define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 -- cgit v1.2.3