summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Lejeune <vljn@ovi.com>2013-04-30 00:14:38 +0000
committerVincent Lejeune <vljn@ovi.com>2013-04-30 00:14:38 +0000
commit2c836f84dba99e7b041909160c739db779760b79 (patch)
tree2b301c461ed7af3526067c20a69c3d0d4903a843
parent25f259cde28860ea76c2f5628010968945a28edb (diff)
downloadllvm-2c836f84dba99e7b041909160c739db779760b79.tar.gz
llvm-2c836f84dba99e7b041909160c739db779760b79.tar.bz2
llvm-2c836f84dba99e7b041909160c739db779760b79.tar.xz
R600: use native for alu
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180761 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp7
-rw-r--r--lib/Target/R600/R600ControlFlowFinalizer.cpp110
-rw-r--r--lib/Target/R600/R600Instructions.td17
-rw-r--r--lib/Target/R600/R600RegisterInfo.td5
-rw-r--r--test/CodeGen/R600/alu-split.ll1
-rw-r--r--test/CodeGen/R600/disconnected-predset-break-bug.ll2
-rw-r--r--test/CodeGen/R600/predicates.ll8
7 files changed, 141 insertions, 9 deletions
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index bc5c9d8e97..7c83d86cb2 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -143,6 +143,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitFCInstr(MI, OS);
} else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
+ MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
MI.getOpcode() == AMDGPU::BUNDLE ||
MI.getOpcode() == AMDGPU::KILL) {
return;
@@ -255,7 +256,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case AMDGPU::CF_ALU:
case AMDGPU::CF_ALU_PUSH_BEFORE: {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
- EmitByte(INSTR_CFALU, OS);
+ EmitByte(INSTR_NATIVE, OS);
Emit(Inst, OS);
break;
}
@@ -294,7 +295,9 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
}
default:
- EmitALUInstr(MI, Fixups, OS);
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(Inst, OS);
break;
}
}
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index e637641e68..0995795e0a 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -165,6 +165,97 @@ private:
return ClauseFile(MIb, ClauseContent);
}
+ void getLiteral(MachineInstr *MI, std::vector<unsigned> &Lits) const {
+ unsigned LiteralRegs[] = {
+ AMDGPU::ALU_LITERAL_X,
+ AMDGPU::ALU_LITERAL_Y,
+ AMDGPU::ALU_LITERAL_Z,
+ AMDGPU::ALU_LITERAL_W
+ };
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() != AMDGPU::ALU_LITERAL_X)
+ continue;
+ unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM);
+ int64_t Imm = MI->getOperand(ImmIdx).getImm();
+ std::vector<unsigned>::iterator It =
+ std::find(Lits.begin(), Lits.end(), Imm);
+ if (It != Lits.end()) {
+ unsigned Index = It - Lits.begin();
+ MO.setReg(LiteralRegs[Index]);
+ } else {
+ assert(Lits.size() < 4 && "Too many literals in Instruction Group");
+ MO.setReg(LiteralRegs[Lits.size()]);
+ Lits.push_back(Imm);
+ }
+ }
+ }
+
+ MachineBasicBlock::iterator insertLiterals(
+ MachineBasicBlock::iterator InsertPos,
+ const std::vector<unsigned> &Literals) const {
+ MachineBasicBlock *MBB = InsertPos->getParent();
+ for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
+ unsigned LiteralPair0 = Literals[i];
+ unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
+ InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
+ TII->get(AMDGPU::LITERALS))
+ .addImm(LiteralPair0)
+ .addImm(LiteralPair1);
+ }
+ return InsertPos;
+ }
+
+ ClauseFile
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+ const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<MachineInstr *> ClauseContent;
+ I++;
+ for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
+ if (IsTrivialInst(I)) {
+ ++I;
+ continue;
+ }
+ if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
+ break;
+ std::vector<unsigned> Literals;
+ if (I->isBundle()) {
+ MachineInstr *DeleteMI = I;
+ MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+ while (++BI != E && BI->isBundledWithPred()) {
+ BI->unbundleFromPred();
+ for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = BI->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ getLiteral(BI, Literals);
+ ClauseContent.push_back(BI);
+ }
+ I = BI;
+ DeleteMI->eraseFromParent();
+ } else {
+ getLiteral(I, Literals);
+ ClauseContent.push_back(I);
+ I++;
+ }
+ for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
+ unsigned literal0 = Literals[i];
+ unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
+ MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
+ TII->get(AMDGPU::LITERALS))
+ .addImm(literal0)
+ .addImm(literal2);
+ ClauseContent.push_back(MILit);
+ }
+ }
+ ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
+ return ClauseFile(ClauseHead, ClauseContent);
+ }
+
void
EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
@@ -178,6 +269,19 @@ private:
CfCount += 2 * Clause.second.size();
}
+ void
+ EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
+ unsigned &CfCount) {
+ CounterPropagateAddr(Clause.first, CfCount);
+ MachineBasicBlock *BB = Clause.first->getParent();
+ BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
+ .addImm(CfCount);
+ for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
+ BB->splice(InsertPos, BB, Clause.second[i]);
+ }
+ CfCount += Clause.second.size();
+ }
+
void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
}
@@ -234,7 +338,7 @@ public:
getHWInstrDesc(CF_CALL_FS));
CfCount++;
}
- std::vector<ClauseFile> FetchClauses;
+ std::vector<ClauseFile> FetchClauses, AluClauses;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
@@ -252,6 +356,8 @@ public:
MaxStack = std::max(MaxStack, CurrentStack);
hasPush = true;
case AMDGPU::CF_ALU:
+ I = MI;
+ AluClauses.push_back(MakeALUClause(MBB, I));
case AMDGPU::EG_ExportBuf:
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportBuf:
@@ -362,6 +468,8 @@ public:
}
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
EmitFetchClause(I, FetchClauses[i], CfCount);
+ for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
+ EmitALUClause(I, AluClauses[i], CfCount);
}
default:
break;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 3426831a63..1d25da3622 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -944,6 +944,23 @@ def FETCH_CLAUSE : AMDGPUInst <(outs),
let Inst = num;
}
+def ALU_CLAUSE : AMDGPUInst <(outs),
+(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+}
+
+def LITERALS : AMDGPUInst <(outs),
+(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
+ field bits<64> Inst;
+ bits<32> literal1;
+ bits<32> literal2;
+
+ let Inst{31-0} = literal1;
+ let Inst{63-32} = literal2;
+}
+
def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
field bits<64> Inst;
}
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index 694431921b..5a2e65c87e 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -88,7 +88,10 @@ def NEG_ONE : R600Reg<"-1.0", 249>;
def ONE_INT : R600Reg<"1", 250>;
def HALF : R600Reg<"0.5", 252>;
def NEG_HALF : R600Reg<"-0.5", 252>;
-def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
+def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">;
+def ALU_LITERAL_Y : R600RegWithChan<"literal.x", 253, "Y">;
+def ALU_LITERAL_Z : R600RegWithChan<"literal.x", 253, "Z">;
+def ALU_LITERAL_W : R600RegWithChan<"literal.x", 253, "W">;
def PV_X : R600RegWithChan<"PV.x", 254, "X">;
def PV_Y : R600RegWithChan<"PV.y", 254, "Y">;
def PV_Z : R600RegWithChan<"PV.z", 254, "Z">;
diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll
index afefcd9f78..48496f6feb 100644
--- a/test/CodeGen/R600/alu-split.ll
+++ b/test/CodeGen/R600/alu-split.ll
@@ -4,6 +4,7 @@
;CHECK: ALU
;CHECK: ALU
;CHECK-NOT: ALU
+;CHECK: CF_END
define void @main() #0 {
main_body:
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
index 09baee7a1d..012c17b8fe 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -6,7 +6,7 @@
; CHECK: @loop_ge
; CHECK: LOOP_START_DX10
-; CHECK: PRED_SET
+; CHECK: ALU_PUSH_BEFORE
; CHECK-NEXT: JUMP
; CHECK-NEXT: LOOP_BREAK
define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
index eb8b052b6f..fb093ed322 100644
--- a/test/CodeGen/R600/predicates.ll
+++ b/test/CodeGen/R600/predicates.ll
@@ -46,11 +46,11 @@ ENDIF:
; CHECK: @nested_if
; CHECK: ALU_PUSH_BEFORE
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
; CHECK: JUMP
+; CHECK: POP
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: POP
define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
@@ -73,12 +73,12 @@ ENDIF:
; CHECK: @nested_if_else
; CHECK: ALU_PUSH_BEFORE
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
; CHECK: JUMP
+; CHECK: POP
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: POP
define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0