summaryrefslogtreecommitdiff
path: root/lib/Target/R600/R600MachineScheduler.h
diff options
context:
space:
mode:
author Vincent Lejeune <vljn@ovi.com> 2013-06-07 23:30:34 +0000
committer Vincent Lejeune <vljn@ovi.com> 2013-06-07 23:30:34 +0000
commit 843c6c2d0e83bcd52a215d768bacaa7b5ffe16a4 (patch)
tree 5c1348e877b36cf4695b7d7d50c3e01edca25996 /lib/Target/R600/R600MachineScheduler.h
parent b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecd (diff)
downloadllvm-843c6c2d0e83bcd52a215d768bacaa7b5ffe16a4.tar.gz
llvm-843c6c2d0e83bcd52a215d768bacaa7b5ffe16a4.tar.bz2
llvm-843c6c2d0e83bcd52a215d768bacaa7b5ffe16a4.tar.xz
R600: Use a refined heuristic to choose when switching clause
This is using a hint from the AMD APP OpenCL Programming Guide with empirically tweaked parameters. I used the Unigine Heaven 3.0 benchmark to determine the best parameters on my system (i7 2600/Radeon 6950/Kernel 3.9.4): it went from 38.8 average fps to 39.6, which is a ~3% gain. (The Lightmark 2008.2 gain is much more marginal: from 537 to 539.) There is no lit test provided, as the parameters were determined empirically and it would be nearly impossible to find a test program that checks for optimal behavior. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183593 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600/R600MachineScheduler.h')
-rw-r--r--lib/Target/R600/R600MachineScheduler.h5
1 files changed, 4 insertions, 1 deletions
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index c5024d2492..aae8b3f4eb 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -60,6 +60,9 @@ class R600SchedStrategy : public MachineSchedStrategy {
int CurEmitted;
InstKind NextInstKind;
+ unsigned AluInstCount;
+ unsigned FetchInstCount;
+
int InstKindLimit[IDLast];
int OccupedSlotsMask;
@@ -85,7 +88,7 @@ private:
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
AluKind getAluKind(SUnit *SU) const;
void LoadAlu();
- bool isAvailablesAluEmpty() const;
+ unsigned AvailablesAluCount() const;
SUnit *AttemptFillSlot (unsigned Slot);
void PrepareNextSlot();
SUnit *PopInst(std::vector<SUnit*> &Q);