summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2014-01-22 22:20:54 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2014-01-22 22:20:54 +0000
commit 8a4f11e3b69ce3e58b1d147d98a020b1d311f1f1 (patch)
tree 7c8e1df5d45af76654bd9f6456e17b3caaae02ad
parent ac2d2e0a3db575c216e0a6f8f5dae59adcf08e07 (diff)
download llvm-8a4f11e3b69ce3e58b1d147d98a020b1d311f1f1.tar.gz
llvm-8a4f11e3b69ce3e58b1d147d98a020b1d311f1f1.tar.bz2
llvm-8a4f11e3b69ce3e58b1d147d98a020b1d311f1f1.tar.xz
Revert "R600: Add work-around for the CF stack entry HW bug"
This reverts commit 35b8331cad6eb512a2506adbc394201181da94ba.

The -debug-only flag for llc doesn't appear to be available in all build configurations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199845 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/R600/AMDGPU.td | 5
-rw-r--r-- lib/Target/R600/AMDGPUSubtarget.cpp | 6
-rw-r--r-- lib/Target/R600/AMDGPUSubtarget.h | 2
-rw-r--r-- lib/Target/R600/Processors.td | 14
-rw-r--r-- lib/Target/R600/R600ControlFlowFinalizer.cpp | 43
-rw-r--r-- test/CodeGen/R600/cf-stack-bug.ll | 225
6 files changed, 7 insertions, 288 deletions
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index d1e2cf5319..c4e5efc8d6 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -63,11 +63,6 @@ def FeatureCaymanISA : SubtargetFeature<"caymanISA",
"true",
"Use Cayman ISA">;
-def FeatureCFALUBug : SubtargetFeature<"cfalubug",
- "CFALUBug",
- "true",
- "GPU has CF_ALU bug">;
-
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index e77ab5e6d1..f36aa2071c 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -39,7 +39,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
EnableIRStructurizer = true;
EnableIfCvt = true;
WavefrontSize = 0;
- CFALUBug = false;
ParseSubtargetFeatures(GPU, FS);
DevName = GPU;
}
@@ -98,11 +97,6 @@ AMDGPUSubtarget::getStackEntrySize() const {
}
}
bool
-AMDGPUSubtarget::hasCFAluBug() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- return CFALUBug;
-}
-bool
AMDGPUSubtarget::isTargetELF() const {
return false;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 7e7f4d0c00..68d853218b 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -52,7 +52,6 @@ private:
bool EnableIRStructurizer;
bool EnableIfCvt;
unsigned WavefrontSize;
- bool CFALUBug;
InstrItineraryData InstrItins;
@@ -72,7 +71,6 @@ public:
bool isIfCvtEnabled() const;
unsigned getWavefrontSize() const;
unsigned getStackEntrySize() const;
- bool hasCFAluBug() const;
virtual bool enableMachineScheduler() const {
return getGeneration() <= NORTHERN_ISLANDS;
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index fde4481497..e601f35316 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -46,15 +46,13 @@ def : Proc<"rv770", R600_VLIW5_Itin,
//===----------------------------------------------------------------------===//
def : Proc<"cedar", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32,
- FeatureCFALUBug]>;
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32]>;
def : Proc<"redwood", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64,
- FeatureCFALUBug]>;
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
def : Proc<"sumo", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;
+ [FeatureEvergreen, FeatureWavefrontSize64]>;
def : Proc<"juniper", R600_VLIW5_Itin,
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
@@ -68,13 +66,13 @@ def : Proc<"cypress", R600_VLIW5_Itin,
//===----------------------------------------------------------------------===//
def : Proc<"barts", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
+ [FeatureNorthernIslands, FeatureVertexCache]>;
def : Proc<"turks", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
+ [FeatureNorthernIslands, FeatureVertexCache]>;
def : Proc<"caicos", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureCFALUBug]>;
+ [FeatureNorthernIslands]>;
def : Proc<"cayman", R600_VLIW4_Itin,
[FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 470ff2e107..6b42a7a9fa 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -73,44 +73,6 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) {
return false;
}
-bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
- if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
- getLoopDepth() > 1)
- return true;
-
- if (!ST.hasCFAluBug())
- return false;
-
- switch(Opcode) {
- default: return false;
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- case AMDGPU::CF_ALU_ELSE_AFTER:
- case AMDGPU::CF_ALU_BREAK:
- case AMDGPU::CF_ALU_CONTINUE:
- if (CurrentSubEntries == 0)
- return false;
- if (ST.getWavefrontSize() == 64) {
- // We are being conservative here. We only require this work-around if
- // CurrentSubEntries > 3 &&
- // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
- //
- // We have to be conservative, because we don't know for certain that
- // our stack allocation algorithm for Evergreen/NI is correct. Applying this
- // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
- // resources without any problems.
- return CurrentSubEntries > 3;
- } else {
- assert(ST.getWavefrontSize() == 32);
- // We are being conservative here. We only require the work-around if
- // CurrentSubEntries > 7 &&
- // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
- // See the comment on the wavefront size == 64 case for why we are
- // being conservative.
- return CurrentSubEntries > 7;
- }
- }
-}
-
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
switch(Item) {
default:
@@ -510,12 +472,9 @@ public:
if (MI->getOpcode() == AMDGPU::CF_ALU)
LastAlu.back() = MI;
I++;
- bool RequiresWorkAround =
- CFStack.requiresWorkAroundForInst(MI->getOpcode());
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
- if (RequiresWorkAround) {
- DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
+ if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) {
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
.addImm(CfCount + 1)
.addImm(1);
diff --git a/test/CodeGen/R600/cf-stack-bug.ll b/test/CodeGen/R600/cf-stack-bug.ll
deleted file mode 100644
index 7fa07b11ee..0000000000
--- a/test/CodeGen/R600/cf-stack-bug.ll
+++ /dev/null
@@ -1,225 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG32 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
-
-; We are currently allocating 2 extra sub-entries on Evergreen / NI for
-; non-WQM push instructions if we change this to 1, then we will need to
-; add one level of depth to each of these tests.
-
-; BUG64-NOT: Applying bug work-around
-; BUG32-NOT: Applying bug work-around
-; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested3
-define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
-entry:
- %0 = icmp sgt i32 %cond, 0
- br i1 %0, label %if.1, label %end
-
-if.1:
- %1 = icmp sgt i32 %cond, 10
- br i1 %1, label %if.2, label %if.store.1
-
-if.store.1:
- store i32 1, i32 addrspace(1)* %out
- br label %end
-
-if.2:
- %2 = icmp sgt i32 %cond, 20
- br i1 %2, label %if.3, label %if.2.store
-
-if.2.store:
- store i32 2, i32 addrspace(1)* %out
- br label %end
-
-if.3:
- store i32 3, i32 addrspace(1)* %out
- br label %end
-
-end:
- ret void
-}
-
-; BUG64: Applying bug work-around
-; BUG32-NOT: Applying bug work-around
-; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested4
-define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
-entry:
- %0 = icmp sgt i32 %cond, 0
- br i1 %0, label %if.1, label %end
-
-if.1:
- %1 = icmp sgt i32 %cond, 10
- br i1 %1, label %if.2, label %if.1.store
-
-if.1.store:
- store i32 1, i32 addrspace(1)* %out
- br label %end
-
-if.2:
- %2 = icmp sgt i32 %cond, 20
- br i1 %2, label %if.3, label %if.2.store
-
-if.2.store:
- store i32 2, i32 addrspace(1)* %out
- br label %end
-
-if.3:
- %3 = icmp sgt i32 %cond, 30
- br i1 %3, label %if.4, label %if.3.store
-
-if.3.store:
- store i32 3, i32 addrspace(1)* %out
- br label %end
-
-if.4:
- store i32 4, i32 addrspace(1)* %out
- br label %end
-
-end:
- ret void
-}
-
-; BUG64: Applying bug work-around
-; BUG32-NOT: Applying bug work-around
-; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested7
-define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
-entry:
- %0 = icmp sgt i32 %cond, 0
- br i1 %0, label %if.1, label %end
-
-if.1:
- %1 = icmp sgt i32 %cond, 10
- br i1 %1, label %if.2, label %if.1.store
-
-if.1.store:
- store i32 1, i32 addrspace(1)* %out
- br label %end
-
-if.2:
- %2 = icmp sgt i32 %cond, 20
- br i1 %2, label %if.3, label %if.2.store
-
-if.2.store:
- store i32 2, i32 addrspace(1)* %out
- br label %end
-
-if.3:
- %3 = icmp sgt i32 %cond, 30
- br i1 %3, label %if.4, label %if.3.store
-
-if.3.store:
- store i32 3, i32 addrspace(1)* %out
- br label %end
-
-if.4:
- %4 = icmp sgt i32 %cond, 40
- br i1 %4, label %if.5, label %if.4.store
-
-if.4.store:
- store i32 4, i32 addrspace(1)* %out
- br label %end
-
-if.5:
- %5 = icmp sgt i32 %cond, 50
- br i1 %5, label %if.6, label %if.5.store
-
-if.5.store:
- store i32 5, i32 addrspace(1)* %out
- br label %end
-
-if.6:
- %6 = icmp sgt i32 %cond, 60
- br i1 %6, label %if.7, label %if.6.store
-
-if.6.store:
- store i32 6, i32 addrspace(1)* %out
- br label %end
-
-if.7:
- store i32 7, i32 addrspace(1)* %out
- br label %end
-
-end:
- ret void
-}
-
-; BUG64: Applying bug work-around
-; BUG32: Applying bug work-around
-; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested8
-define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
-entry:
- %0 = icmp sgt i32 %cond, 0
- br i1 %0, label %if.1, label %end
-
-if.1:
- %1 = icmp sgt i32 %cond, 10
- br i1 %1, label %if.2, label %if.1.store
-
-if.1.store:
- store i32 1, i32 addrspace(1)* %out
- br label %end
-
-if.2:
- %2 = icmp sgt i32 %cond, 20
- br i1 %2, label %if.3, label %if.2.store
-
-if.2.store:
- store i32 2, i32 addrspace(1)* %out
- br label %end
-
-if.3:
- %3 = icmp sgt i32 %cond, 30
- br i1 %3, label %if.4, label %if.3.store
-
-if.3.store:
- store i32 3, i32 addrspace(1)* %out
- br label %end
-
-if.4:
- %4 = icmp sgt i32 %cond, 40
- br i1 %4, label %if.5, label %if.4.store
-
-if.4.store:
- store i32 4, i32 addrspace(1)* %out
- br label %end
-
-if.5:
- %5 = icmp sgt i32 %cond, 50
- br i1 %5, label %if.6, label %if.5.store
-
-if.5.store:
- store i32 5, i32 addrspace(1)* %out
- br label %end
-
-if.6:
- %6 = icmp sgt i32 %cond, 60
- br i1 %6, label %if.7, label %if.6.store
-
-if.6.store:
- store i32 6, i32 addrspace(1)* %out
- br label %end
-
-if.7:
- %7 = icmp sgt i32 %cond, 70
- br i1 %7, label %if.8, label %if.7.store
-
-if.7.store:
- store i32 7, i32 addrspace(1)* %out
- br label %end
-
-if.8:
- store i32 8, i32 addrspace(1)* %out
- br label %end
-
-end:
- ret void
-}