summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-03-24 18:21:41 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-03-24 18:21:41 +0000
commit9d1cd8e8689cb12d257265d43a79917ac332efbe (patch)
tree92b139dfc9be1fbcf918e70a0c0a87b9e084aa71
parent187225db529de6f63e844038cab578f06261a22f (diff)
downloadllvm-9d1cd8e8689cb12d257265d43a79917ac332efbe.tar.gz
llvm-9d1cd8e8689cb12d257265d43a79917ac332efbe.tar.bz2
llvm-9d1cd8e8689cb12d257265d43a79917ac332efbe.tar.xz
Merging r201097:
------------------------------------------------------------------------ r201097 | thomas.stellard | 2014-02-10 08:58:30 -0800 (Mon, 10 Feb 2014) | 9 lines R600/SI: Initialize M0 and emit S_WQM_B64 whenever DS instructions are used DS instructions that access local memory can only use addresses that are less than or equal to the value of M0. When M0 is uninitialized, then we experience undefined behavior. This patch also changes the behavior to emit S_WQM_B64 on pixel shaders no matter what kind of DS instruction is used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@204648 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp30
-rw-r--r--test/CodeGen/R600/load.ll19
2 files changed, 39 insertions, 10 deletions
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index d52f558774..ef867d3669 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -109,6 +109,23 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm);
}
+static bool isDS(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DS_ADD_U32_RTN:
+ case AMDGPU::DS_SUB_U32_RTN:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B8:
+ case AMDGPU::DS_WRITE_B16:
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_I8:
+ case AMDGPU::DS_READ_U8:
+ case AMDGPU::DS_READ_I16:
+ case AMDGPU::DS_READ_U16:
+ return true;
+ }
+}
+
bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
MachineBasicBlock *To) {
@@ -435,6 +452,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = llvm::next(I);
MachineInstr &MI = *I;
+ if (isDS(MI.getOpcode())) {
+ NeedM0 = true;
+ NeedWQM = true;
+ }
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
@@ -495,14 +517,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
IndirectDst(MI);
break;
- case AMDGPU::DS_READ_B32:
- NeedWQM = true;
- // Fall through
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_ADD_U32_RTN:
- NeedM0 = true;
- break;
-
case AMDGPU::V_INTERP_P1_F32:
case AMDGPU::V_INTERP_P2_F32:
case AMDGPU::V_INTERP_MOV_F32:
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index e4492d7d6e..0153524d13 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -445,6 +445,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
%1 = load i8 addrspace(3)* %in
@@ -458,6 +459,7 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
@@ -472,6 +474,7 @@ entry:
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v2i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -489,6 +492,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -506,6 +510,7 @@ entry:
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v4i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
@@ -529,6 +534,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
@@ -546,6 +552,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
@@ -560,6 +567,7 @@ entry:
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
@@ -574,6 +582,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v2i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -591,6 +600,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -608,6 +618,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v4i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
@@ -631,6 +642,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
@@ -643,11 +655,12 @@ entry:
ret void
}
-; load an i32 value from the glocal address space.
+; load an i32 value from the local address space.
; R600-CHECK-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_i32_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
@@ -656,10 +669,11 @@ entry:
ret void
}
-; load a f32 value from the global address space.
+; load a f32 value from the local address space.
; R600-CHECK-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_f32_local
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
@@ -673,6 +687,7 @@ entry:
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_v2f32_local
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
; SI-CHECK: DS_READ_B32
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {