summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/R600/AMDGPUPromoteAlloca.cpp26
-rw-r--r--test/CodeGen/R600/private-memory-atomics.ll31
-rw-r--r--test/CodeGen/R600/private-memory-broken.ll20
-rw-r--r--test/CodeGen/R600/private-memory.ll14
4 files changed, 89 insertions, 2 deletions
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index 053ea8a90b..218750d445 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -129,6 +129,22 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
return GEP->getOperand(2);
}
+// Returns true only if Inst is a load, store, bitcast or addrspacecast, i.e.
+// an instruction the alloca-to-vector rewrite below is expected to handle.
+// TODO: Check isTriviallyVectorizable for calls and handle other
+// instructions.
+static bool canVectorizeInst(Instruction *Inst) {
+ switch (Inst->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ return true;
+ default:
+ return false; // Any other opcode is rejected.
+ }
+}
+
static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
Type *AllocaTy = Alloca->getAllocatedType();
@@ -149,6 +165,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
for (User *AllocaUser : Alloca->users()) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
if (!GEP) {
+ if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+ return false;
+
WorkList.push_back(AllocaUser);
continue;
}
@@ -164,6 +183,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
GEPVectorIdx[GEP] = Index;
for (User *GEPUser : AllocaUser->users()) {
+ if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+ return false;
+
WorkList.push_back(GEPUser);
}
}
@@ -201,12 +223,12 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
break;
}
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
break;
default:
Inst->dump();
- llvm_unreachable("Do not know how to replace this instruction "
- "with vector op");
+ llvm_unreachable("Inconsistency in instructions promotable to vector");
}
}
return true;
diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll
new file mode 100644
index 0000000000..def4f9dee5
--- /dev/null
+++ b/test/CodeGen/R600/private-memory-atomics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s
+
+; This works because the promote alloca pass replaces these with LDS atomics.
+
+; Private atomics have no real use, but the compiler at least shouldn't crash on them.
+define void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+ %tmp = alloca [2 x i32]
+ %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ store i32 0, i32* %tmp1
+ store i32 1, i32* %tmp2
+ %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel
+ store i32 %tmp4, i32 addrspace(1)* %out
+ ret void
+}
+
+define void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+ %tmp = alloca [2 x i32]
+ %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ store i32 0, i32* %tmp1
+ store i32 1, i32* %tmp2
+ %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic
+ %val = extractvalue { i32, i1 } %tmp4, 0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll
new file mode 100644
index 0000000000..a5f61637e9
--- /dev/null
+++ b/test/CodeGen/R600/private-memory-broken.ll
@@ -0,0 +1,20 @@
+; RUN: not llc -verify-machineinstrs -march=r600 -mcpu=SI %s 2>&1 | FileCheck %s
+
+; Make sure the promote alloca pass doesn't crash.
+
+; CHECK: unsupported call
+
+declare i32 @foo(i32*) nounwind
+
+define void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+ %tmp = alloca [2 x i32]
+ %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ store i32 0, i32* %tmp1
+ store i32 1, i32* %tmp2
+ %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %val = call i32 @foo(i32* %tmp3) nounwind
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index 22bd358957..89122bec34 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -267,5 +267,19 @@ entry:
%load = load i32* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
+}
+define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+ %tmp = alloca [2 x i32]
+ %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ store i32 0, i32* %tmp1
+ store i32 1, i32* %tmp2
+ %cmp = icmp eq i32 %in, 0
+ %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
+ %load = load i32* %sel
+ store i32 %load, i32 addrspace(1)* %out
+ ret void
}
+