summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
author Justin Holewinski <jholewinski@nvidia.com> 2014-06-27 18:35:51 +0000
committer Justin Holewinski <jholewinski@nvidia.com> 2014-06-27 18:35:51 +0000
commit cb8f98382be7361c94439f48ec9b297e4d70c49e (patch)
tree 6d7106e3c82873d46121fbb09453a5a2fd92c80e /test
parent 899227441241fda27d9c9eaf9143d533fff75559 (diff)
download llvm-cb8f98382be7361c94439f48ec9b297e4d70c49e.tar.gz
llvm-cb8f98382be7361c94439f48ec9b297e4d70c49e.tar.bz2
llvm-cb8f98382be7361c94439f48ec9b297e4d70c49e.tar.xz
[NVPTX] Fix handling of ldg/ldu intrinsics.
The address space of the pointer must be global (1) for these intrinsics. There must also be alignment metadata attached to the intrinsic calls, e.g.

  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
  !0 = metadata !{i32 4}

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211939 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- test/CodeGen/NVPTX/ldu-i8.ll 6
-rw-r--r-- test/CodeGen/NVPTX/ldu-ldg.ll 40
-rw-r--r-- test/CodeGen/NVPTX/ldu-reg-plus-offset.ll 6
3 files changed, 47 insertions, 5 deletions
diff --git a/test/CodeGen/NVPTX/ldu-i8.ll b/test/CodeGen/NVPTX/ldu-i8.ll
index 81a82b2c38..9cc6675579 100644
--- a/test/CodeGen/NVPTX/ldu-i8.ll
+++ b/test/CodeGen/NVPTX/ldu-i8.ll
@@ -2,13 +2,15 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-declare i8 @llvm.nvvm.ldu.global.i.i8(i8*)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
define i8 @foo(i8* %a) {
; Ensure we properly truncate off the high-order 24 bits
; CHECK: ldu.global.u8
; CHECK: cvt.u32.u16
; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255
- %val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a)
+ %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
ret i8 %val
}
+
+!0 = metadata !{i32 4}
diff --git a/test/CodeGen/NVPTX/ldu-ldg.ll b/test/CodeGen/NVPTX/ldu-ldg.ll
new file mode 100644
index 0000000000..3b0619ff51
--- /dev/null
+++ b/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
+
+
+; CHECK: func0
+define i8 @func0(i8 addrspace(1)* %ptr) {
+; CHECK: ldu.global.u8
+ %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+ ret i8 %val
+}
+
+; CHECK: func1
+define i32 @func1(i32 addrspace(1)* %ptr) {
+; CHECK: ldu.global.u32
+ %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+ ret i32 %val
+}
+
+; CHECK: func2
+define i8 @func2(i8 addrspace(1)* %ptr) {
+; CHECK: ld.global.nc.u8
+ %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+ ret i8 %val
+}
+
+; CHECK: func3
+define i32 @func3(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.nc.u32
+ %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+ ret i32 %val
+}
+
+
+
+!0 = metadata !{i32 4}
diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
index 26cadc401b..55707ea851 100644
--- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
+++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -7,9 +7,9 @@ define void @reg_plus_offset(i32* %a) {
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
%p2 = getelementptr i32* %a, i32 8
- %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1
+ %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
%p3 = getelementptr i32* %a, i32 9
- %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1
+ %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
%t3 = mul i32 %t1, %t2
store i32 %t3, i32* %a
ret void
@@ -17,5 +17,5 @@ define void @reg_plus_offset(i32* %a) {
!1 = metadata !{ i32 4 }
-declare i32 @llvm.nvvm.ldu.global.i.i32(i32*)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()