From cb8f98382be7361c94439f48ec9b297e4d70c49e Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 27 Jun 2014 18:35:51 +0000 Subject: [NVPTX] Fix handling of ldg/ldu intrinsics. The address space of the pointer must be global (1) for these intrinsics. There must also be alignment metadata attached to the intrinsic calls, e.g. %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 !0 = metadata !{i32 4} git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211939 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/NVPTX/ldu-i8.ll | 6 +++-- test/CodeGen/NVPTX/ldu-ldg.ll | 40 +++++++++++++++++++++++++++++++ test/CodeGen/NVPTX/ldu-reg-plus-offset.ll | 6 ++--- 3 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/NVPTX/ldu-ldg.ll (limited to 'test') diff --git a/test/CodeGen/NVPTX/ldu-i8.ll b/test/CodeGen/NVPTX/ldu-i8.ll index 81a82b2c38..9cc6675579 100644 --- a/test/CodeGen/NVPTX/ldu-i8.ll +++ b/test/CodeGen/NVPTX/ldu-i8.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -declare i8 @llvm.nvvm.ldu.global.i.i8(i8*) +declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*) define i8 @foo(i8* %a) { ; Ensure we properly truncate off the high-order 24 bits ; CHECK: ldu.global.u8 ; CHECK: cvt.u32.u16 ; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255 - %val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a) + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0 ret i8 %val } + +!0 = metadata !{i32 4} diff --git a/test/CodeGen/NVPTX/ldu-ldg.ll b/test/CodeGen/NVPTX/ldu-ldg.ll new file mode 100644 index 0000000000..3b0619ff51 --- /dev/null +++ b/test/CodeGen/NVPTX/ldu-ldg.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr) +declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 
addrspace(1)* %ptr) +declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr) +declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr) + + +; CHECK: func0 +define i8 @func0(i8 addrspace(1)* %ptr) { +; ldu.global.u8 + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0 + ret i8 %val +} + +; CHECK: func1 +define i32 @func1(i32 addrspace(1)* %ptr) { +; ldu.global.u32 + %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 + ret i32 %val +} + +; CHECK: func2 +define i8 @func2(i8 addrspace(1)* %ptr) { +; ld.global.nc.u8 + %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0 + ret i8 %val +} + +; CHECK: func3 +define i32 @func3(i32 addrspace(1)* %ptr) { +; ld.global.nc.u32 + %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 + ret i32 %val +} + + + +!0 = metadata !{i32 4} diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll index 26cadc401b..55707ea851 100644 --- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll +++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll @@ -7,9 +7,9 @@ define void @reg_plus_offset(i32* %a) { ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32]; ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36]; %p2 = getelementptr i32* %a, i32 8 - %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1 + %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1 %p3 = getelementptr i32* %a, i32 9 - %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1 + %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1 %t3 = mul i32 %t1, %t2 store i32 %t3, i32* %a ret void @@ -17,5 +17,5 @@ define void @reg_plus_offset(i32* %a) { !1 = metadata !{ i32 4 } -declare i32 @llvm.nvvm.ldu.global.i.i32(i32*) +declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*) declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() -- cgit v1.2.3