From ac78a0645ddd2046fb66237ba4cfadffa2d367d7 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:58:10 +0000 Subject: [NVPTX] Calling conventions fix Fix ABI handling for function returning bool -- use st.param.b32 to return the value and use ld.param.b32 in caller to load the return value. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185177 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/NVPTX/compare-int.ll | 40 +++++++++++++++++----------------- test/CodeGen/NVPTX/convert-int-sm20.ll | 8 +++---- test/CodeGen/NVPTX/i8-param.ll | 23 +++++++++++++++++++ test/CodeGen/NVPTX/ld-addrspace.ll | 24 ++++++++++---------- test/CodeGen/NVPTX/ld-generic.ll | 8 +++---- 5 files changed, 63 insertions(+), 40 deletions(-) create mode 100644 test/CodeGen/NVPTX/i8-param.ll (limited to 'test/CodeGen/NVPTX') diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll index e929f24ddb..c595f215f6 100644 --- a/test/CodeGen/NVPTX/compare-int.ll +++ b/test/CodeGen/NVPTX/compare-int.ll @@ -195,7 +195,7 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) { define i16 @icmp_eq_i16(i16 %a, i16 %b) { ; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp eq i16 %a, %b %ret = zext i1 %cmp to i16 @@ -204,7 +204,7 @@ define i16 @icmp_eq_i16(i16 %a, i16 %b) { define i16 @icmp_ne_i16(i16 %a, i16 %b) { ; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ne i16 %a, %b %ret = zext i1 %cmp to i16 @@ -213,7 +213,7 @@ define i16 @icmp_ne_i16(i16 %a, i16 %b) { define i16 @icmp_ugt_i16(i16 %a, i16 %b) { ; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ugt i16 %a, %b %ret = zext i1 %cmp to i16 @@ -222,7 +222,7 @@ define i16 @icmp_ugt_i16(i16 %a, i16 %b) { define i16 @icmp_uge_i16(i16 %a, i16 %b) { ; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp uge i16 %a, %b %ret = zext i1 %cmp to i16 @@ -231,7 +231,7 @@ define i16 @icmp_uge_i16(i16 %a, i16 %b) { define i16 @icmp_ult_i16(i16 %a, i16 %b) { ; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ult i16 %a, %b %ret = zext i1 %cmp to i16 @@ -240,7 +240,7 @@ define i16 @icmp_ult_i16(i16 %a, i16 %b) { define i16 @icmp_ule_i16(i16 %a, i16 %b) { ; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ule i16 %a, %b %ret = zext i1 %cmp to i16 @@ -249,7 +249,7 @@ define i16 @icmp_ule_i16(i16 %a, i16 %b) { define i16 @icmp_sgt_i16(i16 %a, i16 %b) { ; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sgt i16 %a, %b %ret = zext i1 %cmp to i16 @@ -258,7 +258,7 @@ define i16 @icmp_sgt_i16(i16 %a, i16 %b) { define i16 @icmp_sge_i16(i16 %a, i16 %b) { ; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sge i16 %a, %b %ret = zext i1 %cmp to i16 @@ -267,7 +267,7 @@ define i16 @icmp_sge_i16(i16 %a, i16 %b) { define i16 @icmp_slt_i16(i16 %a, i16 %b) { ; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp slt i16 %a, %b %ret = zext i1 %cmp to i16 @@ -276,7 +276,7 @@ define i16 @icmp_slt_i16(i16 %a, i16 %b) { define i16 @icmp_sle_i16(i16 %a, i16 %b) { ; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sle i16 %a, %b %ret = zext i1 %cmp to i16 @@ -289,7 +289,7 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) { define i8 @icmp_eq_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp eq i8 %a, %b %ret = zext i1 %cmp to i8 @@ -299,7 +299,7 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) { define i8 @icmp_ne_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ne i8 %a, %b %ret = zext i1 %cmp to i8 @@ -309,7 +309,7 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) { define i8 @icmp_ugt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ugt i8 %a, %b %ret = zext i1 %cmp to i8 @@ -319,7 +319,7 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) { define i8 @icmp_uge_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp uge i8 %a, %b %ret = zext i1 %cmp to i8 @@ -329,7 +329,7 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) { define i8 @icmp_ult_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ult i8 %a, %b %ret = zext i1 %cmp to i8 @@ -339,7 +339,7 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) { define i8 @icmp_ule_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp ule i8 %a, %b %ret = zext i1 %cmp to i8 @@ -349,7 +349,7 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) { define i8 @icmp_sgt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sgt i8 %a, %b %ret = zext i1 %cmp to i8 @@ -359,7 +359,7 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) { define i8 @icmp_sge_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sge i8 %a, %b %ret = zext i1 %cmp to i8 @@ -369,7 +369,7 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) { define i8 @icmp_slt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp slt i8 %a, %b %ret = zext i1 %cmp to i8 @@ -379,7 +379,7 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) { define i8 @icmp_sle_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit ; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]] +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]] ; CHECK: ret %cmp = icmp sle i8 %a, %b %ret = zext i1 %cmp to i8 diff --git a/test/CodeGen/NVPTX/convert-int-sm20.ll b/test/CodeGen/NVPTX/convert-int-sm20.ll index fad240e03d..227cd31e11 100644 --- a/test/CodeGen/NVPTX/convert-int-sm20.ll +++ b/test/CodeGen/NVPTX/convert-int-sm20.ll @@ -8,16 +8,16 @@ ; i16 define i16 @cvt_i16_i32(i32 %x) { -; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}] -; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]] +; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}] +; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]] ; CHECK: ret %a = trunc i32 %x to i16 ret i16 %a } define i16 @cvt_i16_i64(i64 %x) { -; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}] -; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]] +; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}] +; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]] ; CHECK: ret %a = trunc i64 %x to i16 ret i16 %a diff --git a/test/CodeGen/NVPTX/i8-param.ll b/test/CodeGen/NVPTX/i8-param.ll new file mode 100644 index 0000000000..9a253ff6c9 --- /dev/null +++ b/test/CodeGen/NVPTX/i8-param.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +; CHECK: .visible .func (.param .b32 func_retval0) callee +define i8 @callee(i8 %a) { +; CHECK: ld.param.u8 + %ret = add i8 %a, 42 +; CHECK: st.param.b32 + ret i8 %ret +} + +; CHECK: .visible .func caller +define void @caller(i8* %a) { +; CHECK: ld.u8 + %val = load i8* %a + %ret = tail call i8 @callee(i8 %val) +; CHECK: ld.param.b32 + store i8 %ret, i8* %a + ret void +} + + \ No newline at end of file diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll index 204ae7b1fb..133ef09afd 100644 --- a/test/CodeGen/NVPTX/ld-addrspace.ll +++ b/test/CodeGen/NVPTX/ld-addrspace.ll @@ -4,27 +4,27 @@ ;; i8 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) { -; PTX32: ld.global.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.global.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i8 addrspace(1)* %ptr ret i8 %a } define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) { -; PTX32: ld.shared.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.shared.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i8 addrspace(3)* %ptr ret i8 %a } define i8 @ld_local_i8(i8 addrspace(5)* %ptr) { -; PTX32: ld.local.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.local.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i8 addrspace(5)* %ptr ret i8 %a @@ -32,27 +32,27 @@ define i8 @ld_local_i8(i8 addrspace(5)* %ptr) { ;; i16 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) { -; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i16 addrspace(1)* %ptr ret i16 %a } define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) { -; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i16 addrspace(3)* %ptr ret i16 %a } define i16 @ld_local_i16(i16 addrspace(5)* %ptr) { -; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i16 addrspace(5)* %ptr ret i16 %a diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll index f811a37191..3728268c24 100644 --- a/test/CodeGen/NVPTX/ld-generic.ll +++ b/test/CodeGen/NVPTX/ld-generic.ll @@ -4,9 +4,9 @@ ;; i8 define i8 @ld_global_i8(i8 addrspace(0)* %ptr) { -; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i8 addrspace(0)* %ptr ret i8 %a @@ -14,9 +14,9 @@ define i8 @ld_global_i8(i8 addrspace(0)* %ptr) { ;; i16 define i16 @ld_global_i16(i16 addrspace(0)* %ptr) { -; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: ret -; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: ld.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}] ; PTX64: ret %a = load i16 addrspace(0)* %ptr ret i16 %a -- cgit v1.2.3