From c6f24f4086c786e8ee8e74193ea9a08e90853e0b Mon Sep 17 00:00:00 2001
From: Justin Holewinski
Date: Tue, 8 Mar 2011 14:10:18 +0000
Subject: PTX: Add intrinsic support for ntid, ctaid, and nctaid registers

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127246 91177308-0d34-0410-b5e6-96231b3b80d8
---
NOTE(review): the line structure of this patch was reconstructed from a copy
in which every newline had been collapsed into a single space. Hunk line
counts were re-checked against the hunk headers (8->14, 10->28, 6->90, 5->17).
What could NOT be recovered is the original width of whitespace runs: column
alignment inside the TableGen lines is shown single-spaced, and the 2-space
indentation of nested/continuation lines is a conventional guess - confirm
against the tree. If a hunk is rejected for whitespace reasons, retrying with
"git apply --ignore-whitespace" may help. The text after the "@@" markers is
informational only and is kept exactly as it appeared in the collapsed copy.

 include/llvm/IntrinsicsPTX.td           | 10 +++-
 lib/Target/PTX/PTXIntrinsicInstrInfo.td | 26 +++++++--
 test/CodeGen/PTX/intrinsic.ll           | 96 +++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+), 6 deletions(-)

diff --git a/include/llvm/IntrinsicsPTX.td b/include/llvm/IntrinsicsPTX.td
index ec291e467e..9e372301c7 100644
--- a/include/llvm/IntrinsicsPTX.td
+++ b/include/llvm/IntrinsicsPTX.td
@@ -25,8 +25,14 @@ let TargetPrefix = "ptx" in {
   }
 }
 
-defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic;
-defm int_ptx_read_tid : PTXReadSpecialSubRegisterIntrinsic;
+defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic;
+defm int_ptx_read_tid : PTXReadSpecialSubRegisterIntrinsic;
+defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic;
+defm int_ptx_read_ntid : PTXReadSpecialSubRegisterIntrinsic;
+defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic;
+defm int_ptx_read_ctaid : PTXReadSpecialSubRegisterIntrinsic;
+defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic;
+defm int_ptx_read_nctaid : PTXReadSpecialSubRegisterIntrinsic;
 
 let TargetPrefix = "ptx" in
   def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>;
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 3f2737e077..a75c1086f7 100644
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -24,10 +24,28 @@ class PTX_READ_SPECIAL_SUB_REGISTER
   [(set RRegu16:$d, (intop))]>;
 
 def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER<"tid", int_ptx_read_tid_r64>;
-def PTX_READ_TID_X : PTX_READ_SPECIAL_SUB_REGISTER<"tid.x", int_ptx_read_tid_x>;
-def PTX_READ_TID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"tid.y", int_ptx_read_tid_y>;
-def PTX_READ_TID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"tid.z", int_ptx_read_tid_z>;
-def PTX_READ_TID_W : PTX_READ_SPECIAL_SUB_REGISTER<"tid.w", int_ptx_read_tid_w>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_SUB_REGISTER<"tid.x", int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"tid.y", int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"tid.z", int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_SUB_REGISTER<"tid.w", int_ptx_read_tid_w>;
+
+def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER<"ntid", int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.x", int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.y", int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.z", int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.w", int_ptx_read_ntid_w>;
+
+def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.x", int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.y", int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.z", int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.w", int_ptx_read_ctaid_w>;
+
+def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.x", int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.y", int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.z", int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.w", int_ptx_read_nctaid_w>;
 
 // PTX Parallel Synchronization and Communication Intrinsics
 
diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll
index 24c328b773..804d8b5f95 100644
--- a/test/CodeGen/PTX/intrinsic.ll
+++ b/test/CodeGen/PTX/intrinsic.ll
@@ -28,6 +28,90 @@ define ptx_device i16 @tid_w() {
   ret i16 %x
 }
 
+define ptx_device i16 @ntid_x() {
+; CHECK: mov.u16 rh0, ntid.x;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.ntid.x()
+  ret i16 %x
+}
+
+define ptx_device i16 @ntid_y() {
+; CHECK: mov.u16 rh0, ntid.y;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.ntid.y()
+  ret i16 %x
+}
+
+define ptx_device i16 @ntid_z() {
+; CHECK: mov.u16 rh0, ntid.z;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.ntid.z()
+  ret i16 %x
+}
+
+define ptx_device i16 @ntid_w() {
+; CHECK: mov.u16 rh0, ntid.w;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.ntid.w()
+  ret i16 %x
+}
+
+define ptx_device i16 @ctaid_x() {
+; CHECK: mov.u16 rh0, ctaid.x;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.ctaid.x()
+  ret i16 %x
+}
+
+define ptx_device i16 @ctaid_y() {
+; CHECK: mov.u16 rh0, ctaid.y;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.ctaid.y()
+  ret i16 %x
+}
+
+define ptx_device i16 @ctaid_z() {
+; CHECK: mov.u16 rh0, ctaid.z;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.ctaid.z()
+  ret i16 %x
+}
+
+define ptx_device i16 @ctaid_w() {
+; CHECK: mov.u16 rh0, ctaid.w;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.ctaid.w()
+  ret i16 %x
+}
+
+define ptx_device i16 @nctaid_x() {
+; CHECK: mov.u16 rh0, nctaid.x;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.nctaid.x()
+  ret i16 %x
+}
+
+define ptx_device i16 @nctaid_y() {
+; CHECK: mov.u16 rh0, nctaid.y;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.nctaid.y()
+  ret i16 %x
+}
+
+define ptx_device i16 @nctaid_z() {
+; CHECK: mov.u16 rh0, nctaid.z;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.nctaid.z()
+  ret i16 %x
+}
+
+define ptx_device i16 @nctaid_w() {
+; CHECK: mov.u16 rh0, nctaid.w;
+; CHECK-NEXT: ret;
+  %x = call i16 @llvm.ptx.read.nctaid.w()
+  ret i16 %x
+}
+
 define ptx_device void @bar_sync() {
 ; CHECK: bar.sync 0
 ; CHECK-NEXT: ret;
@@ -39,5 +123,17 @@ declare i16 @llvm.ptx.read.tid.x()
 declare i16 @llvm.ptx.read.tid.y()
 declare i16 @llvm.ptx.read.tid.z()
 declare i16 @llvm.ptx.read.tid.w()
+declare i16 @llvm.ptx.read.ntid.x()
+declare i16 @llvm.ptx.read.ntid.y()
+declare i16 @llvm.ptx.read.ntid.z()
+declare i16 @llvm.ptx.read.ntid.w()
+declare i16 @llvm.ptx.read.ctaid.x()
+declare i16 @llvm.ptx.read.ctaid.y()
+declare i16 @llvm.ptx.read.ctaid.z()
+declare i16 @llvm.ptx.read.ctaid.w()
+declare i16 @llvm.ptx.read.nctaid.x()
+declare i16 @llvm.ptx.read.nctaid.y()
+declare i16 @llvm.ptx.read.nctaid.z()
+declare i16 @llvm.ptx.read.nctaid.w()
 
 declare void @llvm.ptx.bar.sync(i32 %i)
-- 
cgit v1.2.3