summaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
authorJustin Holewinski <jholewinski@nvidia.com>2013-06-28 17:58:07 +0000
committerJustin Holewinski <jholewinski@nvidia.com>2013-06-28 17:58:07 +0000
commit331ba2739d484b670000bd59b170fe1e993786d2 (patch)
treee2063393dd23f6e777e3d843a1191b1caff34b8b /lib/Target
parentef0ccc93203e99077632cec7a0a15b8e1b704aee (diff)
downloadllvm-331ba2739d484b670000bd59b170fe1e993786d2.tar.gz
llvm-331ba2739d484b670000bd59b170fe1e993786d2.tar.bz2
llvm-331ba2739d484b670000bd59b170fe1e993786d2.tar.xz
[NVPTX] Add support for cttz/ctlz/ctpop
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185176 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp16
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td58
2 files changed, 74 insertions, 0 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 338fe7c155..8877d131ea 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -216,6 +216,22 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
// Custom handling for i8 intrinsics
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+ setOperationAction(ISD::CTLZ, MVT::i16, Legal);
+ setOperationAction(ISD::CTLZ, MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
+ setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Legal);
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+
// Now deduce the information based on the above mentioned
// actions
computeRegisterProperties();
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 32193641f2..553a6ba703 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2406,6 +2406,64 @@ def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
"mov.b64\t{{$d1, $d2}}, $s;",
[]>;
+// Count leading zeros
+def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+ "clz.b32\t$d, $a;",
+ []>;
+def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "clz.b64\t$d, $a;",
+ []>;
+
+// 32-bit has a direct PTX instruction
+def : Pat<(ctlz Int32Regs:$a),
+ (CLZr32 Int32Regs:$a)>;
+def : Pat<(ctlz_zero_undef Int32Regs:$a),
+ (CLZr32 Int32Regs:$a)>;
+
+// For 64-bit, the result in PTX is actually 32-bit, so we zero-extend
+// it to 64-bit to match the LLVM semantics
+def : Pat<(ctlz Int64Regs:$a),
+ (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(ctlz_zero_undef Int64Regs:$a),
+ (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+
+// For 16-bit, we zero-extend to 32 bits, then truncate the result back
+// to 16 bits (ctlz of a 16-bit value is guaranteed to require fewer
+// than 16 bits to store). We also need to subtract 16 because the 16
+// high-order zeros introduced by the zero-extension were counted.
+def : Pat<(ctlz Int16Regs:$a),
+ (SUBi16ri (CVT_u16_u32 (CLZr32
+ (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
+ CvtNONE), 16)>;
+def : Pat<(ctlz_zero_undef Int16Regs:$a),
+ (SUBi16ri (CVT_u16_u32 (CLZr32
+ (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
+ CvtNONE), 16)>;
+
+// Population count
+def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+ "popc.b32\t$d, $a;",
+ []>;
+def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "popc.b64\t$d, $a;",
+ []>;
+
+// 32-bit has a direct PTX instruction
+def : Pat<(ctpop Int32Regs:$a),
+ (POPCr32 Int32Regs:$a)>;
+
+// For 64-bit, the result in PTX is actually 32-bit, so we zero-extend
+// it to 64-bit to match the LLVM semantics
+def : Pat<(ctpop Int64Regs:$a),
+ (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
+
+// For 16-bit, we zero-extend to 32 bits, then truncate the result back
+// to 16 bits (ctpop of a 16-bit value is guaranteed to require fewer
+// than 16 bits to store).
+def : Pat<(ctpop Int16Regs:$a),
+ (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
+ CvtNONE)>;
+
// fround f64 -> f32
def : Pat<(f32 (fround Float64Regs:$a)),
(CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;