summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eli Bendersky <eliben@google.com> 2014-03-18 23:51:07 +0000
committer Eli Bendersky <eliben@google.com> 2014-03-18 23:51:07 +0000
commit 21354ec60d0dde4f1995d816701b895c57f806bd (patch)
tree c00e675912eb5ddca01b813594beb3b9d03dd719
parent cae25dcbf7347d1a04f8746aedd6d6600b528a40 (diff)
downloadllvm-21354ec60d0dde4f1995d816701b895c57f806bd.tar.gz
llvm-21354ec60d0dde4f1995d816701b895c57f806bd.tar.bz2
llvm-21354ec60d0dde4f1995d816701b895c57f806bd.tar.xz
Expose "noduplicate" attribute as a property for intrinsics.
The "noduplicate" function attribute exists to prevent certain optimizations from duplicating calls to the function. This is important on platforms where duplicating certain function calls is unsafe (for example, execution barriers for CUDA and OpenCL). This patch makes it possible to specify intrinsics as "noduplicate" and translates that to the appropriate function attribute.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204200 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/IR/Intrinsics.td 4
-rw-r--r-- include/llvm/IR/IntrinsicsNVVM.td 10
-rw-r--r-- test/CodeGen/NVPTX/noduplicate-syncthreads.ll 74
-rw-r--r-- test/Feature/intrinsic-noduplicate.ll 9
-rw-r--r-- utils/TableGen/CodeGenIntrinsics.h 3
-rw-r--r-- utils/TableGen/CodeGenTarget.cpp 3
-rw-r--r-- utils/TableGen/IntrinsicEmitter.cpp 12
7 files changed, 109 insertions, 6 deletions
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 755c9c2b1f..94c7b50ac8 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -69,6 +69,10 @@ class ReadNone<int argNo> : IntrinsicProperty {
def IntrNoReturn : IntrinsicProperty;
+// IntrNoDuplicate - Calls to this intrinsic cannot be duplicated.
+// Parallels the noduplicate attribute on LLVM IR functions.
+def IntrNoDuplicate : IntrinsicProperty;
+
//===----------------------------------------------------------------------===//
// Types used by intrinsics.
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index a372c22e43..7f72ce8b66 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -730,15 +730,15 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
// Bar.Sync
def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoDuplicate]>;
def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoDuplicate]>;
def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>;
def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>;
def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>;
// Membar
def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
diff --git a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
new file mode 100644
index 0000000000..64745fcba3
--- /dev/null
+++ b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+; Make sure the call to syncthreads is not duplicated here by the LLVM
+; optimizations, because it has the noduplicate attribute set.
+
+; CHECK: call void @llvm.cuda.syncthreads
+; CHECK-NOT: call void @llvm.cuda.syncthreads
+
+; Function Attrs: nounwind
+define void @foo(float* %output) #1 {
+entry:
+ %output.addr = alloca float*, align 8
+ store float* %output, float** %output.addr, align 8
+ %0 = load float** %output.addr, align 8
+ %arrayidx = getelementptr inbounds float* %0, i64 0
+ %1 = load float* %arrayidx, align 4
+ %conv = fpext float %1 to double
+ %cmp = fcmp olt double %conv, 1.000000e+01
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %2 = load float** %output.addr, align 8
+ %3 = load float* %2, align 4
+ %conv1 = fpext float %3 to double
+ %add = fadd double %conv1, 1.000000e+00
+ %conv2 = fptrunc double %add to float
+ store float %conv2, float* %2, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+ %4 = load float** %output.addr, align 8
+ %5 = load float* %4, align 4
+ %conv3 = fpext float %5 to double
+ %add4 = fadd double %conv3, 2.000000e+00
+ %conv5 = fptrunc double %add4 to float
+ store float %conv5, float* %4, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ call void @llvm.cuda.syncthreads()
+ %6 = load float** %output.addr, align 8
+ %arrayidx6 = getelementptr inbounds float* %6, i64 0
+ %7 = load float* %arrayidx6, align 4
+ %conv7 = fpext float %7 to double
+ %cmp8 = fcmp olt double %conv7, 1.000000e+01
+ br i1 %cmp8, label %if.then9, label %if.else13
+
+if.then9: ; preds = %if.end
+ %8 = load float** %output.addr, align 8
+ %9 = load float* %8, align 4
+ %conv10 = fpext float %9 to double
+ %add11 = fadd double %conv10, 3.000000e+00
+ %conv12 = fptrunc double %add11 to float
+ store float %conv12, float* %8, align 4
+ br label %if.end17
+
+if.else13: ; preds = %if.end
+ %10 = load float** %output.addr, align 8
+ %11 = load float* %10, align 4
+ %conv14 = fpext float %11 to double
+ %add15 = fadd double %conv14, 4.000000e+00
+ %conv16 = fptrunc double %add15 to float
+ store float %conv16, float* %10, align 4
+ br label %if.end17
+
+if.end17: ; preds = %if.else13, %if.then9
+ ret void
+}
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.cuda.syncthreads() #2
+
+!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1}
+!1 = metadata !{null, metadata !"align", i32 8}
diff --git a/test/Feature/intrinsic-noduplicate.ll b/test/Feature/intrinsic-noduplicate.ll
new file mode 100644
index 0000000000..9a2b0aba5b
--- /dev/null
+++ b/test/Feature/intrinsic-noduplicate.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Make sure LLVM knows about the noduplicate attribute on the
+; llvm.cuda.syncthreads intrinsic.
+
+declare void @llvm.cuda.syncthreads()
+
+; CHECK: declare void @llvm.cuda.syncthreads() #[[ATTRNUM:[0-9]+]]
+; CHECK: attributes #[[ATTRNUM]] = { noduplicate nounwind }
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index edbb18bbcf..06daa97b66 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -73,6 +73,9 @@ namespace llvm {
/// canThrow - True if the intrinsic can throw.
bool canThrow;
+ /// isNoDuplicate - True if the intrinsic is marked as noduplicate.
+ bool isNoDuplicate;
+
/// isNoReturn - True if the intrinsic is no-return.
bool isNoReturn;
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 1f47675eb5..884af4c7cb 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -446,6 +446,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isCommutative = false;
canThrow = false;
isNoReturn = false;
+ isNoDuplicate = false;
if (DefName.size() <= 4 ||
std::string(DefName.begin(), DefName.begin() + 4) != "int_")
@@ -570,6 +571,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isCommutative = true;
else if (Property->getName() == "Throws")
canThrow = true;
+ else if (Property->getName() == "IntrNoDuplicate")
+ isNoDuplicate = true;
else if (Property->getName() == "IntrNoReturn")
isNoReturn = true;
else if (Property->isSubClassOf("NoCapture")) {
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index cf6934cb16..1b281288a4 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -502,6 +502,9 @@ struct AttributeComparator {
if (L->canThrow != R->canThrow)
return R->canThrow;
+ if (L->isNoDuplicate != R->isNoDuplicate)
+ return R->isNoDuplicate;
+
if (L->isNoReturn != R->isNoReturn)
return R->isNoReturn;
@@ -616,7 +619,8 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
ModRefKind modRef = getModRefKind(intrinsic);
- if (!intrinsic.canThrow || modRef || intrinsic.isNoReturn) {
+ if (!intrinsic.canThrow || modRef || intrinsic.isNoReturn ||
+ intrinsic.isNoDuplicate) {
OS << " const Attribute::AttrKind Atts[] = {";
bool addComma = false;
if (!intrinsic.canThrow) {
@@ -629,6 +633,12 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << "Attribute::NoReturn";
addComma = true;
}
+ if (intrinsic.isNoDuplicate) {
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::NoDuplicate";
+ addComma = true;
+ }
switch (modRef) {
case MRK_none: break;