summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arnold Schwaighofer <aschwaighofer@apple.com> 2013-08-06 22:37:52 +0000
committer Arnold Schwaighofer <aschwaighofer@apple.com> 2013-08-06 22:37:52 +0000
commit 2d66d4cf42022239bfc9cd260a7b924400139cb5 (patch)
tree cb821fd7e77aed0eb0fae4c9a825949a968c9873
parent 014096e4d5e65309ca71d0e63327f5386ddf16fb (diff)
download llvm-2d66d4cf42022239bfc9cd260a7b924400139cb5.tar.gz
llvm-2d66d4cf42022239bfc9cd260a7b924400139cb5.tar.bz2
llvm-2d66d4cf42022239bfc9cd260a7b924400139cb5.tar.xz
LoopVectorize: Allow vectorization of loops with lifetime markers
Patch by Marc Jessome!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187825 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/CodeGen/BasicTargetTransformInfo.cpp 3
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp 26
-rw-r--r-- test/Transforms/LoopVectorize/lifetime.ll 96
3 files changed, 117 insertions, 8 deletions
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index cb12a40ff0..b48b81767e 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -452,6 +452,9 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::pow: ISD = ISD::FPOW; break;
case Intrinsic::fma: ISD = ISD::FMA; break;
case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return 0;
}
const TargetLoweringBase *TLI = getTLI();
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index b93d9a065a..a62fedc43d 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1775,6 +1775,8 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
return II->getIntrinsicID();
default:
return Intrinsic::not_intrinsic;
@@ -2491,15 +2493,23 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
CallInst *CI = cast<CallInst>(it);
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
assert(ID && "Not an intrinsic call!");
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value*, 4> Args;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
- Args.push_back(Arg[Part]);
+ switch (ID) {
+ case Intrinsic::lifetime_end:
+ case Intrinsic::lifetime_start:
+ scalarizeInstruction(it);
+ break;
+ default:
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
+ Args.push_back(Arg[Part]);
+ }
+ Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
+ Function *F = Intrinsic::getDeclaration(M, ID, Tys);
+ Entry[Part] = Builder.CreateCall(F, Args);
}
- Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
- Function *F = Intrinsic::getDeclaration(M, ID, Tys);
- Entry[Part] = Builder.CreateCall(F, Args);
+ break;
}
break;
}
diff --git a/test/Transforms/LoopVectorize/lifetime.ll b/test/Transforms/LoopVectorize/lifetime.ll
new file mode 100644
index 0000000000..87006ed065
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lifetime.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure we can vectorize loops which contain lifetime markers.
+
+; CHECK-LABEL: test
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @test(i32 *%d) {
+entry:
+ %arr = alloca [1024 x i32], align 16
+ %0 = bitcast [1024 x i32]* %arr to i8*
+ call void @llvm.lifetime.start(i64 4096, i8* %0) #1
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ call void @llvm.lifetime.end(i64 4096, i8* %0) #1
+ %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+ %1 = load i32* %arrayidx, align 8
+ store i32 100, i32* %arrayidx, align 8
+ call void @llvm.lifetime.start(i64 4096, i8* %0) #1
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ call void @llvm.lifetime.end(i64 4096, i8* %0) #1
+ ret void
+}
+
+; CHECK-LABEL: testbitcast
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @testbitcast(i32 *%d) {
+entry:
+ %arr = alloca [1024 x i32], align 16
+ %0 = bitcast [1024 x i32]* %arr to i8*
+ call void @llvm.lifetime.start(i64 4096, i8* %0) #1
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %1 = bitcast [1024 x i32]* %arr to i8*
+ call void @llvm.lifetime.end(i64 4096, i8* %1) #1
+ %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+ %2 = load i32* %arrayidx, align 8
+ store i32 100, i32* %arrayidx, align 8
+ call void @llvm.lifetime.start(i64 4096, i8* %1) #1
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ call void @llvm.lifetime.end(i64 4096, i8* %0) #1
+ ret void
+}
+
+; CHECK-LABEL: testloopvariant
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @testloopvariant(i32 *%d) {
+entry:
+ %arr = alloca [1024 x i32], align 16
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = getelementptr [1024 x i32]* %arr, i32 0, i64 %indvars.iv
+ %1 = bitcast [1024 x i32]* %arr to i8*
+ call void @llvm.lifetime.end(i64 4096, i8* %1) #1
+ %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+ %2 = load i32* %arrayidx, align 8
+ store i32 100, i32* %arrayidx, align 8
+ call void @llvm.lifetime.start(i64 4096, i8* %1) #1
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1