summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp7
-rw-r--r--test/Transforms/LoopVectorize/funcall.ll32
2 files changed, 39 insertions, 0 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index f41bd28776..e6e0f6b6ae 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3520,6 +3520,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// but is not a load, then we quit. Notice that we don't handle function
// calls that read or write.
if (it->mayReadFromMemory()) {
+ // Many math library functions read the rounding mode. We will only
+ // vectorize a loop if it contains known function calls that don't set
+ // the rounding mode. Therefore, it is safe to ignore this read from memory.
+ CallInst *Call = dyn_cast<CallInst>(it);
+ if (Call && getIntrinsicIDForCall(Call, TLI))
+ continue;
+
LoadInst *Ld = dyn_cast<LoadInst>(it);
if (!Ld) return false;
if (!Ld->isSimple() && !IsAnnotatedParallel) {
diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll
new file mode 100644
index 0000000000..0fb929f65c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/funcall.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure we can vectorize loops with calls to math library functions.
+; They might read the rounding mode but we are only vectorizing loops that
+; contain a limited set of function calls and none of them sets the rounding
+; mode, so vectorizing them is safe.
+
+; CHECK: test
+; CHECK: <2 x double>
+
+define void @test(double* %d, double %t) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double* %d, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8
+ %1 = tail call double @llvm.pow.f64(double %0, double %t)
+ store double %1, double* %arrayidx, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+declare double @llvm.pow.f64(double, double)