Revert "LoopVectorizer: Only allow vectorization of intrinsics."

Revert 191122 - with extra checks we are allowed to vectorize math library function calls. Standard library identifiers are reserved names so functions with external linkage must not override them. However, functions with internal linkage can. Therefore, we can vectorize calls to math library functions with a check for external linkage and matching signature. This matches what we do during SelectionDAG building. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191206 91177308-0d34-0410-b5e6-96231b3b80d8
author: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-09-23 14:54:39 +0000
committer: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-09-23 14:54:39 +0000
commit: 4e7b015a4a724782bf37284c3c74f2540cf8f3e1 (patch)
tree: ed7396669c87d8a20a7739484e28d11420faaaaf
parent: 2bbb2d4576ec821c5a5a1648d4ccffc920ed2449 (diff)
download: llvm-4e7b015a4a724782bf37284c3c74f2540cf8f3e1.tar.gz
llvm-4e7b015a4a724782bf37284c3c74f2540cf8f3e1.tar.bz2
llvm-4e7b015a4a724782bf37284c3c74f2540cf8f3e1.tar.xz
2 files changed, 99 insertions, 49 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 02029e6ae0..e3bae02c7f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1809,6 +1809,31 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
   }
 }
 
+static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
+                                              Intrinsic::ID ValidIntrinsicID) {
+  if (I.getNumArgOperands() != 1 ||
+      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+      I.getType() != I.getArgOperand(0)->getType() ||
+      !I.onlyReadsMemory())
+    return Intrinsic::not_intrinsic;
+
+  return ValidIntrinsicID;
+}
+
+static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
+                                               Intrinsic::ID ValidIntrinsicID) {
+  if (I.getNumArgOperands() != 2 ||
+      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+      !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
+      I.getType() != I.getArgOperand(0)->getType() ||
+      I.getType() != I.getArgOperand(1)->getType() ||
+      !I.onlyReadsMemory())
+    return Intrinsic::not_intrinsic;
+
+  return ValidIntrinsicID;
+}
+
+
 static Intrinsic::ID
 getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
   // If we have an intrinsic call, check if it is trivially vectorizable.
@@ -1847,8 +1872,9 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
   LibFunc::Func Func;
   Function *F = CI->getCalledFunction();
   // We're going to make assumptions on the semantics of the functions, check
-  // that the target knows that it's available in this environment.
-  if (!F || !TLI->getLibFunc(F->getName(), Func))
+  // that the target knows that it's available in this environment and it does
+  // not have local linkage.
+  if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
     return Intrinsic::not_intrinsic;
 
   // Otherwise check if we have a call to a function that can be turned into a
@@ -1859,67 +1885,67 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
   case LibFunc::sin:
   case LibFunc::sinf:
   case LibFunc::sinl:
-    return Intrinsic::sin;
+    return checkUnaryFloatSignature(*CI, Intrinsic::sin);
   case LibFunc::cos:
   case LibFunc::cosf:
   case LibFunc::cosl:
-    return Intrinsic::cos;
+    return checkUnaryFloatSignature(*CI, Intrinsic::cos);
   case LibFunc::exp:
   case LibFunc::expf:
   case LibFunc::expl:
-    return Intrinsic::exp;
+    return checkUnaryFloatSignature(*CI, Intrinsic::exp);
   case LibFunc::exp2:
   case LibFunc::exp2f:
   case LibFunc::exp2l:
-    return Intrinsic::exp2;
+    return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
   case LibFunc::log:
   case LibFunc::logf:
   case LibFunc::logl:
-    return Intrinsic::log;
+    return checkUnaryFloatSignature(*CI, Intrinsic::log);
   case LibFunc::log10:
   case LibFunc::log10f:
   case LibFunc::log10l:
-    return Intrinsic::log10;
+    return checkUnaryFloatSignature(*CI, Intrinsic::log10);
   case LibFunc::log2:
   case LibFunc::log2f:
   case LibFunc::log2l:
-    return Intrinsic::log2;
+    return checkUnaryFloatSignature(*CI, Intrinsic::log2);
   case LibFunc::fabs:
   case LibFunc::fabsf:
   case LibFunc::fabsl:
-    return Intrinsic::fabs;
+    return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
   case LibFunc::copysign:
   case LibFunc::copysignf:
   case LibFunc::copysignl:
-    return Intrinsic::copysign;
+    return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
   case LibFunc::floor:
   case LibFunc::floorf:
   case LibFunc::floorl:
-    return Intrinsic::floor;
+    return checkUnaryFloatSignature(*CI, Intrinsic::floor);
   case LibFunc::ceil:
   case LibFunc::ceilf:
   case LibFunc::ceill:
-    return Intrinsic::ceil;
+    return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
   case LibFunc::trunc:
   case LibFunc::truncf:
   case LibFunc::truncl:
-    return Intrinsic::trunc;
+    return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
   case LibFunc::rint:
   case LibFunc::rintf:
   case LibFunc::rintl:
-    return Intrinsic::rint;
+    return checkUnaryFloatSignature(*CI, Intrinsic::rint);
   case LibFunc::nearbyint:
   case LibFunc::nearbyintf:
   case LibFunc::nearbyintl:
-    return Intrinsic::nearbyint;
+    return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
   case LibFunc::round:
   case LibFunc::roundf:
   case LibFunc::roundl:
-    return Intrinsic::round;
+    return checkUnaryFloatSignature(*CI, Intrinsic::round);
   case LibFunc::pow:
   case LibFunc::powf:
   case LibFunc::powl:
-    return Intrinsic::pow;
+    return checkBinaryFloatSignature(*CI, Intrinsic::pow);
   }
 
   return Intrinsic::not_intrinsic;
@@ -2925,18 +2951,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       // We still don't handle functions. However, we can ignore dbg intrinsic
       // calls and we do handle certain intrinsic and libm functions.
       CallInst *CI = dyn_cast<CallInst>(it);
-      if (CI) {
+      if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
         DEBUG(dbgs() << "LV: Found a call site.\n");
-
-        if (!isa<IntrinsicInst>(it)) {
-          DEBUG(dbgs() << "LV: We only vectorize intrinsics.\n");
-          return false;
-        }
-
-        if (!getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
-          DEBUG(dbgs() << "LV: Found an unknown intrinsic.\n");
-          return false;
-        }
+        return false;
       }
 
       // Check that the instruction return type is vectorizable.
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index 99d6646e67..c3d570c03a 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1018,7 +1018,7 @@ for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
   %0 = load float* %arrayidx, align 4
-  %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
+  %call = tail call float @fabsf(float %0) nounwind readnone
   store float %call, float* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -1029,31 +1029,64 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
+declare float @fabsf(float) nounwind readnone
+
 declare double @llvm.pow.f64(double, double) nounwind readnone
 
 
-;CHECK: @not_intrin
-;CHECK: @round
-;CHECK-NOT: @round
-;CHECK: ret
-define void @not_intrin(i32* nocapture %A) nounwind ssp uwtable {
-  br label %1
-
-; <label>:1                                       ; preds = %1, %0
-  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
-  %4 = add nsw i32 %3, 3
-  store i32 %4, i32* %2, align 4
-  %5 = trunc i64 %indvars.iv to i32
-  tail call void @round(i32 %5) nounwind
+
+; Make sure we don't replace calls to functions with standard library function
+; signatures but defined with internal linkage.
+
+define internal float @roundf(float %x) nounwind readnone {
+  ret float 0.00000000
+}
+; CHECK-LABEL: internal_round
+; CHECK-NOT:  load <4 x float>
+
+define void @internal_round(float* nocapture %x) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %call = tail call float @roundf(float %0) nounwind readnone
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Make sure we don't replace calls to functions with standard library names but
+; different signatures.
+
+declare void @round(double %f)
+
+; CHECK-LABEL: wrong_signature
+; CHECK-NOT:  load <4 x double>
+
+define void @wrong_signature(double* nocapture %x) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %x, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 4
+  store double %0, double* %arrayidx, align 4
+  tail call void @round(double %0) nounwind readnone
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, 256
-  br i1 %exitcond, label %6, label %1
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
 
-; <label>:6                                       ; preds = %1
+for.end:                                          ; preds = %for.body
   ret void
 }
 
-declare void @round(i32)
author	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-09-23 14:54:39 +0000
committer	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-09-23 14:54:39 +0000
commit	4e7b015a4a724782bf37284c3c74f2540cf8f3e1 (patch)
tree	ed7396669c87d8a20a7739484e28d11420faaaaf
parent	2bbb2d4576ec821c5a5a1648d4ccffc920ed2449 (diff)
download	llvm-4e7b015a4a724782bf37284c3c74f2540cf8f3e1.tar.gz llvm-4e7b015a4a724782bf37284c3c74f2540cf8f3e1.tar.bz2 llvm-4e7b015a4a724782bf37284c3c74f2540cf8f3e1.tar.xz