From 86cb795388643710dab34941ddcb5a9470ac39d8 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Sat, 1 Feb 2014 01:37:30 +0000 Subject: Revert "[SLPV] Recognize vectorizable intrinsics during SLP vectorization ..." This reverts commit r200576. It broke 32-bit self-host builds by vectorizing two calls to @llvm.bswap.i64, which we then fail to expand. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 89 +------------------------- test/Transforms/SLPVectorizer/X86/intrinsic.ll | 75 ---------------------- 2 files changed, 3 insertions(+), 161 deletions(-) delete mode 100644 test/Transforms/SLPVectorizer/X86/intrinsic.ll diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0dec4b05f9..80d9ffccaf 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -947,39 +947,6 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { buildTree_rec(Operands, Depth + 1); return; } - case Instruction::Call: { - // Check if the calls are all to the same vectorizable intrinsic. - IntrinsicInst *II = dyn_cast(VL[0]); - if (II==NULL) { - newTreeEntry(VL, false); - DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); - return; - } - - Intrinsic::ID ID = II->getIntrinsicID(); - - for (unsigned i = 1, e = VL.size(); i != e; ++i) { - IntrinsicInst *II2 = dyn_cast(VL[i]); - if (!II2 || II2->getIntrinsicID() != ID) { - newTreeEntry(VL, false); - DEBUG(dbgs() << "SLP: mismatched calls:" << *II << "!=" << *VL[i] - << "\n"); - return; - } - } - - newTreeEntry(VL, true); - for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) { - ValueList Operands; - // Prepare the operand vector. - for (unsigned j = 0; j < VL.size(); ++j) { - IntrinsicInst *II2 = dyn_cast(VL[j]); - Operands.push_back(II2->getArgOperand(i)); - } - buildTree_rec(Operands, Depth + 1); - } - return; - } default: newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); @@ -1105,30 +1072,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0); return VecStCost - ScalarStCost; } - case Instruction::Call: { - CallInst *CI = cast(VL0); - IntrinsicInst *II = cast(CI); - Intrinsic::ID ID = II->getIntrinsicID(); - - // Calculate the cost of the scalar and vector calls. - SmallVector ScalarTys, VecTys; - for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) { - ScalarTys.push_back(CI->getArgOperand(op)->getType()); - VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(), - VecTy->getNumElements())); - } - - int ScalarCallCost = VecTy->getNumElements() * - TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys); - - int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys); - - DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost - << " (" << VecCallCost << "-" << ScalarCallCost << ")" - << " for " << *II << "\n"); - - return VecCallCost - ScalarCallCost; - } default: llvm_unreachable("Unknown instruction"); } @@ -1143,10 +1086,10 @@ bool BoUpSLP::isFullyVectorizableTinyTree() { return false; // Gathering cost would be too much for tiny trees. - if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather) - return false; + if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather) + return false; - return true; + return true; } int BoUpSLP::getTreeCost() { @@ -1612,32 +1555,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = S; return propagateMetadata(S, E->Scalars); } - case Instruction::Call: { - CallInst *CI = cast(VL0); - - setInsertPointAfterBundle(E->Scalars); - std::vector OpVecs; - for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { - ValueList OpVL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - CallInst *CEI = cast(E->Scalars[i]); - OpVL.push_back(CEI->getArgOperand(j)); - } - - Value *OpVec = vectorizeTree(OpVL); - DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n"); - OpVecs.push_back(OpVec); - } - - Module *M = F->getParent(); - IntrinsicInst *II = cast(CI); - Intrinsic::ID ID = II->getIntrinsicID(); - Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) }; - Function *CF = Intrinsic::getDeclaration(M, ID, Tys); - Value *V = Builder.CreateCall(CF, OpVecs); - E->VectorizedValue = V; - return V; - } default: llvm_unreachable("unknown inst"); } diff --git a/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/test/Transforms/SLPVectorizer/X86/intrinsic.ll deleted file mode 100644 index 2b7ee754df..0000000000 --- a/test/Transforms/SLPVectorizer/X86/intrinsic.ll +++ /dev/null @@ -1,75 +0,0 @@ -; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.8.0" - -declare double @llvm.fabs.f64(double) nounwind readnone - -;CHECK-LABEL: @vec_fabs_f64( -;CHECK: load <2 x double> -;CHECK: load <2 x double> -;CHECK: call <2 x double> @llvm.fabs.v2f64 -;CHECK: store <2 x double> -;CHECK: ret -define void @vec_fabs_f64(double* %a, double* %b, double* %c) { -entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 - %mul = fmul double %i0, %i1 - %call = tail call double @llvm.fabs.f64(double %mul) nounwind readnone - %arrayidx3 = getelementptr inbounds double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - %call5 = tail call double @llvm.fabs.f64(double %mul5) nounwind readnone - store double %call, double* %c, align 8 - %arrayidx5 = getelementptr inbounds double* %c, i64 1 - store double %call5, double* %arrayidx5, align 8 - ret void -} - -declare float @llvm.copysign.f32(float, float) nounwind readnone - -;CHECK-LABEL: @vec_copysign_f32( -;CHECK: load <4 x float> -;CHECK: load <4 x float> -;CHECK: call <4 x float> @llvm.copysign.v4f32 -;CHECK: store <4 x float> -;CHECK: ret -define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) { -entry: - %0 = load float* %a, align 4 - %1 = load float* %b, align 4 - %call0 = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone - store float %call0, float* %c, align 4 - - %ix2 = getelementptr inbounds float* %a, i64 1 - %2 = load float* %ix2, align 4 - %ix3 = getelementptr inbounds float* %b, i64 1 - %3 = load float* %ix3, align 4 - %call1 = tail call float @llvm.copysign.f32(float %2, float %3) nounwind readnone - %c1 = getelementptr inbounds float* %c, i64 1 - store float %call1, float* %c1, align 4 - - %ix4 = getelementptr inbounds float* %a, i64 2 - %4 = load float* %ix4, align 4 - %ix5 = getelementptr inbounds float* %b, i64 2 - %5 = load float* %ix5, align 4 - %call2 = tail call float @llvm.copysign.f32(float %4, float %5) nounwind readnone - %c2 = getelementptr inbounds float* %c, i64 2 - store float %call2, float* %c2, align 4 - - %ix6 = getelementptr inbounds float* %a, i64 3 - %6 = load float* %ix6, align 4 - %ix7 = getelementptr inbounds float* %b, i64 3 - %7 = load float* %ix7, align 4 - %call3 = tail call float @llvm.copysign.f32(float %6, float %7) nounwind readnone - %c3 = getelementptr inbounds float* %c, i64 3 - store float %call3, float* %c3, align 4 - - ret void -} - - - -- cgit v1.2.3