-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp       76
-rw-r--r--  test/Transforms/BBVectorize/X86/simple-int.ll  79
2 files changed, 143 insertions, 12 deletions
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 4a2c878195..f9e9b3fbc4 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -642,7 +642,7 @@ namespace {
Function *F = I->getCalledFunction();
if (!F) return false;
- unsigned IID = F->getIntrinsicID();
+ Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
if (!IID) return false;
switch(IID) {
@@ -1020,14 +1020,67 @@ namespace {
// vectorized, the second arguments must be equal.
CallInst *CI = dyn_cast<CallInst>(I);
Function *FI;
- if (CI && (FI = CI->getCalledFunction()) &&
- FI->getIntrinsicID() == Intrinsic::powi) {
-
- Value *A1I = CI->getArgOperand(1),
- *A1J = cast<CallInst>(J)->getArgOperand(1);
- const SCEV *A1ISCEV = SE->getSCEV(A1I),
- *A1JSCEV = SE->getSCEV(A1J);
- return (A1ISCEV == A1JSCEV);
+ if (CI && (FI = CI->getCalledFunction())) {
+ Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
+ if (IID == Intrinsic::powi) {
+ Value *A1I = CI->getArgOperand(1),
+ *A1J = cast<CallInst>(J)->getArgOperand(1);
+ const SCEV *A1ISCEV = SE->getSCEV(A1I),
+ *A1JSCEV = SE->getSCEV(A1J);
+ return (A1ISCEV == A1JSCEV);
+ }
+
+ if (IID && VTTI) {
+ SmallVector<Type*, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(CI->getArgOperand(i)->getType());
+ unsigned ICost = VTTI->getIntrinsicInstrCost(IID, IT1, Tys);
+
+ Tys.clear();
+ CallInst *CJ = cast<CallInst>(J);
+ for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(CJ->getArgOperand(i)->getType());
+ unsigned JCost = VTTI->getIntrinsicInstrCost(IID, JT1, Tys);
+
+ Tys.clear();
+ assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
+ "Intrinsic argument counts differ");
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (IID == Intrinsic::powi && i == 1)
+ Tys.push_back(CI->getArgOperand(i)->getType());
+ else
+ Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
+ CJ->getArgOperand(i)->getType()));
+ }
+
+ Type *RetTy = getVecTypeForPair(IT1, JT1);
+ unsigned VCost = VTTI->getIntrinsicInstrCost(IID, RetTy, Tys);
+
+ if (VCost > ICost + JCost)
+ return false;
+
+ // We don't want to fuse to a type that will be split, even
+ // if the two input types will also be split and there is no other
+ // associated cost.
+ unsigned RetParts = VTTI->getNumberOfParts(RetTy);
+ if (RetParts > 1)
+ return false;
+ else if (!RetParts && VCost == ICost + JCost)
+ return false;
+
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (!Tys[i]->isVectorTy())
+ continue;
+
+ unsigned NumParts = VTTI->getNumberOfParts(Tys[i]);
+ if (NumParts > 1)
+ return false;
+ else if (!NumParts && VCost == ICost + JCost)
+ return false;
+ }
+
+ CostSavings = ICost + JCost - VCost;
+ }
}
return true;
@@ -2551,7 +2604,7 @@ namespace {
continue;
} else if (isa<CallInst>(I)) {
Function *F = cast<CallInst>(I)->getCalledFunction();
- unsigned IID = F->getIntrinsicID();
+ Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
if (o == NumOperands-1) {
BasicBlock &BB = *I->getParent();
@@ -2560,8 +2613,7 @@ namespace {
Type *ArgTypeJ = J->getType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- ReplacedOperands[o] = Intrinsic::getDeclaration(M,
- (Intrinsic::ID) IID, VArgType);
+ ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
continue;
} else if (IID == Intrinsic::powi && o == 1) {
// The second argument of powi is a single integer and we've already
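Net effect of the BBVectorize.cpp change above: besides the existing powi guard (the second arguments must be the same SCEV), a pair of matching intrinsic calls is now fused only when the target cost model says the fused vector call is no more expensive than the two scalar calls, and none of the fused types would be split across registers. Below is a minimal standalone sketch of that decision; the function name shouldFuseIntrinsicPair and its plain integer parameters are hypothetical stand-ins for the VTTI queries used in the patch (getIntrinsicInstrCost, getNumberOfParts), not LLVM API.

#include <cassert>
#include <vector>

// Hypothetical model of the decision added in the hunk above. ICost/JCost
// stand in for the scalar intrinsic costs, VCost for the cost of the fused
// vector call, and RetParts/ArgParts for what getNumberOfParts() would
// report for the fused return and (vector) argument types, with 0 meaning
// "no information". CostSavings is written only when fusion is accepted,
// mirroring the patch.
static bool shouldFuseIntrinsicPair(unsigned ICost, unsigned JCost,
                                    unsigned VCost, unsigned RetParts,
                                    const std::vector<unsigned> &ArgParts,
                                    int &CostSavings) {
  // Never fuse when the vector intrinsic is outright more expensive.
  if (VCost > ICost + JCost)
    return false;

  // Don't fuse to a return type the target will split back into pieces,
  // and don't bother with a break-even fusion when the type is unknown.
  if (RetParts > 1)
    return false;
  if (RetParts == 0 && VCost == ICost + JCost)
    return false;

  // Same checks for each fused (vector) argument type; scalar arguments
  // such as powi's i32 exponent are not in this list.
  for (unsigned Parts : ArgParts) {
    if (Parts > 1)
      return false;
    if (Parts == 0 && VCost == ICost + JCost)
      return false;
  }

  CostSavings = static_cast<int>(ICost + JCost) - static_cast<int>(VCost);
  return true;
}

int main() {
  int Savings = 0;

  // Two scalar calls of cost 1 fused into one single-register vector call
  // of cost 1 (placeholder numbers, not real corei7 costs): fuse, saving 1.
  assert(shouldFuseIntrinsicPair(1, 1, 1, /*RetParts=*/1, {1}, Savings) &&
         Savings == 1);

  // Same costs, but the fused return type would be split into two parts:
  // rejected even though the raw cost comparison is not worse.
  assert(!shouldFuseIntrinsicPair(1, 1, 1, /*RetParts=*/2, {1}, Savings));

  return 0;
}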
diff --git a/test/Transforms/BBVectorize/X86/simple-int.ll b/test/Transforms/BBVectorize/X86/simple-int.ll
new file mode 100644
index 0000000000..f5dbe46b14
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/simple-int.ll
@@ -0,0 +1,79 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+ %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+ %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1a
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.cos.f64(double %X1)
+ %Y2 = call double @llvm.cos.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+ %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test3
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %P2 = add i32 %P, 1
+ %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+ %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test4
+; CHECK: ret double %R
+}
+