summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nadav Rotem <nrotem@apple.com> 2013-04-21 08:05:59 +0000
committer Nadav Rotem <nrotem@apple.com> 2013-04-21 08:05:59 +0000
commit b86dff862f6ff434a903f99b36d73a5e5280d9e4 (patch)
tree 240521ba2b36f03dc0fff93f0a82a510200bcedc
parent bd65805016562bd0eed157123e9998d8916c3088 (diff)
download llvm-b86dff862f6ff434a903f99b36d73a5e5280d9e4.tar.gz
llvm-b86dff862f6ff434a903f99b36d73a5e5280d9e4.tar.bz2
llvm-b86dff862f6ff434a903f99b36d73a5e5280d9e4.tar.xz
SLPVectorize: Add support for vectorization of casts.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179975 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Vectorize/VecUtils.cpp 69
-rw-r--r-- test/Transforms/SLPVectorizer/X86/cast.ll 38
2 files changed, 107 insertions, 0 deletions
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
index 7701d080ff..9b9436683b 100644
--- a/lib/Transforms/Vectorize/VecUtils.cpp
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -328,6 +328,18 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -445,6 +457,41 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ int Cost = 0;
+ ValueList Operands;
+ Type *SrcTy = VL0->getOperand(0)->getType();
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ // Check that the source (operand) type is the same for every cast in VL.
+ if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
+ return getScalarizationCost(VecTy);
+ }
+
+ Cost += getTreeCost_rec(Operands, Depth+1);
+ if (Cost >= max_cost) return max_cost;
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
+ VL0->getType(), SrcTy);
+
+ VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+ int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
+ Cost += (VecCost - ScalarCost);
+ return Cost;
+ }
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -583,6 +630,28 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
}
switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ ValueList INVL;
+ for (int i = 0; i < VF; ++i)
+ INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ Value *InVec = vectorizeTree_rec(INVL, VF);
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ CastInst *CI = dyn_cast<CastInst>(VL0);
+ Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+ VectorizedValues[VL0] = V;
+ return V;
+ }
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
diff --git a/test/Transforms/SLPVectorizer/X86/cast.ll b/test/Transforms/SLPVectorizer/X86/cast.ll
new file mode 100644
index 0000000000..344dbbca2c
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/cast.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; int foo(int * restrict A, char * restrict B) {
+; A[0] = B[0];
+; A[1] = B[1];
+; A[2] = B[2];
+; A[3] = B[3];
+; }
+;CHECK: @foo
+;CHECK: load <4 x i8>
+;CHECK: sext
+;CHECK: store <4 x i32>
+define i32 @foo(i32* noalias nocapture %A, i8* noalias nocapture %B) { ; four i8 loads from %B, each sign-extended to i32 and stored to the matching slot of %A
+entry:
+ %0 = load i8* %B, align 1 ; read element 0 of %B
+ %conv = sext i8 %0 to i32 ; sign-extend it to i32
+ store i32 %conv, i32* %A, align 4 ; write element 0 of %A
+ %arrayidx2 = getelementptr inbounds i8* %B, i64 1 ; address of element 1 of %B
+ %1 = load i8* %arrayidx2, align 1 ; read element 1 of %B
+ %conv3 = sext i8 %1 to i32 ; sign-extend it to i32
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 1 ; address of element 1 of %A
+ store i32 %conv3, i32* %arrayidx4, align 4 ; write element 1 of %A
+ %arrayidx5 = getelementptr inbounds i8* %B, i64 2 ; address of element 2 of %B
+ %2 = load i8* %arrayidx5, align 1 ; read element 2 of %B
+ %conv6 = sext i8 %2 to i32 ; sign-extend it to i32
+ %arrayidx7 = getelementptr inbounds i32* %A, i64 2 ; address of element 2 of %A
+ store i32 %conv6, i32* %arrayidx7, align 4 ; write element 2 of %A
+ %arrayidx8 = getelementptr inbounds i8* %B, i64 3 ; address of element 3 of %B
+ %3 = load i8* %arrayidx8, align 1 ; read element 3 of %B
+ %conv9 = sext i8 %3 to i32 ; sign-extend it to i32
+ %arrayidx10 = getelementptr inbounds i32* %A, i64 3 ; address of element 3 of %A
+ store i32 %conv9, i32* %arrayidx10, align 4 ; write element 3 of %A
+ ret i32 undef ; the returned value is undef
+}
+