summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-11-06 19:33:53 +0000
committerNadav Rotem <nrotem@apple.com>2012-11-06 19:33:53 +0000
commitb042868c01925dae3a1032890f591f1da78b19d3 (patch)
tree38cb8d56122a6e2eb42f2fccafe210bcec458225
parenta082892fb734f4738f5e687778997706fa0dd65a (diff)
downloadllvm-b042868c01925dae3a1032890f591f1da78b19d3.tar.gz
llvm-b042868c01925dae3a1032890f591f1da78b19d3.tar.bz2
llvm-b042868c01925dae3a1032890f591f1da78b19d3.tar.xz
Cost Model: add tables for some avx type-conversion hacks.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167480 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp60
-rw-r--r--lib/Target/X86/X86ISelLowering.h3
-rw-r--r--test/Analysis/CostModel/X86/cast.ll32
-rw-r--r--test/Transforms/LoopVectorize/X86/cost-model.ll2
4 files changed, 94 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6b5466d70b..5853b5109c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17526,6 +17526,23 @@ int FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
return -1;
}
+struct X86TypeConversionCostTblEntry {
+ int ISD;
+ MVT Dst;
+ MVT Src;
+ unsigned Cost;
+};
+
+int FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
+ int ISD, MVT Dst, MVT Src) {
+ for (unsigned int i = 0; i < len; ++i)
+ if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
+ return i;
+
+ // Could not find an entry.
+ return -1;
+}
+
unsigned
X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
Type *Ty) const {
@@ -17535,8 +17552,7 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
int ISD = InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- const X86Subtarget &ST =
- TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+ const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
static const X86CostTblEntry AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
@@ -17647,5 +17663,45 @@ unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
+unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode,
+ Type *Dst,
+ Type *Src) const {
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
+
+ const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+ static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
+ };
+ if (ST.hasAVX()) {
+ int Idx = FindInConvertTable(AVXConversionTbl,
+ array_lengthof(AVXConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return AVXConversionTbl[Idx].Cost;
+ }
+
+ return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
+}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 40dfaa0d43..c6354b0801 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -960,6 +960,9 @@ namespace llvm {
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) const;
+
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
};
}
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index f8b1114a7c..298d5c66ad 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -32,3 +32,35 @@ define i32 @add(i32 %arg) {
ret i32 undef
}
+define i32 @zext_sext(<8 x i1> %in) {
+ ;CHECK: cost of 6 {{.*}} zext
+ %Z = zext <8 x i1> %in to <8 x i32>
+ ;CHECK: cost of 9 {{.*}} sext
+ %S = sext <8 x i1> %in to <8 x i32>
+
+ ;CHECK: cost of 1 {{.*}} sext
+ %A = sext <8 x i16> undef to <8 x i32>
+ ;CHECK: cost of 1 {{.*}} zext
+ %B = zext <8 x i16> undef to <8 x i32>
+ ;CHECK: cost of 1 {{.*}} sext
+ %C = sext <4 x i32> undef to <4 x i64>
+
+ ;CHECK: cost of 1 {{.*}} zext
+ %D = zext <4 x i32> undef to <4 x i64>
+ ;CHECK: cost of 1 {{.*}} trunc
+
+ %E = trunc <4 x i64> undef to <4 x i32>
+ ;CHECK: cost of 1 {{.*}} trunc
+ %F = trunc <8 x i32> undef to <8 x i16>
+
+ ret i32 undef
+}
+
+define i32 @masks(<8 x i1> %in) {
+ ;CHECK: cost of 6 {{.*}} zext
+ %Z = zext <8 x i1> %in to <8 x i32>
+ ;CHECK: cost of 9 {{.*}} sext
+ %S = sext <8 x i1> %in to <8 x i32>
+ ret i32 undef
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
index 40e660855b..628f9912c8 100644
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
@a = common global [2048 x i32] zeroinitializer, align 16
;CHECK: cost_model_1
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
;CHECK: ret void
define void @cost_model_1() nounwind uwtable noinline ssp {
entry: