diff options
author | Tim Northover <tnorthover@apple.com> | 2014-02-06 18:18:36 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-02-06 18:18:36 +0000 |
commit | 0c245b69f7a6b9eaa1386f7c2251a47a253ced62 (patch) | |
tree | da0f2a7a9d37fa05b043a4a3f3505531a8facc43 /lib/Target/X86 | |
parent | 6984ee6aa262a0ab3b49a161066f4fab941a2ee6 (diff) | |
download | llvm-0c245b69f7a6b9eaa1386f7c2251a47a253ced62.tar.gz llvm-0c245b69f7a6b9eaa1386f7c2251a47a253ced62.tar.bz2 llvm-0c245b69f7a6b9eaa1386f7c2251a47a253ced62.tar.xz |
X86: add costs for 64-bit vector ext/trunc & rebalance
The most important part of this is probably adding any cost at all for
operations like zext <8 x i8> to <8 x i32>. Previously, they were being
recorded as extremely costly (24, I believe), which made LLVM fall back
on a 4-wide vectorisation of a loop.
It also rebalances the values for sext, zext and trunc. Lacking any
other sane metric that might work across CPU microarchitectures, I went
for instruction counts. This seems to be in reasonable accord with the rest
of the table (sitofp, ...), though no doubt at least one value is
sub-optimal for some bizarre reason.
Finally, separate AVX and AVX2 values are provided where appropriate.
The CodeGen is quite different in many cases.
rdar://problem/15981990
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200928 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86TargetTransformInfo.cpp | 73 |
1 files changed, 58 insertions, 15 deletions
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 781be2fddd..207a7685c5 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -411,16 +411,58 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry<MVT::SimpleValueType> + AVX2ConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 }, + }; + + static const TypeConversionCostTblEntry<MVT::SimpleValueType> AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, - { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { 
ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, + + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, @@ -450,14 +492,15 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; + if (ST->hasAVX2()) { + int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD, + DstTy.getSimpleVT(), 
SrcTy.getSimpleVT()); + if (Idx != -1) + return AVX2ConversionTbl[Idx].Cost; + } + if (ST->hasAVX()) { int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); |