author     Arnold Schwaighofer <aschwaighofer@apple.com>  2013-04-03 21:46:05 +0000
committer  Arnold Schwaighofer <aschwaighofer@apple.com>  2013-04-03 21:46:05 +0000
commit     6b6050b229976a2f53184f6d6857e6f445a869d0 (patch)
tree       3f91e43d68b7aedf1ce51e9da9ff457b47f03ecb /lib
parent     d787a41b118a3724d1df87dc3d38cc3fddb3a145 (diff)
X86 cost model: Vector shifts are expensive in most cases
The default logic does not correctly identify costs of casts because they
are marked as custom on x86. For some cases, where the shift amount is a
scalar, we would be able to generate better code. Unfortunately, when this
is the case the value (the splat) will get hoisted out of the loop, thereby
making it invisible to ISel.

radar://13130673
radar://13537826

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178703 91177308-0d34-0410-b5e6-96231b3b80d8
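To make the hoisting problem concrete, here is a minimal C++ sketch (the
function and its names are illustrative, not part of the commit). The shift
amount is a loop-invariant scalar; the vectorizer turns the shift into a
vector shift by a splat of that scalar, and LICM then hoists the splat out
of the loop, so ISel inside the loop only sees a shift by an arbitrary
vector and must assume the worst-case lowering:

void shiftByScalar(short *A, int N, short Amt) {
  // 'Amt' is loop-invariant. On SSE2 the body vectorizes to a <8 x i16>
  // shift whose amount operand is a splat of 'Amt'; once LICM hoists that
  // splat out of the loop, ISel can no longer prove the amount is a
  // uniform scalar, so the conservative (scalarized) cost applies.
  for (int i = 0; i < N; ++i)
    A[i] = A[i] << Amt;
}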
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 42
1 file changed, 42 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 2336035bea..a38b22bd78 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -182,6 +182,16 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
     { ISD::SRL, MVT::v2i64, 1 },
     { ISD::SHL, MVT::v4i64, 1 },
     { ISD::SRL, MVT::v4i64, 1 },
+
+    { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
+    { ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
+
+    { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
+    { ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
+
+    { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
+    { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
+    { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
   };

   // Look for AVX2 lowering tricks.
@@ -192,6 +202,38 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
       return LT.first * AVX2CostTable[Idx].Cost;
   }

+  static const CostTblEntry<MVT> SSE2CostTable[] = {
+    // We don't correctly identify costs of casts because they are marked as
+    // custom.
+    // For some cases, where the shift amount is a scalar we would be able
+    // to generate better code. Unfortunately, when this is the case the value
+    // (the splat) will get hoisted out of the loop, thereby making it invisible
+    // to ISel. The cost model must return worst case assumptions because it is
+    // used for vectorization and we don't want to make vectorized code worse
+    // than scalar code.
+    { ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
+    { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
+    { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
+    { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
+
+    { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
+    { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
+    { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
+    { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
+
+    { ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
+    { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
+    { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
+    { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
+  };
+
+  if (ST->hasSSE2()) {
+    int Idx = CostTableLookup<MVT>(SSE2CostTable, array_lengthof(SSE2CostTable),
+                                   ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * SSE2CostTable[Idx].Cost;
+  }
+
   static const CostTblEntry<MVT> AVX1CostTable[] = {
     // We don't have to scalarize unsupported ops. We can issue two half-sized
     // operations and we only need to extract the upper YMM half.
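For readers unfamiliar with the lookup pattern used in this patch, here is a
minimal standalone sketch; the types below are simplified stand-ins, not
LLVM's actual CostTblEntry/CostTableLookup declarations. The scheme is a
linear scan over (opcode, value type) pairs, and the caller scales the
per-register cost by LT.first, the number of legal-type registers the
original vector is split into:

#include <cstddef>

// Simplified stand-ins for MVT and CostTblEntry (illustrative only).
enum SimpleMVT { v16i8, v8i16, v4i32, v2i64 };
struct Entry { int ISD; SimpleMVT Ty; unsigned Cost; };

// Linear scan mirroring CostTableLookup: return the index of the first
// entry whose opcode and value type both match, or -1 if none applies,
// in which case the caller falls through to the generic cost logic.
static int lookup(const Entry *Tbl, size_t Len, int ISD, SimpleMVT Ty) {
  for (size_t I = 0; I != Len; ++I)
    if (Tbl[I].ISD == ISD && Tbl[I].Ty == Ty)
      return static_cast<int>(I);
  return -1;
}

With the SSE2 table above, for example, an SRA of v8i16 yields 8*10 per
register; a <16 x i16> shift in the IR is legalized into two v8i16
registers (LT.first == 2), for a total reported cost of 2 * 80.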