Refine fix to bug 15041.

Thanks to help from Nadav and Hal, I have a more reasonable (and even correct!) approach. This specifically penalizes the insertelement and extractelement operations for the performance hit that will occur on PowerPC processors.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174725 91177308-0d34-0410-b5e6-96231b3b80d8
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2013-02-08 18:19:17 +0000
committer: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2013-02-08 18:19:17 +0000
commit: 0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7 (patch)
tree: 7c7e52d5180abb20a4fe7d79bee1685e75682ecc /lib/Target/PowerPC/PPCTargetTransformInfo.cpp
parent: 33daeab1bb8df65273fd9ecbf1a261f96733732e (diff)
download: llvm-0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7.tar.gz
llvm-0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7.tar.bz2
llvm-0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7.tar.xz
1 file changed, 17 insertions, 18 deletions
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f57d7643e1..5e9ad347d3 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -194,24 +194,23 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) const {
   assert(Val->isVectorTy() && "This must be a vector type");
 
-  const unsigned Awful = 1000;
-
-  // Vector element insert/extract with Altivec is very expensive.
-  // Until VSX is available, avoid vectorizing loops that require
-  // these operations.
-  if (Opcode == ISD::EXTRACT_VECTOR_ELT ||
-      Opcode == ISD::INSERT_VECTOR_ELT)
-    return Awful;
-
-  // We don't vectorize SREM/UREM so well.  Constrain the vectorizer
-  // for those as well.
-  if (Opcode == ISD::SREM || Opcode == ISD::UREM)
-    return Awful;
-
-  // VSELECT is not yet implemented, leading to use of insert/extract
-  // and ISEL, hence not a good idea.
-  if (Opcode == ISD::VSELECT)
-    return Awful;
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  // Estimated cost of a load-hit-store delay.  This was obtained
+  // experimentally as a minimum needed to prevent unprofitable
+  // vectorization for the paq8p benchmark.  It may need to be
+  // raised further if other unprofitable cases remain.
+  unsigned LHSPenalty = 12;
+
+  // Vector element insert/extract with Altivec is very expensive,
+  // because they require store and reload with the attendant
+  // processor stall for load-hit-store.  Until VSX is available,
+  // these need to be estimated as very costly.
+  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+      ISD == ISD::INSERT_VECTOR_ELT)
+    return LHSPenalty +
+      TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
 
   return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
 }
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	2013-02-08 18:19:17 +0000
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	2013-02-08 18:19:17 +0000
commit	0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7 (patch)
tree	7c7e52d5180abb20a4fe7d79bee1685e75682ecc /lib/Target/PowerPC/PPCTargetTransformInfo.cpp
parent	33daeab1bb8df65273fd9ecbf1a261f96733732e (diff)
download	llvm-0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7.tar.gz llvm-0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7.tar.bz2 llvm-0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7.tar.xz