summaryrefslogtreecommitdiff
path: root/lib/Target/ARM/ARMTargetTransformInfo.cpp
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-03-14 19:17:02 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-03-14 19:17:02 +0000
commitd81511f0a671a9271d7ba10cce6c27331b57f553 (patch)
tree776bf66db17ca79ca91b4e7b7ba8ae5bb29d4df0 /lib/Target/ARM/ARMTargetTransformInfo.cpp
parent042b79625f315da6378d06b5480b15894d6b06b1 (diff)
downloadllvm-d81511f0a671a9271d7ba10cce6c27331b57f553.tar.gz
llvm-d81511f0a671a9271d7ba10cce6c27331b57f553.tar.bz2
llvm-d81511f0a671a9271d7ba10cce6c27331b57f553.tar.xz
ARM cost model: Increase cost of some vector selects we do terrible on
By terrible I mean we store/load from the stack. This matters on PAQp8 in _Z5trainPsS_ii (which is inlined into Mixer::update) where we decide to vectorize a loop with a VF of 8 resulting in a 25% degradation on a cortex-a8. LV: Found an estimated cost of 2 for VF 8 For instruction: icmp slt i32 LV: Found an estimated cost of 2 for VF 8 For instruction: select i1, i32, i32 The bug that tracks the CodeGen part is PR14868. radar://13403975 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177105 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM/ARMTargetTransformInfo.cpp')
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp24
1 files changed, 24 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index eef282e709..062d4d3f57 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -334,6 +334,30 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v4i1, MVT::v4i8, 2*4 + 2*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i8, 2*8 + 1 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i8, 2*16 + 1 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i16, 2*4 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i16, 2*8 + 1 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+ array_lengthof(NEONVectorSelectTbl),
+ ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
return LT.first;
}