From a8b289b70d5ef416608bb71a874b8b4fe80158e1 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 25 Jan 2013 23:05:59 +0000 Subject: Initial implementation of PPCTargetTransformInfo This provides a place to add customized operation cost information and control some other target-specific IR-level transformations. The only non-trivial logic in this checkin assigns a higher cost to unaligned loads and stores (covered by the included test case). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173520 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/CMakeLists.txt | 1 + lib/Target/PowerPC/PPC.h | 4 + lib/Target/PowerPC/PPCTargetMachine.cpp | 9 ++ lib/Target/PowerPC/PPCTargetMachine.h | 3 + lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 220 ++++++++++++++++++++++++++ test/Analysis/CostModel/PowerPC/lit.local.cfg | 6 + test/Analysis/CostModel/PowerPC/load_store.ll | 34 ++++ 7 files changed, 277 insertions(+) create mode 100644 lib/Target/PowerPC/PPCTargetTransformInfo.cpp create mode 100644 test/Analysis/CostModel/PowerPC/lit.local.cfg create mode 100644 test/Analysis/CostModel/PowerPC/load_store.ll diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 192d18d664..6036428fad 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp ) diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index e6d38ebf21..72e0ce3568 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -25,6 +25,7 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; + class ImmutablePass; class JITCodeEmitter; class MachineInstr; class AsmPrinter; @@ -37,6 +38,9 @@ namespace llvm { JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); + + /// \brief 
Creates a PPC-specific Target Transformation Info pass. + ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); namespace PPCII { diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b8b7882ac0..fe851c1b6f 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -126,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } + +void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our PPC pass. This + // allows the PPC pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createPPCTargetTransformInfoPass(this)); +} + diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index d917d99ded..606ccb3141 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -68,6 +68,9 @@ public: virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); + + /// \brief Register PPC analysis passes with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &PM); }; /// PPC32TargetMachine - PowerPC 32-bit target machine. diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp new file mode 100644 index 0000000000..1afef33df0 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -0,0 +1,220 @@ +//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// PPC target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ppctti" +#include "PPC.h" +#include "PPCTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't have a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializePPCTTIPass(PassRegistry &); +} + +namespace { + +class PPCTTI : public ImmutablePass, public TargetTransformInfo { + const PPCTargetMachine *TM; + const PPCSubtarget *ST; + const PPCTargetLowering *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + PPCTTI(const PPCTargetMachine *TM) + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + TLI(TM->getTargetLowering()) { + initializePPCTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. 
+ static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getRegisterBitWidth(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti", + "PPC Target Transform Info", true, true, false) +char PPCTTI::ID = 0; + +ImmutablePass * +llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { + return new PPCTTI(TM); +} + + +//===----------------------------------------------------------------------===// +// +// PPC cost model. +// +//===----------------------------------------------------------------------===// + +PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + // FIXME: PPC currently does not have custom popcnt lowering even though + // there is hardware support. Once this is fixed, update this function + // to reflect the real capabilities of the hardware. 
+ return PSK_Software; +} + +unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasAltivec()) + return 0; + return 32; +} + +unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAltivec()) return 128; + return 0; + } + + if (ST->isPPC64()) + return 64; + return 32; + +} + +unsigned PPCTTI::getMaximumUnrollFactor() const { + unsigned Directive = ST->getDarwinDirective(); + // The 440 has no SIMD support, but floating-point instructions + // have a 5-cycle latency, so unroll by 5x for latency hiding. + if (Directive == PPC::DIR_440) + return 5; + + // The A2 has no SIMD support, but floating-point instructions + // have a 6-cycle latency, so unroll by 6x for latency hiding. + if (Directive == PPC::DIR_A2) + return 6; + + // FIXME: For lack of any better information, do no harm... + if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) + return 1; + + // For most things, modern systems have two execution units (and + // out-of-order execution). + return 2; +} + +unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Fall back to the default implementation. 
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); +} + +unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); +} + +unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); +} + +unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + + // Each load/store unit costs 1. + unsigned Cost = LT.first * 1; + + // PPC in general does not support unaligned loads and stores. They'll need + // to be decomposed based on the alignment factor. 
+ unsigned SrcBytes = LT.second.getStoreSize(); + if (SrcBytes && Alignment && Alignment < SrcBytes) + Cost *= (SrcBytes/Alignment); + + return Cost; +} + diff --git a/test/Analysis/CostModel/PowerPC/lit.local.cfg b/test/Analysis/CostModel/PowerPC/lit.local.cfg new file mode 100644 index 0000000000..4019eca0bb --- /dev/null +++ b/test/Analysis/CostModel/PowerPC/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'PowerPC' in targets: + config.unsupported = True + diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll new file mode 100644 index 0000000000..c77cce955a --- /dev/null +++ b/test/Analysis/CostModel/PowerPC/load_store.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @stores(i32 %arg) { + + ; CHECK: cost of 1 {{.*}} store + store i8 undef, i8* undef, align 4 + ; CHECK: cost of 1 {{.*}} store + store i16 undef, i16* undef, align 4 + ; CHECK: cost of 1 {{.*}} store + store i32 undef, i32* undef, align 4 + ; CHECK: cost of 2 {{.*}} store + store i64 undef, i64* undef, align 4 + ; CHECK: cost of 4 {{.*}} store + store i128 undef, i128* undef, align 4 + + ret i32 undef +} +define i32 @loads(i32 %arg) { + ; CHECK: cost of 1 {{.*}} load + load i8* undef, align 4 + ; CHECK: cost of 1 {{.*}} load + load i16* undef, align 4 + ; CHECK: cost of 1 {{.*}} load + load i32* undef, align 4 + ; CHECK: cost of 2 {{.*}} load + load i64* undef, align 4 + ; CHECK: cost of 4 {{.*}} load + load i128* undef, align 4 + + ret i32 undef +} + -- cgit v1.2.3