From 8bfb46e790b8dbe3ccb7a79c530712fc3404fb50 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Thu, 29 May 2014 01:55:07 +0000 Subject: Add LoadCombine pass. This pass is disabled by default. Use -combine-loads to enable in -O[1-3] Differential revision: http://reviews.llvm.org/D3580 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209791 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 11 ++ lib/Transforms/Scalar/CMakeLists.txt | 1 + lib/Transforms/Scalar/LoadCombine.cpp | 268 ++++++++++++++++++++++++++++++ lib/Transforms/Scalar/Scalar.cpp | 1 + 4 files changed, 281 insertions(+) create mode 100644 lib/Transforms/Scalar/LoadCombine.cpp (limited to 'lib') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 38e1b8e166..c20c717de5 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -53,6 +53,10 @@ static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt RunLoadCombine("combine-loads", cl::init(false), + cl::Hidden, + cl::desc("Run the load combining pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -65,6 +69,7 @@ PassManagerBuilder::PassManagerBuilder() { SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; RerollLoops = RunLoopRerolling; + LoadCombine = RunLoadCombine; } PassManagerBuilder::~PassManagerBuilder() { @@ -236,6 +241,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopUnrollPass()); } + if (LoadCombine) + MPM.add(createLoadCombinePass()); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Clean up after everything. @@ -352,6 +360,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // More scalar chains could be vectorized due to more alias information PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (LoadCombine) + PM.add(createLoadCombinePass()); + // Cleanup and simplify the code after the scalar optimizations. PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 3ad1488d00..b2461fc627 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_library(LLVMScalarOpts IndVarSimplify.cpp JumpThreading.cpp LICM.cpp + LoadCombine.cpp LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp new file mode 100644 index 0000000000..846aa703c9 --- /dev/null +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -0,0 +1,268 @@ +//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This transformation combines adjacent loads. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetFolder.h" +#include "llvm/Pass.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "load-combine" + +STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining"); +STATISTIC(NumLoadsCombined, "Number of loads combined"); + +namespace { +struct PointerOffsetPair { + Value *Pointer; + uint64_t Offset; +}; + +struct LoadPOPPair { + LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O) + : Load(L), POP(P), InsertOrder(O) {} + LoadPOPPair() {} + LoadInst *Load; + PointerOffsetPair POP; + /// \brief The new load needs to be created before the first load in IR order. + unsigned InsertOrder; +}; + +class LoadCombine : public BasicBlockPass { + LLVMContext *C; + const DataLayout *DL; + +public: + LoadCombine() + : BasicBlockPass(ID), + C(nullptr), DL(nullptr) { + initializeSROAPass(*PassRegistry::getPassRegistry()); + } + bool doInitialization(Function &) override; + bool runOnBasicBlock(BasicBlock &BB) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + const char *getPassName() const override { return "LoadCombine"; } + static char ID; + + typedef IRBuilder BuilderTy; + +private: + BuilderTy *Builder; + + PointerOffsetPair getPointerOffsetPair(LoadInst &); + bool combineLoads(DenseMap> &); + bool aggregateLoads(SmallVectorImpl &); + bool combineLoads(SmallVectorImpl &); +}; +} + +bool LoadCombine::doInitialization(Function &F) { + DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); + C = &F.getContext(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + if (!DLP) { + DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n"); + return false; + } + DL = &DLP->getDataLayout(); + return true; +} + +PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { + PointerOffsetPair POP; + POP.Pointer = LI.getPointerOperand(); + POP.Offset = 0; + while (isa(POP.Pointer) || isa(POP.Pointer)) { + if (auto *GEP = dyn_cast(POP.Pointer)) { + unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType()); + APInt Offset(BitWidth, 0); + if (GEP->accumulateConstantOffset(*DL, Offset)) + POP.Offset += Offset.getZExtValue(); + else + // Can't handle GEPs with variable indices. + return POP; + POP.Pointer = GEP->getPointerOperand(); + } else if (auto *BC = dyn_cast(POP.Pointer)) + POP.Pointer = BC->getOperand(0); + } + return POP; +} + +bool LoadCombine::combineLoads( + DenseMap> &LoadMap) { + bool Combined = false; + for (auto &Loads : LoadMap) { + if (Loads.second.size() < 2) + continue; + std::sort(Loads.second.begin(), Loads.second.end(), + [](const LoadPOPPair &A, const LoadPOPPair &B) { + return A.POP.Offset < B.POP.Offset; + }); + if (aggregateLoads(Loads.second)) + Combined = true; + } + return Combined; +} + +/// \brief Try to aggregate loads from a sorted list of loads to be combined. +/// +/// It is guaranteed that no writes occur between any of the loads. All loads +/// have the same base pointer. There are at least two loads. +bool LoadCombine::aggregateLoads(SmallVectorImpl &Loads) { + assert(Loads.size() >= 2 && "Insufficient loads!"); + LoadInst *BaseLoad = nullptr; + SmallVector AggregateLoads; + bool Combined = false; + uint64_t PrevOffset = -1ull; + uint64_t PrevSize = 0; + for (auto &L : Loads) { + if (PrevOffset == -1ull) { + BaseLoad = L.Load; + PrevOffset = L.POP.Offset; + PrevSize = DL->getTypeStoreSize(L.Load->getType()); + AggregateLoads.push_back(L); + continue; + } + if (L.Load->getAlignment() > BaseLoad->getAlignment()) + continue; + if (L.POP.Offset > PrevOffset + PrevSize) { + // No other load will be combinable + if (combineLoads(AggregateLoads)) + Combined = true; + AggregateLoads.clear(); + PrevOffset = -1; + continue; + } + if (L.POP.Offset != PrevOffset + PrevSize) + // This load is offset less than the size of the last load. + // FIXME: We may want to handle this case. + continue; + PrevOffset = L.POP.Offset; + PrevSize = DL->getTypeStoreSize(L.Load->getType()); + AggregateLoads.push_back(L); + } + if (combineLoads(AggregateLoads)) + Combined = true; + return Combined; +} + +/// \brief Given a list of combinable load. Combine the maximum number of them. +bool LoadCombine::combineLoads(SmallVectorImpl &Loads) { + // Remove loads from the end while the size is not a power of 2. + unsigned TotalSize = 0; + for (const auto &L : Loads) + TotalSize += L.Load->getType()->getPrimitiveSizeInBits(); + while (TotalSize != 0 && !isPowerOf2_32(TotalSize)) + TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits(); + if (Loads.size() < 2) + return false; + + DEBUG({ + dbgs() << "***** Combining Loads ******\n"; + for (const auto &L : Loads) { + dbgs() << L.POP.Offset << ": " << *L.Load << "\n"; + } + }); + + // Find first load. This is where we put the new load. + LoadPOPPair FirstLP; + FirstLP.InsertOrder = -1u; + for (const auto &L : Loads) + if (L.InsertOrder < FirstLP.InsertOrder) + FirstLP = L; + + unsigned AddressSpace = + FirstLP.POP.Pointer->getType()->getPointerAddressSpace(); + + Builder->SetInsertPoint(FirstLP.Load); + Value *Ptr = Builder->CreateConstGEP1_64( + Builder->CreatePointerCast(Loads[0].POP.Pointer, + Builder->getInt8PtrTy(AddressSpace)), + Loads[0].POP.Offset); + LoadInst *NewLoad = new LoadInst( + Builder->CreatePointerCast( + Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize), + Ptr->getType()->getPointerAddressSpace())), + Twine(Loads[0].Load->getName()) + ".combined", false, + Loads[0].Load->getAlignment(), FirstLP.Load); + + for (const auto &L : Loads) { + Builder->SetInsertPoint(L.Load); + Value *V = Builder->CreateExtractInteger( + *DL, NewLoad, cast(L.Load->getType()), + L.POP.Offset - Loads[0].POP.Offset, "combine.extract"); + L.Load->replaceAllUsesWith(V); + } + + NumLoadsCombined = NumLoadsCombined + Loads.size(); + return true; +} + +bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { + if (skipOptnoneFunction(BB) || !DL) + return false; + + IRBuilder + TheBuilder(BB.getContext(), TargetFolder(DL)); + Builder = &TheBuilder; + + DenseMap> LoadMap; + + bool Combined = false; + unsigned Index = 0; + for (auto &I : BB) { + if (I.mayWriteToMemory() || I.mayThrow()) { + if (combineLoads(LoadMap)) + Combined = true; + LoadMap.clear(); + continue; + } + LoadInst *LI = dyn_cast(&I); + if (!LI) + continue; + ++NumLoadsAnalyzed; + if (!LI->isSimple() || !LI->getType()->isIntegerTy()) + continue; + auto POP = getPointerOffsetPair(*LI); + if (!POP.Pointer) + continue; + LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++)); + } + if (combineLoads(LoadMap)) + Combined = true; + return Combined; +} + +void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +char LoadCombine::ID = 0; + +BasicBlockPass *llvm::createLoadCombinePass() { + return new LoadCombine(); +} + +INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false, + false) diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index f8f828c840..edf012d811 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -65,6 +65,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSinkingPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); + initializeLoadCombinePass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { -- cgit v1.2.3