summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Holewinski <jholewinski@nvidia.com>2014-06-27 18:35:14 +0000
committerJustin Holewinski <jholewinski@nvidia.com>2014-06-27 18:35:14 +0000
commit7d7f3e392303a1be037cad98a2f79141575c8e51 (patch)
tree5fab1279bbfcd3c2243230e2a5c2aa8a61f4800d
parenta54609ed935ffb5bc83fad3b9839275f96b11d3f (diff)
downloadllvm-7d7f3e392303a1be037cad98a2f79141575c8e51.tar.gz
llvm-7d7f3e392303a1be037cad98a2f79141575c8e51.tar.bz2
llvm-7d7f3e392303a1be037cad98a2f79141575c8e51.tar.xz
[NVPTX] Directly control the Machine SSA passes that are invoked for NVPTX.
NVPTX is a bit special in the optimizations it requires, so this gives us better control over the backend optimization pipeline.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211927 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp41
1 files changed, 41 insertions, 0 deletions
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 6e18322f82..069a1b9966 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -106,6 +106,7 @@ public:
bool addInstSelector() override;
bool addPreRegAlloc() override;
bool addPostRegAlloc() override;
+ void addMachineSSAOptimization() override;
FunctionPass *createTargetRegisterAllocator(bool) override;
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
@@ -207,3 +208,43 @@ void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
printAndVerify("After StackSlotColoring");
}
+
+void NVPTXPassConfig::addMachineSSAOptimization() {
+ // NVPTX-specific Machine SSA pipeline; starts with pre-RA tail duplication.
+ if (addPass(&EarlyTailDuplicateID)) // verify only if addPass() returns truthy
+ printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(&OptimizePHIsID);
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+
+ // With optimization, dead code should already be eliminated. One known
+ // exception: lowered code for arguments used only by tail calls that reuse
+ // the incoming stack arguments directly (see t11 in
+ // test/CodeGen/X86/sibcall.ll). NOTE(review): X86 example; confirm for NVPTX.
+ addPass(&DeadMachineInstructionElimID);
+ printAndVerify("After codegen DCE pass");
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ if (addILPOpts())
+ printAndVerify("After ILP optimizations");
+
+ addPass(&MachineLICMID);
+ addPass(&MachineCSEID);
+
+ addPass(&MachineSinkingID);
+ printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+ addPass(&PeepholeOptimizerID);
+ printAndVerify("After codegen peephole optimization pass");
+}