R600: Unconditionally unroll loops that contain GEPs with alloca pointers

Implement the getUnrollingPreferences() function for AMDGPUTargetTransformInfo so that loops that do address calculations on pointers derived from alloca are unconditionally unrolled. Unrolling these loops makes it more likely that SROA will be able to eliminate the allocas, which is a big win for R600 since memory allocated by alloca (private memory) is really slow.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199916 91177308-0d34-0410-b5e6-96231b3b80d8
author: Tom Stellard <thomas.stellard@amd.com> 2014-01-23 18:49:28 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2014-01-23 18:49:28 +0000
commit: 01df2fa3c427b49b60da9ea19b6c42ae13e0a134 (patch)
tree: 4620c85b0daa3b72ff41e96624179aca5f1668f3 /lib/Target/R600
parent: fc7edee6313d05868d783e8c01850d97cb6a9799 (diff)
download: llvm-01df2fa3c427b49b60da9ea19b6c42ae13e0a134.tar.gz
llvm-01df2fa3c427b49b60da9ea19b6c42ae13e0a134.tar.bz2
llvm-01df2fa3c427b49b60da9ea19b6c42ae13e0a134.tar.xz
1 files changed, 29 insertions, 0 deletions
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index ca1e0b6528..a4feec7131 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -18,7 +18,9 @@
 #define DEBUG_TYPE "AMDGPUtti"
 #include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/CostTable.h"
 #include "llvm/Target/TargetLowering.h"
@@ -73,6 +75,8 @@ public:
 
   virtual bool hasBranchDivergence() const;
 
+  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+
   /// @}
 };
 
@@ -88,3 +92,28 @@ llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
 }
 
 bool AMDGPUTTI::hasBranchDivergence() const { return true; }
+
+/// Raise the unroll threshold to UINT_MAX for any loop \p L that contains a
+/// GEP whose pointer operand's underlying object is an alloca.
+void AMDGPUTTI::getUnrollingPreferences(Loop *L,
+                                        UnrollingPreferences &UP) const {
+  for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+       BI != BE; ++BI) {
+    BasicBlock *BB = *BI;
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+         ++I) {
+      const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
+      if (!GEP)
+        continue;
+      const Value *Ptr = GEP->getPointerOperand();
+      if (!isa<AllocaInst>(GetUnderlyingObject(Ptr)))
+        continue;
+      // allocas force indirect addressing in the code generator, which is
+      // slow and prone to compiler bugs.  Unconditionally unrolling a loop
+      // that does address calculation on an alloca pointer usually lets
+      // SROA eliminate the alloca.  One match is enough, so stop scanning.
+      UP.Threshold = UINT_MAX;
+      return;
+    }
+  }
+}
author	Tom Stellard <thomas.stellard@amd.com>	2014-01-23 18:49:28 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2014-01-23 18:49:28 +0000
commit	01df2fa3c427b49b60da9ea19b6c42ae13e0a134 (patch)
tree	4620c85b0daa3b72ff41e96624179aca5f1668f3 /lib/Target/R600
parent	fc7edee6313d05868d783e8c01850d97cb6a9799 (diff)
download	llvm-01df2fa3c427b49b60da9ea19b6c42ae13e0a134.tar.gz llvm-01df2fa3c427b49b60da9ea19b6c42ae13e0a134.tar.bz2 llvm-01df2fa3c427b49b60da9ea19b6c42ae13e0a134.tar.xz