Rewrite the memset_pattern pattern generation stuff to accept any 2/4/8/16-byte constant, including globals.

This makes us generate much "prettier" pattern globals as well, because it doesn't break the constant down to an array of bytes all the time. It also enables us to handle stores of relocatable globals. This kicks in about 48 times in 254.gap, giving us stuff like this:

@.memset_pattern40 = internal constant [2 x %struct.TypHeader* (%struct.TypHeader*, %struct.TypHeader*)*] [%struct.TypHeader* (%struct.TypHeader*, %struct.TypHeader*)* @IsFalse, %struct.TypHeader* (%struct.TypHeader*, %struct.TypHeader*)* @IsFalse], align 16
...
call void @memset_pattern16(i8* %scevgep5859, i8* bitcast ([2 x %struct.TypHeader* (%struct.TypHeader*, %struct.TypHeader*)*]* @.memset_pattern40 to i8*), i64 %tmp75) nounwind

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126044 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2011-02-19 19:56:44 +0000
committer: Chris Lattner <sabre@nondot.org> 2011-02-19 19:56:44 +0000
commit: 80e8b506b8134d63dc3cb6211cccc34be4b19d40 (patch)
tree: 21ebd657abbe2afd75135ee6ce91e24e7e2e55a1
parent: 41bfbb0a8776674c486682cbf2aa80f15abfef68 (diff)
download: llvm-80e8b506b8134d63dc3cb6211cccc34be4b19d40.tar.gz
llvm-80e8b506b8134d63dc3cb6211cccc34be4b19d40.tar.bz2
llvm-80e8b506b8134d63dc3cb6211cccc34be4b19d40.tar.xz
2 files changed, 39 insertions, 34 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index be3ff9258f..d7fa149492 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -388,43 +388,24 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
   if (Size == 0 || (Size & 7) || (Size & (Size-1)))
     return 0;
   
-  // Convert the constant to an integer type of the appropriate size so we can
-  // start hacking on it.
-  if (isa<PointerType>(V->getType()))
-    C = ConstantExpr::getPtrToInt(C, IntegerType::get(C->getContext(), Size));
-  else if (isa<VectorType>(V->getType()) || V->getType()->isFloatingPointTy())
-    C = ConstantExpr::getBitCast(C, IntegerType::get(C->getContext(), Size));
-  else if (!isa<IntegerType>(V->getType()))
-    return 0;  // Unhandled type.
+  // Don't care enough about darwin/ppc to implement this.
+  if (TD.isBigEndian())
+    return 0;
 
   // Convert to size in bytes.
   Size /= 8;
-  
-  // If we couldn't fold this to an integer, we fail.  We don't bother to handle
-  // relocatable expressions like the address of a global yet.
-  // FIXME!
-  ConstantInt *CI = dyn_cast<ConstantInt>(C);
-  if (CI == 0) return 0;
 
-  APInt CVal = CI->getValue();
-  
   // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
-  // if the top and bottom are the same.
+  // if the top and bottom are the same (e.g. for vectors and large integers).
   if (Size > 16) return 0;
-
-  // If this is a big endian target (PPC) then we need to bswap.
-  if (TD.isBigEndian())
-    CVal = CVal.byteSwap();
-  
-  // Determine what each byte of the pattern value should be.
-  char Value[16];
-  for (unsigned i = 0; i != 16; ++i) {
-    // Get the byte value we're indexing into.
-    unsigned CByte = i % Size;
-    Value[i] = (unsigned char)(CVal.getZExtValue() >> CByte);
-  }
   
-  return ConstantArray::get(V->getContext(), StringRef(Value, 16), false);
+  // If the constant is exactly 16 bytes, just use it.
+  if (Size == 16) return C;
+
+  // Otherwise, we'll use an array of the constants.
+  unsigned ArraySize = 16/Size;
+  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
 }
 
 
@@ -518,8 +499,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                             PatternValue, ".memset_pattern");
     GV->setUnnamedAddr(true); // Ok to merge these.
     GV->setAlignment(16);
-    Value *PatternPtr = Builder.CreateConstInBoundsGEP2_32(GV, 0, 0, "pattern");
-    
+    Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
     NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
   }
   
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index 122d25a22a..485114c8d4 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -277,7 +277,7 @@ for.end13:                                        ; preds = %for.inc10
 ; On darwin10 (which is the triple in this .ll file) this loop can be turned
 ; into a memset_pattern call.
 ; rdar://9009151
-define void @test11(i32* nocapture %P) nounwind ssp {
+define void @test11_pattern(i32* nocapture %P) nounwind ssp {
 entry:
   br label %for.body
 
@@ -291,7 +291,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 for.end:                                          ; preds = %for.body
   ret void
-; CHECK: @test11
+; CHECK: @test11_pattern
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: memset_pattern
@@ -322,3 +322,28 @@ for.end:                                          ; preds = %for.body
 ; CHECK: ret void
 }
 
+@G = global i32 5
+
+; This store-of-address loop can be turned into a memset_pattern call.
+; rdar://9009151
+define void @test13_pattern(i32** nocapture %P) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+  %arrayidx = getelementptr i32** %P, i64 %indvar
+  store i32* @G, i32** %arrayidx, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+; CHECK: @test13_pattern
+; CHECK-NEXT: entry:
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: memset_pattern
+; CHECK-NOT: store
+; CHECK: ret void
+}
author	Chris Lattner <sabre@nondot.org>	2011-02-19 19:56:44 +0000
committer	Chris Lattner <sabre@nondot.org>	2011-02-19 19:56:44 +0000
commit	80e8b506b8134d63dc3cb6211cccc34be4b19d40 (patch)
tree	21ebd657abbe2afd75135ee6ce91e24e7e2e55a1
parent	41bfbb0a8776674c486682cbf2aa80f15abfef68 (diff)
download	llvm-80e8b506b8134d63dc3cb6211cccc34be4b19d40.tar.gz llvm-80e8b506b8134d63dc3cb6211cccc34be4b19d40.tar.bz2 llvm-80e8b506b8134d63dc3cb6211cccc34be4b19d40.tar.xz