Update the memcpy rewriting to fully support widened int rewriting.

This includes extracting ints for copying elsewhere and inserting ints when copying into the alloca. This should fix the CanSROA assertion coming out of Clang's regression test suite.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165931 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chandler Carruth <chandlerc@gmail.com> 2012-10-15 10:24:43 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2012-10-15 10:24:43 +0000
commit: d2cd73f6a5103f5de221a67410e35e6bee211fa3 (patch)
tree: d1bf9e30986daa6bea0e45ff6bea7282b2256f52
parent: 94fc64c42fe27aa16dd5611eb3657ff0297801c8 (diff)
download: llvm-d2cd73f6a5103f5de221a67410e35e6bee211fa3.tar.gz
llvm-d2cd73f6a5103f5de221a67410e35e6bee211fa3.tar.bz2
llvm-d2cd73f6a5103f5de221a67410e35e6bee211fa3.tar.xz
2 files changed, 31 insertions, 9 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 419756d4dc..7d2ce098aa 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2717,9 +2717,9 @@ private:
     // If this doesn't map cleanly onto the alloca type, and that type isn't
     // a single value type, just emit a memcpy.
     bool EmitMemCpy
-      = !VecTy && (BeginOffset != NewAllocaBeginOffset ||
-                   EndOffset != NewAllocaEndOffset ||
-                   !NewAI.getAllocatedType()->isSingleValueType());
+      = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
+                             EndOffset != NewAllocaEndOffset ||
+                             !NewAI.getAllocatedType()->isSingleValueType());
 
     // If we're just going to emit a memcpy, the alloca hasn't changed, and the
     // size hasn't been shrunk based on analysis of the viable range, this is
@@ -2741,14 +2741,23 @@ private:
     if (Pass.DeadSplitInsts.insert(&II))
       Pass.DeadInsts.push_back(&II);
 
-    bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset ||
-                                     EndOffset < NewAllocaEndOffset);
+    bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+                         EndOffset == NewAllocaEndOffset;
+    bool IsVectorElement = VecTy && !IsWholeAlloca;
+    uint64_t Size = EndOffset - BeginOffset;
+    IntegerType *SubIntTy
+      = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
 
     Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
                               : II.getRawDest()->getType();
-    if (!EmitMemCpy)
-      OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo()
-                                   : NewAI.getType();
+    if (!EmitMemCpy) {
+      if (IsVectorElement)
+        OtherPtrTy = VecTy->getElementType()->getPointerTo();
+      else if (IntTy && !IsWholeAlloca)
+        OtherPtrTy = SubIntTy->getPointerTo();
+      else
+        OtherPtrTy = NewAI.getType();
+    }
 
     // Compute the other pointer, folding as much as possible to produce
     // a single, simple GEP in most cases.
@@ -2795,11 +2804,20 @@ private:
         IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
         getIndex(IRB, BeginOffset),
         getName(".copyextract"));
+    } else if (IntTy && !IsWholeAlloca && !IsDest) {
+      Src = extractInteger(IRB, SubIntTy, BeginOffset);
     } else {
       Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
                                   getName(".copyload"));
     }
 
+    if (IntTy && !IsWholeAlloca && IsDest) {
+      StoreInst *Store = insertInteger(IRB, Src, BeginOffset);
+      (void)Store;
+      DEBUG(dbgs() << "          to: " << *Store << "\n");
+      return true;
+    }
+
     if (IsVectorElement && IsDest) {
       // We have to insert into a loaded copy before storing.
       Src = IRB.CreateInsertElement(
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index b33ffa65f6..644fda167d 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1046,8 +1046,12 @@ entry:
   ; Or a memset of the whole thing.
   call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
 
-  ; Store to the high 32-bits...
+  ; Write to the high 32-bits with a memcpy.
   %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
+  %d.raw = bitcast double* %d to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
+
+  ; Store to the high 32-bits...
   %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
   store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
author	Chandler Carruth <chandlerc@gmail.com>	2012-10-15 10:24:43 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2012-10-15 10:24:43 +0000
commit	d2cd73f6a5103f5de221a67410e35e6bee211fa3 (patch)
tree	d1bf9e30986daa6bea0e45ff6bea7282b2256f52
parent	94fc64c42fe27aa16dd5611eb3657ff0297801c8 (diff)
download	llvm-d2cd73f6a5103f5de221a67410e35e6bee211fa3.tar.gz llvm-d2cd73f6a5103f5de221a67410e35e6bee211fa3.tar.bz2 llvm-d2cd73f6a5103f5de221a67410e35e6bee211fa3.tar.xz