6 files changed, 83 insertions, 25 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 183ccbd48e..ad85c7e13a 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1366,7 +1366,7 @@ public:
   }
 
   /// HandleByVal - Target-specific cleanup for formal ByVal parameters.
-  virtual void HandleByVal(CCState *, unsigned &) const {}
+  virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
 
   /// CanLowerReturn - This hook should be implemented to check whether the
   /// return values described by the Outs array can fit into the return
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index cdae33c244..22b9140924 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
   if (MinAlign > (int)Align)
     Align = MinAlign;
   MF.getFrameInfo()->ensureMaxAlignment(Align);
-  TM.getTargetLowering()->HandleByVal(this, Size);
+  TM.getTargetLowering()->HandleByVal(this, Size, Align);
   unsigned Offset = AllocateStack(Size, Align);
   addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
 }
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 6ae44eb353..b2eb578487 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1656,22 +1656,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 /// and then confiscate the rest of the parameter registers to insure
 /// this.
 void
-ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(
+    CCState *State, unsigned &size, unsigned Align) const {
   unsigned reg = State->AllocateReg(GPRArgRegs, 4);
   assert((State->getCallOrPrologue() == Prologue ||
           State->getCallOrPrologue() == Call) &&
          "unhandled ParmContext");
   if ((!State->isFirstByValRegValid()) &&
       (ARM::R0 <= reg) && (reg <= ARM::R3)) {
-    State->setFirstByValReg(reg);
-    // At a call site, a byval parameter that is split between
-    // registers and memory needs its size truncated here.  In a
-    // function prologue, such byval parameters are reassembled in
-    // memory, and are not truncated.
-    if (State->getCallOrPrologue() == Call) {
-      unsigned excess = 4 * (ARM::R4 - reg);
-      assert(size >= excess && "expected larger existing stack allocation");
-      size -= excess;
+    if (Subtarget->isAAPCS_ABI() && Align > 4) {
+      unsigned AlignInRegs = Align / 4;
+      unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
+      for (unsigned i = 0; i < Waste; ++i)
+        reg = State->AllocateReg(GPRArgRegs, 4);
+    }
+    if (reg != 0) {
+      State->setFirstByValReg(reg);
+      // At a call site, a byval parameter that is split between
+      // registers and memory needs its size truncated here.  In a
+      // function prologue, such byval parameters are reassembled in
+      // memory, and are not truncated.
+      if (State->getCallOrPrologue() == Call) {
+        unsigned excess = 4 * (ARM::R4 - reg);
+        assert(size >= excess && "expected larger existing stack allocation");
+        size -= excess;
+      }
     }
   }
   // Confiscate any remaining parameter registers to preclude their
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index b5020c9710..9acab0b083 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -480,7 +480,7 @@ namespace llvm {
                 SmallVectorImpl<SDValue> &InVals) const;
 
     /// HandleByVal - Target-specific cleanup for ByVal support.
-    virtual void HandleByVal(CCState *, unsigned &) const;
+    virtual void HandleByVal(CCState *, unsigned &, unsigned) const;
 
     /// IsEligibleForTailCallOptimization - Check whether the call is eligible
     /// for tail call optimization. Targets which want to do tail call
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
new file mode 100644
index 0000000000..b5f6d311cb
--- /dev/null
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; Test that we correctly use registers and align elements when using va_arg
+
+%struct_t = type { double, double, double }
+@static_val = constant %struct_t { double 1.0, double 2.0, double 3.0 }
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+
+; CHECK: test_byval_8_bytes_alignment:
+define void @test_byval_8_bytes_alignment(i32 %i, ...) {
+entry:
+; CHECK: stm     r0, {r1, r2, r3}
+  %g = alloca i8*
+  %g1 = bitcast i8** %g to i8*
+  call void @llvm.va_start(i8* %g1)
+
+; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
+; CHECK: bfc	[[REG]], #0, #3
+  %0 = va_arg i8** %g, double
+  call void @llvm.va_end(i8* %g1)
+  
+  ret void
+}
+
+; CHECK: main:
+; CHECK: ldm     r0, {r2, r3}
+define i32 @main() {
+entry:
+  call void (i32, ...)* @test_byval_8_bytes_alignment(i32 555, %struct_t* byval @static_val)
+  ret i32 0
+}
+
+declare void @f(double);
+
+; CHECK:     test_byval_8_bytes_alignment_fixed_arg:
+; CHECK-NOT:   str     r1
+; CHECK:       str     r3, [sp, #12]
+; CHECK:       str     r2, [sp, #8]
+; CHECK-NOT:   str     r1
+define void @test_byval_8_bytes_alignment_fixed_arg(i32 %n1, %struct_t* byval %val) nounwind {
+entry:
+  %a = getelementptr inbounds %struct_t* %val, i32 0, i32 0
+  %0 = load double* %a
+  call void (double)* @f(double %0)
+  ret void
+}
+
+; CHECK: main_fixed_arg:
+; CHECK: ldm     r0, {r2, r3}
+define i32 @main_fixed_arg() {
+entry:
+  call void (i32, %struct_t*)* @test_byval_8_bytes_alignment_fixed_arg(i32 555, %struct_t* byval @static_val)
+  ret i32 0
+}
+
diff --git a/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll b/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
index 6eb8fcb2db..f2395107d4 100644
--- a/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
+++ b/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
@@ -1,23 +1,16 @@
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-none-linux- | FileCheck %s
 ; Check that LDRB_POST_IMM instruction emitted properly.
 
-%my_struct_t = type { double, double, double }
-@main.val = private unnamed_addr constant %my_struct_t { double 1.0, double 2.0, double 3.0 }, align 8
-
-declare void @f(i32 %n1, %my_struct_t* byval %val);
+%my_struct_t = type { i8, i8, i8, i8, i8 }
+@main.val = private unnamed_addr constant %my_struct_t { i8 1, i8 2, i8 3, i8 4, i8 5 }
 
+declare void @f(i32 %n1, i32 %n2, i32 %n3, %my_struct_t* byval %val);
 
 ; CHECK: main:
 define i32 @main() nounwind {
 entry:
-  %val = alloca %my_struct_t, align 8
-  %0 = bitcast %my_struct_t* %val to i8*
-
 ; CHECK: ldrb	{{(r[0-9]+)}}, {{(\[r[0-9]+\])}}, #1
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast (%my_struct_t* @main.val to i8*), i32 24, i32 8, i1 false)
-
-  call void @f(i32 555, %my_struct_t* byval %val)
+  call void @f(i32 555, i32 555, i32 555, %my_struct_t* byval @main.val)
   ret i32 0
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind