summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2008-01-23 23:17:41 +0000
committerEvan Cheng <evan.cheng@apple.com>2008-01-23 23:17:41 +0000
commit29286502628867b31872ead2f2527592480f0970 (patch)
treeef8b23bf5acebabc33fea3c8928a9f83b85ddeae
parentf02e26abc04c95be6be02b614ea68616ed5c0927 (diff)
downloadllvm-29286502628867b31872ead2f2527592480f0970.tar.gz
llvm-29286502628867b31872ead2f2527592480f0970.tar.bz2
llvm-29286502628867b31872ead2f2527592480f0970.tar.xz
Let each target decide byval alignment. For X86, it's 4-byte unless the aggregate contains SSE vector(s). For x86-64, it's max of 8 or alignment of the type.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46286 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Target/TargetLowering.h6
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp40
-rw-r--r--lib/Target/X86/X86ISelLowering.h6
-rw-r--r--test/CodeGen/X86/byval4.ll4
-rw-r--r--test/CodeGen/X86/byval5.ll4
-rw-r--r--test/CodeGen/X86/byval6.ll16
-rw-r--r--test/CodeGen/X86/byval7.ll14
7 files changed, 85 insertions, 5 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index f55c2d0da2..55147172fc 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -405,6 +405,10 @@ public:
return VT == MVT::iPTR ? PointerTy : VT;
}
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area.
+ virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+
/// getRegisterType - Return the type of registers that this ValueType will
/// eventually require.
MVT::ValueType getRegisterType(MVT::ValueType VT) const {
@@ -433,7 +437,7 @@ public:
}
assert(0 && "Unsupported extended type!");
}
-
+
/// hasTargetDAGCombine - If true, the target has custom DAG combine
/// transformations that it can perform for the specified node.
bool hasTargetDAGCombine(ISD::NodeType NT) const {
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5c1ce69eb3..9140539e38 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -672,6 +672,46 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
allowUnalignedMemoryAccesses = true; // x86 supports it!
}
+/// getMaxByValAlign - Helper for getByValTypeAlignment. Recursively walks Ty and updates MaxAlign in place: a 128-bit vector sets it to 16,
+/// a 64-bit vector raises it to at least 8, and arrays/structs recurse into their element types. Any other type leaves MaxAlign unchanged.
+static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+ if (MaxAlign == 16) // 16 is the largest value this helper ever assigns, so there is nothing left to find
+ return;
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getBitWidth() == 128) // 128-bit vector -> 16-byte alignment
+ MaxAlign = 16;
+ else if (VTy->getBitWidth() == 64) // 64-bit vector -> at least 8-byte alignment
+ if (MaxAlign < 8)
+ MaxAlign = 8;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0; // start from 0 so the recursion reports only what the element type itself requires
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0; // per-field requirement, merged into MaxAlign below
+ getMaxByValAlign(STy->getElementType(i), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16) // ceiling reached; remaining fields cannot change the result
+ break;
+ }
+ }
+ return;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. On 32-bit X86, aggregates
+/// containing a 128-bit vector are placed at 16-byte boundaries, those whose widest vector is 64 bits at
+/// 8-byte boundaries, and the rest at 4-byte boundaries. In 64-bit mode the type's ABI alignment is returned.
+unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ if (Subtarget->is64Bit()) // NOTE(review): the commit message says "max of 8 or alignment of the type" for x86-64, but no floor of 8 is applied here - confirm intent
+ return getTargetData()->getABITypeAlignment(Ty);
+ unsigned Align = 4; // 32-bit default; getMaxByValAlign may raise it to 8 or 16
+ getMaxByValAlign(Ty, Align);
+ return Align;
+}
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
/// jumptable.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index acdc579f7a..9df3a2e9fc 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -322,6 +322,12 @@ namespace llvm {
/// getStackPtrReg - Return the stack pointer register we are using: either
/// ESP or RSP.
unsigned getStackPtrReg() const { return X86StackPtr; }
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. For X86, aggregates
+ /// that contain SSE vectors are placed at 16-byte boundaries while the rest
+ /// are at 4-byte boundaries.
+ virtual unsigned getByValTypeAlignment(const Type *Ty) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 9ce635b562..591749f768 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsw | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2
+; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
%struct.s = type { i16, i16, i16, i16, i16, i16 }
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index 95bf3f4239..4965d16666 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsb | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2
+; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
%struct.s = type { i8, i8, i8, i8, i8, i8 }
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
new file mode 100644
index 0000000000..47269d21d9
--- /dev/null
+++ b/test/CodeGen/X86/byval6.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep add | not grep 16
+
+ %struct.W = type { x86_fp80, x86_fp80 }
+@B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
+@.cpx = internal constant %struct.W { x86_fp80 0xK4001E000000000000000, x86_fp80 0xK40028000000000000000 }
+
+define i32 @main() nounwind {
+entry:
+ tail call void (i32, ...)* @bar( i32 3, %struct.W* byval @.cpx ) nounwind
+ tail call void (i32, ...)* @baz( i32 3, %struct.W* byval @B ) nounwind
+ ret i32 undef
+}
+
+declare void @bar(i32, ...)
+
+declare void @baz(i32, ...)
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
new file mode 100644
index 0000000000..54210c8036
--- /dev/null
+++ b/test/CodeGen/X86/byval7.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep add | grep 16
+
+ %struct.S = type { <2 x i64> }
+
+define i32 @main() nounwind {
+entry:
+ %s = alloca %struct.S ; <%struct.S*> [#uses=2]
+ %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
+ call void @t( i32 1, %struct.S* byval %s ) nounwind
+ ret i32 0
+}
+
+declare void @t(i32, %struct.S* byval )