diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-08-21 21:00:15 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-08-21 21:00:15 +0000 |
commit | 1887c1c2f99903d13406e723f2dcbab4511e3f49 (patch) | |
tree | a41725c80694a8834cd1eb9557d175b2880cb389 /lib | |
parent | 66b17ba0d263442b8b4e82aaa08acc0df85e1787 (diff) | |
download | llvm-1887c1c2f99903d13406e723f2dcbab4511e3f49.tar.gz llvm-1887c1c2f99903d13406e723f2dcbab4511e3f49.tar.bz2 llvm-1887c1c2f99903d13406e723f2dcbab4511e3f49.tar.xz |
Fix a number of byval / memcpy / memset related codegen issues.
1. x86-64 byval alignment should be the max of 8 and the alignment of the type. Previously the code did not do what the commit message said.
2. Do not use byte repeat move and store operations. These are slow.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55139 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 90 |
1 file changed, 46 insertions, 44 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fdc461894d..a6bffe285f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -796,8 +796,14 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) { /// that contain SSE vectors are placed at 16-byte boundaries while the rest /// are at 4-byte boundaries. unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { - if (Subtarget->is64Bit()) - return getTargetData()->getABITypeAlignment(Ty); + if (Subtarget->is64Bit()) { + // Max of 8 and alignment of type. + unsigned TyAlign = getTargetData()->getABITypeAlignment(Ty); + if (TyAlign > 8) + return TyAlign; + return 8; + } + unsigned Align = 4; if (Subtarget->hasSSE1()) getMaxByValAlign(Ty, Align); @@ -5014,16 +5020,16 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, const Value *DstSV, uint64_t DstSVOff) { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); /// If not DWORD aligned or size is more than the threshold, call the library. /// The libc version is likely to be faster for these cases. It can use the /// address value and run time information about the CPU. - if ((Align & 3) == 0 || + if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) { SDValue InFlag(0, 0); @@ -5065,27 +5071,27 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, // If the value is a constant, then we can potentially use larger sets. 
switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - ValReg = X86::AX; - Val = (Val << 8) | Val; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - ValReg = X86::EAX; - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned - AVT = MVT::i64; - ValReg = X86::RAX; - Val = (Val << 32) | Val; - } - break; - default: // Byte aligned - AVT = MVT::i8; - ValReg = X86::AL; - Count = DAG.getIntPtrConstant(SizeVal); - break; + case 2: // WORD aligned + AVT = MVT::i16; + ValReg = X86::AX; + Val = (Val << 8) | Val; + break; + case 0: // DWORD aligned + AVT = MVT::i32; + ValReg = X86::EAX; + Val = (Val << 8) | Val; + Val = (Val << 16) | Val; + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned + AVT = MVT::i64; + ValReg = X86::RAX; + Val = (Val << 32) | Val; + } + break; + default: // Byte aligned + AVT = MVT::i8; + ValReg = X86::AL; + Count = DAG.getIntPtrConstant(SizeVal); + break; } if (AVT.bitsGT(MVT::i8)) { @@ -5153,13 +5159,11 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, SDValue X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff){ - + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { // This requires the copy size to be a constant, preferrably // within a subtarget-specific limit. 
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -5169,21 +5173,19 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) return SDValue(); - MVT AVT; - unsigned BytesLeft = 0; - if (Align >= 8 && Subtarget->is64Bit()) + /// If not DWORD aligned, call the library. + if ((Align & 3) != 0) + return SDValue(); + + // DWORD aligned + MVT AVT = MVT::i32; + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned AVT = MVT::i64; - else if (Align >= 4) - AVT = MVT::i32; - else if (Align >= 2) - AVT = MVT::i16; - else - AVT = MVT::i8; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; SDValue Count = DAG.getIntPtrConstant(CountVal); - BytesLeft = SizeVal % UBytes; + unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag(0, 0); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, |