author     Benjamin Kramer <benny.kra@googlemail.com>    2012-11-14 20:08:40 +0000
committer  Benjamin Kramer <benny.kra@googlemail.com>    2012-11-14 20:08:40 +0000
commit     2dbe929685da37e904d6bb0c5a3504e1bafe348f (patch)
tree       1d50f76beb60b492c0c66395316c637c02baa262 /lib
parent     97d19ebe5b5bf27617e536a16fa232116cefe914 (diff)
X86: Enable SSE memory intrinsics even when stack alignment is less than 16 bytes.
The stack realignment code was fixed to work when both stack realignment and
a dynamic alloca are present, so this shouldn't cause correctness issues anymore.
Note that this also enables generation of AVX instructions for memset
under the assumptions:
- Unaligned loads/stores are always fast on CPUs supporting AVX
- AVX is not slower than SSE
We may need some tweaked heuristics if one of those assumptions turns out not to
be true.
Effectively reverts r58317. Part of PR2962.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167967 91177308-0d34-0410-b5e6-96231b3b80d8
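For illustration only (not part of the commit), a minimal C++ reproducer of the kind of code affected: a fixed-size zeroing memset that the backend can now lower with SSE/AVX vector stores even when the target's stack alignment is below 16 bytes, e.g. 32-bit Linux. The helper name is hypothetical and actual codegen depends on the selected CPU features and heuristics.

// Hypothetical reproducer: a zeroing memset of a small local buffer.
#include <cstring>

void consume(char *buf);            // assumed external sink, keeps the buffer alive

void zero_local_buffer() {
  char buf[64];
  std::memset(buf, 0, sizeof(buf)); // candidate for v8i32/v8f32 (AVX) or v4i32/v4f32 (SSE) stores
  consume(buf);
}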
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp  9
1 file changed, 2 insertions, 7 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 863379ec5a..53a095f718 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1362,18 +1362,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                        bool IsZeroVal,
                                        bool MemcpyStrSrc,
                                        MachineFunction &MF) const {
-  // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
-  // linux. This is because the stack realignment code can't handle certain
-  // cases like PR2962. This should be removed when PR2962 is fixed.
   const Function *F = MF.getFunction();
   if (IsZeroVal &&
       !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
     if (Size >= 16 &&
         (Subtarget->isUnalignedMemAccessFast() ||
          ((DstAlign == 0 || DstAlign >= 16) &&
-          (SrcAlign == 0 || SrcAlign >= 16))) &&
-        Subtarget->getStackAlignment() >= 16) {
-      if (Subtarget->getStackAlignment() >= 32) {
+          (SrcAlign == 0 || SrcAlign >= 16)))) {
+      if (Size >= 32) {
         if (Subtarget->hasAVX2())
           return MVT::v8i32;
         if (Subtarget->hasAVX())
@@ -1385,7 +1381,6 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
         return MVT::v4f32;
     } else if (!MemcpyStrSrc && Size >= 8 &&
                !Subtarget->is64Bit() &&
-               Subtarget->getStackAlignment() >= 8 &&
               Subtarget->hasSSE2()) {
       // Do not use f64 to lower memcpy if source is string constant. It's
       // better to use i32 to avoid the loads.
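A rough, hedged sketch of the selection order after this patch, with hypothetical helper and enum names standing in for the real MVT/Subtarget queries; the authoritative logic is the getOptimalMemOpType body in the diff above. Aligned16 stands in for "DstAlign and SrcAlign are each 0 or >= 16".

// Simplified, hypothetical restatement of the post-patch decision tree for a
// zeroing memset; not LLVM API, just the control flow the patch leaves behind.
#include <cstdint>

enum class MemOpVT { v8i32, v8f32, v4i32, v4f32, i32 };

MemOpVT pickZeroMemsetType(uint64_t Size, bool FastUnaligned, bool Aligned16,
                           bool HasAVX2, bool HasAVX, bool HasSSE2, bool HasSSE1) {
  if (Size >= 16 && (FastUnaligned || Aligned16)) {
    // Stack alignment is no longer consulted; 32-byte ops are gated on Size.
    if (Size >= 32) {
      if (HasAVX2) return MemOpVT::v8i32;
      if (HasAVX)  return MemOpVT::v8f32;
    }
    if (HasSSE2) return MemOpVT::v4i32;
    if (HasSSE1) return MemOpVT::v4f32;
  }
  return MemOpVT::i32; // fall back to scalar stores
}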