author     Benjamin Kramer <benny.kra@googlemail.com>    2012-11-14 20:08:40 +0000
committer  Benjamin Kramer <benny.kra@googlemail.com>    2012-11-14 20:08:40 +0000
commit     2dbe929685da37e904d6bb0c5a3504e1bafe348f (patch)
tree       1d50f76beb60b492c0c66395316c637c02baa262 /lib
parent     97d19ebe5b5bf27617e536a16fa232116cefe914 (diff)
X86: Enable SSE memory intrinsics even when stack alignment is less than 16 bytes.
The stack realignment code was fixed to work when both stack realignment and
a dynamic alloca are present, so this shouldn't cause correctness issues anymore.
Note that this also enables generation of AVX instructions for memset
under the assumptions:
- Unaligned loads/stores are always fast on CPUs supporting AVX
- AVX is not slower than SSE
We may need some tweaked heuristics if one of those assumptions turns out not to
be true.
Effectively reverts r58317. Part of PR2962.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167967 91177308-0d34-0410-b5e6-96231b3b80d8
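For illustration only (not part of the commit), a minimal C++ reproducer of the kind of code affected: a fixed-size zeroing memset that the backend can now lower with SSE/AVX vector stores even when the target's stack alignment is below 16 bytes, e.g. 32-bit Linux. The helper name is hypothetical and actual codegen depends on the selected CPU features and heuristics.

// Hypothetical reproducer: a zeroing memset of a small local buffer.
#include <cstring>

void consume(char *buf);            // assumed external sink, keeps the buffer alive

void zero_local_buffer() {
  char buf[64];
  std::memset(buf, 0, sizeof(buf)); // candidate for v8i32/v8f32 (AVX) or v4i32/v4f32 (SSE) stores
  consume(buf);
}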
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp  9
1 file changed, 2 insertions, 7 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 863379ec5a..53a095f718 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1362,18 +1362,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                        bool IsZeroVal,
                                        bool MemcpyStrSrc,
                                        MachineFunction &MF) const {
-  // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
-  // linux. This is because the stack realignment code can't handle certain
-  // cases like PR2962. This should be removed when PR2962 is fixed.
   const Function *F = MF.getFunction();
   if (IsZeroVal &&
       !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
     if (Size >= 16 &&
         (Subtarget->isUnalignedMemAccessFast() ||
          ((DstAlign == 0 || DstAlign >= 16) &&
-          (SrcAlign == 0 || SrcAlign >= 16))) &&
-        Subtarget->getStackAlignment() >= 16) {
-      if (Subtarget->getStackAlignment() >= 32) {
+          (SrcAlign == 0 || SrcAlign >= 16)))) {
+      if (Size >= 32) {
         if (Subtarget->hasAVX2())
           return MVT::v8i32;
         if (Subtarget->hasAVX())
@@ -1385,7 +1381,6 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
         return MVT::v4f32;
     } else if (!MemcpyStrSrc && Size >= 8 &&
                !Subtarget->is64Bit() &&
-               Subtarget->getStackAlignment() >= 8 &&
               Subtarget->hasSSE2()) {
       // Do not use f64 to lower memcpy if source is string constant. It's
       // better to use i32 to avoid the loads.
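A rough, hedged sketch of the selection order after this patch, with hypothetical helper and enum names standing in for the real MVT/Subtarget queries; the authoritative logic is the getOptimalMemOpType body in the diff above. Aligned16 stands in for "DstAlign and SrcAlign are each 0 or >= 16".

// Simplified, hypothetical restatement of the post-patch decision tree for a
// zeroing memset; not LLVM API, just the control flow the patch leaves behind.
#include <cstdint>

enum class MemOpVT { v8i32, v8f32, v4i32, v4f32, i32 };

MemOpVT pickZeroMemsetType(uint64_t Size, bool FastUnaligned, bool Aligned16,
                           bool HasAVX2, bool HasAVX, bool HasSSE2, bool HasSSE1) {
  if (Size >= 16 && (FastUnaligned || Aligned16)) {
    // Stack alignment is no longer consulted; 32-byte ops are gated on Size.
    if (Size >= 32) {
      if (HasAVX2) return MemOpVT::v8i32;
      if (HasAVX)  return MemOpVT::v8f32;
    }
    if (HasSSE2) return MemOpVT::v4i32;
    if (HasSSE1) return MemOpVT::v4f32;
  }
  return MemOpVT::i32; // fall back to scalar stores
}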