Revise alignment checking/calculation on 256-bit unaligned memory access

- A memory access is still considered aligned when the specified alignment is larger than the natural alignment.
- The new alignment for the high 128-bit vector should be min(16, alignment), because the pointer is advanced by 16, a power-of-2 offset.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177947 91177308-0d34-0410-b5e6-96231b3b80d8
author: Michael Liao <michael.liao@intel.com> 2013-03-25 23:50:10 +0000
committer: Michael Liao <michael.liao@intel.com> 2013-03-25 23:50:10 +0000
commit: d4584c9e5658887ec50c43760c988d04eaa13e34 (patch)
tree: 1f64d66547a6aed3e1b1ee10531534229ad01f18 /lib/Target/X86/X86ISelLowering.cpp
parent: b4f98ea1213c866f39aa5b341ec0116f9c2335d7 (diff)
download: llvm-d4584c9e5658887ec50c43760c988d04eaa13e34.tar.gz
llvm-d4584c9e5658887ec50c43760c988d04eaa13e34.tar.bz2
llvm-d4584c9e5658887ec50c43760c988d04eaa13e34.tar.xz
1 files changed, 6 insertions, 7 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 23cfd6d72f..fef2b9659b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16639,11 +16639,10 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned RegSz = RegVT.getSizeInBits();
 
+  // On Sandybridge unaligned 256bit loads are inefficient.
   ISD::LoadExtType Ext = Ld->getExtensionType();
   unsigned Alignment = Ld->getAlignment();
-  bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8;
-
-  // On Sandybridge unaligned 256bit loads are inefficient.
+  bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
   if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
       !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
     unsigned NumElems = RegVT.getVectorNumElements();
@@ -16663,7 +16662,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
                                 Ld->getPointerInfo(), Ld->isVolatile(),
                                 Ld->isNonTemporal(), Ld->isInvariant(),
-                                std::max(Alignment/2U, 1U));
+                                std::min(16U, Alignment));
     SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                              Load1.getValue(1),
                              Load2.getValue(1));
@@ -16834,13 +16833,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   DebugLoc dl = St->getDebugLoc();
   SDValue StoredVal = St->getOperand(1);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  unsigned Alignment = St->getAlignment();
-  bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8;
 
   // If we are saving a concatenation of two XMM registers, perform two stores.
   // On Sandy Bridge, 256-bit memory operations are executed by two
   // 128-bit ports. However, on Haswell it is better to issue a single 256-bit
   // memory  operation.
+  unsigned Alignment = St->getAlignment();
+  bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
   if (VT.is256BitVector() && !Subtarget->hasInt256() &&
       StVT == VT && !IsAligned) {
     unsigned NumElems = VT.getVectorNumElements();
@@ -16860,7 +16859,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
     SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
                                 St->getPointerInfo(), St->isVolatile(),
                                 St->isNonTemporal(),
-                                std::max(Alignment/2U, 1U));
+                                std::min(16U, Alignment));
     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
   }
author	Michael Liao <michael.liao@intel.com>	2013-03-25 23:50:10 +0000
committer	Michael Liao <michael.liao@intel.com>	2013-03-25 23:50:10 +0000
commit	d4584c9e5658887ec50c43760c988d04eaa13e34 (patch)
tree	1f64d66547a6aed3e1b1ee10531534229ad01f18 /lib/Target/X86/X86ISelLowering.cpp
parent	b4f98ea1213c866f39aa5b341ec0116f9c2335d7 (diff)
download	llvm-d4584c9e5658887ec50c43760c988d04eaa13e34.tar.gz llvm-d4584c9e5658887ec50c43760c988d04eaa13e34.tar.bz2 llvm-d4584c9e5658887ec50c43760c988d04eaa13e34.tar.xz