summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-01-19 08:38:41 +0000
committerNadav Rotem <nrotem@apple.com>2013-01-19 08:38:41 +0000
commitba9586544164e69754039a25cb0ef7907d27382d (patch)
tree98ebe27964ae162fa9c5dd28f27e7bfbdb4182ad /lib
parentcfcab21e4d0e4d7444b147898d6aed1348df3043 (diff)
downloadllvm-ba9586544164e69754039a25cb0ef7907d27382d.tar.gz
llvm-ba9586544164e69754039a25cb0ef7907d27382d.tar.bz2
llvm-ba9586544164e69754039a25cb0ef7907d27382d.tar.xz
On Sandybridge split unaligned 256bit stores into two xmm-sized stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172894 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp27
1 files changed, 18 insertions, 9 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b6b10e2dca..ca8cd741e7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16344,12 +16344,15 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
ISD::LoadExtType Ext = Ld->getExtensionType();
unsigned Alignment = Ld->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8;
// On Sandybridge unaligned 256bit loads are inefficient.
if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
- !DCI.isBeforeLegalizeOps() && Alignment < 32 &&
- Ext == ISD::NON_EXTLOAD) {
+ !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
unsigned NumElems = RegVT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
SDValue Ptr = Ld->getBasePtr();
SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
@@ -16363,7 +16366,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
- Alignment);
+ std::max(Alignment/2U, 1U));
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));
@@ -16536,16 +16539,21 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = St->getDebugLoc();
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned Alignment = St->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8;
// If we are saving a concatenation of two XMM registers, perform two stores.
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
if (VT.is256BitVector() && !Subtarget->hasInt256() &&
- StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
- StoredVal.getNumOperands() == 2) {
- SDValue Value0 = StoredVal.getOperand(0);
- SDValue Value1 = StoredVal.getOperand(1);
+ StVT == VT && !IsAligned) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
+ SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
@@ -16553,10 +16561,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(), Alignment);
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(),
+ std::max(Alignment/2U, 1U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}