diff options
author | Lang Hames <lhames@gmail.com> | 2011-11-02 22:52:45 +0000 |
---|---|---|
committer | Lang Hames <lhames@gmail.com> | 2011-11-02 22:52:45 +0000 |
commit | 1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429 (patch) | |
tree | ebbb35c2786b45f442362268bd94bf67b3321d57 | |
parent | d69568723d3e95c010f7252127bcd9f3913ffa85 (diff) | |
download | llvm-1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429.tar.gz llvm-1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429.tar.bz2 llvm-1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429.tar.xz |
Try to lower memset/memcpy/memmove to vector instructions on ARM where the alignment permits.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143582 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 28 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 7 | ||||
-rw-r--r-- | test/CodeGen/ARM/2011-10-26-memset-with-neon.ll | 20 |
3 files changed, 54 insertions, 1 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 31e522d4d7..222a399c28 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -8127,6 +8127,34 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
   }
 }
 
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+                       unsigned AlignCheck) {
+  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+          (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+                                           unsigned DstAlign, unsigned SrcAlign,
+                                           bool NonScalarIntSafe,
+                                           bool MemcpyStrSrc,
+                                           MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+
+  // See if we can use NEON instructions for this...
+  if (NonScalarIntSafe &&
+      !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+      Subtarget->hasNEON()) {
+    if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+      return MVT::v4i32;
+    } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+      return MVT::v2i32;
+    }
+  }
+
+  // Let the target-independent logic figure it out.
+  return MVT::Other;
+}
+
 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
   if (V < 0)
     return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5da9b27fca..43e43dd35b 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -266,9 +266,14 @@ namespace llvm {
 
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses. of the specified type.
-    /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
     virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
 
+    virtual EVT getOptimalMemOpType(uint64_t Size,
+                                    unsigned DstAlign, unsigned SrcAlign,
+                                    bool NonScalarIntSafe,
+                                    bool MemcpyStrSrc,
+                                    MachineFunction &MF) const;
+
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
new file mode 100644
index 0000000000..3c9216cde7
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+
+; Should trigger a NEON store.
+; CHECK: vstr.64
+define void @f_0_12(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+  ret void
+}
+
+; Trigger multiple NEON stores.
+; CHECK: vstmia
+; CHECK-NEXT: vstmia
+define void @f_0_40(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind