summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLang Hames <lhames@gmail.com>2011-11-02 22:52:45 +0000
committerLang Hames <lhames@gmail.com>2011-11-02 22:52:45 +0000
commit1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429 (patch)
treeebbb35c2786b45f442362268bd94bf67b3321d57
parentd69568723d3e95c010f7252127bcd9f3913ffa85 (diff)
downloadllvm-1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429.tar.gz
llvm-1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429.tar.bz2
llvm-1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429.tar.xz
Try to lower memset/memcpy/memmove to vector instructions on ARM where the alignment permits.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143582 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp28
-rw-r--r--lib/Target/ARM/ARMISelLowering.h7
-rw-r--r--test/CodeGen/ARM/2011-10-26-memset-with-neon.ll20
3 files changed, 54 insertions, 1 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 31e522d4d7..222a399c28 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -8127,6 +8127,34 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
}
}
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+ unsigned AlignCheck) {
+ return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+ (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+
+ // See if we can use NEON instructions for this...
+ if (NonScalarIntSafe &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+ return MVT::v4i32;
+ } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+ return MVT::v2i32;
+ }
+ }
+
+ // Let the target-independent logic figure it out.
+ return MVT::Other;
+}
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5da9b27fca..43e43dd35b 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -266,9 +266,14 @@ namespace llvm {
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses. of the specified type.
- /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
new file mode 100644
index 0000000000..3c9216cde7
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+
+; Should trigger a NEON store.
+; CHECK: vstr.64
+define void @f_0_12(i8* nocapture %c) nounwind optsize {
+entry:
+ call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+ ret void
+}
+
+; Trigger multiple NEON stores.
+; CHECK: vstmia
+; CHECK-NEXT: vstmia
+define void @f_0_40(i8* nocapture %c) nounwind optsize {
+entry:
+ call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind