author     Eric Christopher <echristo@apple.com>    2011-03-12 01:09:29 +0000
committer  Eric Christopher <echristo@apple.com>    2011-03-12 01:09:29 +0000
commit     af3dce51494b366024958b6dc9a15b95cb03011f (patch)
tree       216866c8203d89fc3eb3dab436c76f48a6a13be2
parent     7d3a16a6f8a696a8d73fe369f5a0f57c9f3f9597 (diff)
Sometimes isPredicable lies to us and tells us we don't need the operands.
Go ahead and add them on when we might want to use them and let later
passes remove them.

Fixes rdar://9118569

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127518 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp      31
-rw-r--r--  test/CodeGen/ARM/fast-isel-pred.ll  60
2 files changed, 85 insertions, 6 deletions
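
A note on the "isPredicable lies" claim above: NEON data-processing instructions in ARM mode occupy the unconditional encoding space, so the ARM backend deliberately reports them as not predicable even though their instruction descriptors still declare predicate operand slots. The check looks roughly like the sketch below, a reconstruction from memory of ARMBaseInstrInfo around this revision rather than a quote of it:

// Rough sketch (not verbatim): why TII.isPredicable() says "no" for
// NEON instructions outside of Thumb2, even though the descriptor has
// predicate operand slots.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.isPredicable())
    return false;

  if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
    // NEON can only be predicated from inside a Thumb2 IT block; in ARM
    // mode these instructions always execute unconditionally.
    ARMFunctionInfo *AFI =
      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
    return AFI->isThumb2Function();
  }
  return true;
}

Fast-isel still has to fill in those predicate operand slots when it builds such an instruction, which is what the change below does.
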
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 26f48b3083..acb448569c 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -123,14 +123,15 @@ class ARMFastISel : public FastISel {
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm);
-    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
-                                    const TargetRegisterClass *RC,
-                                    uint64_t Imm);
     virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm);
+    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    uint64_t Imm);
+
     virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx);
@@ -193,6 +194,7 @@ class ARMFastISel : public FastISel {
 
     // OptionalDef handling routines.
   private:
+    bool isARMNEONPred(const MachineInstr *MI);
     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
     void AddLoadStoreOperands(EVT VT, Address &Addr,
@@ -221,6 +223,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
   return true;
 }
 
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+  const TargetInstrDesc &TID = MI->getDesc();
+
+  // If we're a thumb2 or not NEON function we were handled via isPredicable.
+  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+       AFI->isThumb2Function())
+    return false;
+
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
+    if (TID.OpInfo[i].isPredicate())
+      return true;
+
+  return false;
+}
+
 // If the machine is predicable go ahead and add the predicate operands, if
 // it needs default CC operands add those.
 // TODO: If we want to support thumb1 then we'll need to deal with optional
@@ -230,10 +247,12 @@ const MachineInstrBuilder &
 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   MachineInstr *MI = &*MIB;
 
-  // Do we use a predicate?
-  if (TII.isPredicable(MI))
+  // Do we use a predicate? or...
+  // Are we NEON in ARM mode and have a predicate operand? If so, I know
+  // we're not predicable but add it anyways.
+  if (TII.isPredicable(MI) || isARMNEONPred(MI))
     AddDefaultPred(MIB);
-
+
   // Do we optionally set a predicate? Preds is size > 0 iff the predicate
   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
   bool CPSR = false;
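
For reference, AddDefaultPred is the small helper from ARMBaseInstrInfo.h that the changed path calls: it appends the "always" condition code and a null condition register to whatever instruction is being built. A minimal sketch, assuming the helper as it exists in the ARM backend:

// Sketch of the helper used by AddOptionalDefs above: append the default
// predicate operands, ARMCC::AL plus a null CC register (reg 0).
static inline const MachineInstrBuilder &
AddDefaultPred(const MachineInstrBuilder &MIB) {
  return MIB.addImm(ARMCC::AL).addReg(0);
}

If these operands turn out to be unnecessary, later passes are free to rewrite or drop them, which is why the commit message is comfortable adding them here unconditionally.
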
diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll
new file mode 100644
index 0000000000..ef436549e8
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-pred.ll
@@ -0,0 +1,60 @@
+; ModuleID = 'test.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv7-apple-darwin"
+
+define i32 @main() nounwind ssp {
+entry:
+ %retval = alloca i32, align 4
+ %X = alloca <4 x i32>, align 16
+ %Y = alloca <4 x float>, align 16
+ store i32 0, i32* %retval
+ %tmp = load <4 x i32>* %X, align 16
+ call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+ %__a.addr.i = alloca <4 x i32>, align 16
+ %v.addr = alloca <4 x i32>, align 16
+ %p.addr = alloca i8*, align 4
+ %offset.addr = alloca i32, align 4
+ %constants.addr = alloca <4 x float>*, align 4
+ store <4 x i32> %v, <4 x i32>* %v.addr, align 16
+ store i8* %p, i8** %p.addr, align 4
+ store i32 %offset, i32* %offset.addr, align 4
+ store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+ %tmp = load <4 x i32>* %v.addr, align 16
+ store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
+ %tmp.i = load <4 x i32>* %__a.addr.i, align 16
+ %0 = bitcast <4 x i32> %tmp.i to <16 x i8>
+ %1 = bitcast <16 x i8> %0 to <4 x i32>
+ %vcvt.i = sitofp <4 x i32> %1 to <4 x float>
+ %tmp1 = load i8** %p.addr, align 4
+ %tmp2 = load i32* %offset.addr, align 4
+ %tmp3 = load <4 x float>** %constants.addr, align 4
+ call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
+ ret void
+}
+
+define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+ %v.addr = alloca <4 x float>, align 16
+ %p.addr = alloca i8*, align 4
+ %offset.addr = alloca i32, align 4
+ %constants.addr = alloca <4 x float>*, align 4
+ %data = alloca i64, align 4
+ store <4 x float> %v, <4 x float>* %v.addr, align 16
+ store i8* %p, i8** %p.addr, align 4
+ store i32 %offset, i32* %offset.addr, align 4
+ store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+ %tmp = load i64* %data, align 4
+ %tmp1 = load i8** %p.addr, align 4
+ %tmp2 = load i32* %offset.addr, align 4
+ %add.ptr = getelementptr i8* %tmp1, i32 %tmp2
+ %0 = bitcast i8* %add.ptr to i64*
+ %arrayidx = getelementptr inbounds i64* %0, i32 0
+ store i64 %tmp, i64* %arrayidx
+ ret void
+}
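
The IR in this test is the sort of code that becomes ARM-mode NEON instructions (for example the sitofp turning into a VCVT), and every instruction fast-isel builds is routed through AddOptionalDefs by the FastEmitInst_* helpers. A simplified sketch of that emitter shape follows; the real helpers also handle the case where the instruction has no explicit defs, and while the names follow the surrounding code this is not the verbatim implementation:

// Simplified sketch of a FastEmitInst_* helper: build the MachineInstr,
// then let AddOptionalDefs append predicate/CC operands as needed.
unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, getKillRegState(Op0IsKill)));
  return ResultReg;
}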