summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-05-14 23:21:14 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-05-14 23:21:14 +0000
commitb990a2f249196ad3e0cc451d40a45fc2f9278eaf (patch)
treed734296d75a3d1a53af705283c116447cc699f23
parentfd726176886b946ff4eaaf85bd254c5fbbacabfd (diff)
downloadllvm-b990a2f249196ad3e0cc451d40a45fc2f9278eaf.tar.gz
llvm-b990a2f249196ad3e0cc451d40a45fc2f9278eaf.tar.bz2
llvm-b990a2f249196ad3e0cc451d40a45fc2f9278eaf.tar.xz
Teach two-address pass to do some coalescing while eliminating REG_SEQUENCE
instructions. e.g.
%reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
%reg1027<def> = EXTRACT_SUBREG %reg1026, 6
%reg1028<def> = EXTRACT_SUBREG %reg1026<kill>, 5
...
%reg1029<def> = REG_SEQUENCE %reg1028<kill>, 5, %reg1027<kill>, 6, %reg1028, 7, %reg1027, 8, %reg1028, 9, %reg1027, 10, %reg1030<kill>, 11, %reg1032<kill>, 12
After REG_SEQUENCE is eliminated, we are left with:
%reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
%reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
%reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
The regular coalescer will not be able to coalesce reg1026 and reg1029 because it doesn't know how to combine sub-register indices 5 and 6. Now the 2-address pass will ask the target whether sub-registers 5 and 6 of reg1026 can be combined into a larger sub-register (or combined to be reg1026 itself, as is the case here). If it is possible, it will be able to replace references of reg1026 with reg1029 + the larger sub-register index.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103835 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Target/TargetRegisterInfo.h12
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp49
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp117
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h9
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.h3
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td11
-rw-r--r--lib/Target/ARM/NEONPreAllocPass.cpp6
7 files changed, 204 insertions, 3 deletions
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 29b862aa02..aeb669d673 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -28,6 +28,7 @@ class BitVector;
class MachineFunction;
class MachineMove;
class RegScavenger;
+template<class T> class SmallVectorImpl;
/// TargetRegisterDesc - This record contains all of the information known about
/// a particular register. The AliasSet field (if not null) contains a pointer
@@ -479,6 +480,17 @@ public:
return 0;
}
+ /// canCombinedSubRegIndex - Given a register class and a list of sub-register
+ /// indices, return true if it's possible to combine the sub-register indices
+ /// into one that corresponds to a larger sub-register. Return the new sub-
+ /// register index by reference. Note the new index may be zero if the given
+ /// sub-registers combine to form the whole register.
+ ///
+ /// This default implementation never combines anything: it returns false and
+ /// leaves NewSubIdx untouched. Targets override it to opt in.
+ virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                     SmallVectorImpl<unsigned> &SubIndices,
+                                     unsigned &NewSubIdx) const {
+   return false;
+ }
+
/// getMatchingSuperRegClass - Return a subclass of the specified register
/// class A so that each register in it has a sub-register of the
/// specified sub-register index which is in the specified register class B.
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 9f00311fb4..80bb1a9e9d 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1166,6 +1166,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
llvm_unreachable(0);
}
+ SmallVector<unsigned, 4> RealSrcs;
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
@@ -1176,6 +1177,16 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
}
MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isImplicitDef()) {
+ DefMI->eraseFromParent();
+ continue;
+ }
+
+ // Remember EXTRACT_SUBREG sources. These might be candidate for
+ // coalescing.
+ if (DefMI->isExtractSubreg())
+ RealSrcs.push_back(DefMI->getOperand(1).getReg());
+
if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent()) {
// REG_SEQUENCE cannot have duplicated operands, add a copy.
// Also add an copy if the source if live-in the block. We don't want
@@ -1216,6 +1227,44 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
+
+ // Try coalescing some EXTRACT_SUBREG instructions.
+ Seen.clear();
+ for (unsigned i = 0, e = RealSrcs.size(); i != e; ++i) {
+ unsigned SrcReg = RealSrcs[i];
+ if (!Seen.insert(SrcReg))
+ continue;
+
+ // If there are no other uses than extract_subreg which feed into
+ // the reg_sequence, then we might be able to coalesce them.
+ bool CanCoalesce = true;
+ SmallVector<unsigned, 4> SubIndices;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (!UseMI->isExtractSubreg() ||
+ UseMI->getOperand(0).getReg() != DstReg) {
+ CanCoalesce = false;
+ break;
+ }
+ SubIndices.push_back(UseMI->getOperand(2).getImm());
+ }
+
+ if (!CanCoalesce)
+ continue;
+
+ // %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
+ // %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
+ // %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
+ // Since D subregs 5, 6 can combine to a Q register, we can coalesce
+ // reg1026 to reg1029.
+ std::sort(SubIndices.begin(), SubIndices.end());
+ unsigned NewSubIdx = 0;
+ if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
+ NewSubIdx))
+ UpdateRegSequenceSrcs(SrcReg, DstReg, NewSubIdx, MRI);
+ }
}
RegSequences.clear();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9b7dc309c5..9dcdce05e4 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -351,6 +351,123 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return 0;
}
+/// canCombinedSubRegIndex - Decide whether the sub-register indices in
+/// SubIndices combine into one larger sub-register (or the whole register) of
+/// a register in class RC.
+///
+/// SubIndices is matched positionally, so it must be in ascending order (the
+/// two-address pass sorts the list before calling). On success this returns
+/// true and sets NewSubIdx to the index of the combined sub-register, or to 0
+/// when the pieces make up the whole register. It returns false when the
+/// indices do not form one of the recognized groups; NewSubIdx is then 0,
+/// except on the early size bail-out, which leaves it untouched.
+bool
+ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                          SmallVectorImpl<unsigned> &SubIndices,
+                                          unsigned &NewSubIdx) const {
+
+  // RC->getSize() is scaled by 8 so the comparisons below read as widths:
+  // 128 = Q, 256 = QQ, 512 = QQQQ. The smallest combination handled here is
+  // 2 S registers -> 1 D register (64), so anything narrower cannot match.
+  unsigned Size = RC->getSize() * 8;
+  if (Size < 64)
+    return false;
+
+  NewSubIdx = 0;  // Whole register.
+  unsigned NumRegs = SubIndices.size();
+  if (NumRegs == 8) {
+    // 8 D registers -> 1 QQQQ register.
+    return (Size == 512 &&
+            SubIndices[0] == ARM::DSUBREG_0 &&
+            SubIndices[1] == ARM::DSUBREG_1 &&
+            SubIndices[2] == ARM::DSUBREG_2 &&
+            SubIndices[3] == ARM::DSUBREG_3 &&
+            SubIndices[4] == ARM::DSUBREG_4 &&
+            SubIndices[5] == ARM::DSUBREG_5 &&
+            SubIndices[6] == ARM::DSUBREG_6 &&
+            SubIndices[7] == ARM::DSUBREG_7);
+  } else if (NumRegs == 4) {
+    if (SubIndices[0] == ARM::QSUBREG_0) {
+      // 4 Q registers -> 1 QQQQ register.
+      return (Size == 512 &&
+              SubIndices[1] == ARM::QSUBREG_1 &&
+              SubIndices[2] == ARM::QSUBREG_2 &&
+              SubIndices[3] == ARM::QSUBREG_3);
+    } else if (SubIndices[0] == ARM::DSUBREG_0) {
+      // 4 D registers -> 1 QQ register.
+      if (Size >= 256 &&
+          SubIndices[1] == ARM::DSUBREG_1 &&
+          SubIndices[2] == ARM::DSUBREG_2 &&
+          SubIndices[3] == ARM::DSUBREG_3) {
+        // Only a QQQQ register has room for a QQ sub-register; for a QQ
+        // register the four D registers are the whole register.
+        if (Size == 512)
+          NewSubIdx = ARM::QQSUBREG_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_4) {
+      // 4 D registers -> 1 QQ register (2nd).
+      if (Size == 512 &&
+          SubIndices[1] == ARM::DSUBREG_5 &&
+          SubIndices[2] == ARM::DSUBREG_6 &&
+          SubIndices[3] == ARM::DSUBREG_7) {
+        NewSubIdx = ARM::QQSUBREG_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::SSUBREG_0) {
+      // 4 S registers -> 1 Q register.
+      if (Size >= 128 &&
+          SubIndices[1] == ARM::SSUBREG_1 &&
+          SubIndices[2] == ARM::SSUBREG_2 &&
+          SubIndices[3] == ARM::SSUBREG_3) {
+        if (Size >= 256)
+          NewSubIdx = ARM::QSUBREG_0;
+        return true;
+      }
+    }
+  } else if (NumRegs == 2) {
+    if (SubIndices[0] == ARM::QSUBREG_0) {
+      // 2 Q registers -> 1 QQ register.
+      if (Size >= 256 && SubIndices[1] == ARM::QSUBREG_1) {
+        if (Size == 512)
+          NewSubIdx = ARM::QQSUBREG_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::QSUBREG_2) {
+      // 2 Q registers -> 1 QQ register (2nd).
+      if (Size == 512 && SubIndices[1] == ARM::QSUBREG_3) {
+        NewSubIdx = ARM::QQSUBREG_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_0) {
+      // 2 D registers -> 1 Q register.
+      if (Size >= 128 && SubIndices[1] == ARM::DSUBREG_1) {
+        if (Size >= 256)
+          NewSubIdx = ARM::QSUBREG_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_2) {
+      // 2 D registers -> 1 Q register (2nd).
+      if (Size >= 256 && SubIndices[1] == ARM::DSUBREG_3) {
+        NewSubIdx = ARM::QSUBREG_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_4) {
+      // 2 D registers -> 1 Q register (3rd).
+      if (Size == 512 && SubIndices[1] == ARM::DSUBREG_5) {
+        NewSubIdx = ARM::QSUBREG_2;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_6) {
+      // 2 D registers -> 1 Q register (4th).
+      if (Size == 512 && SubIndices[1] == ARM::DSUBREG_7) {
+        NewSubIdx = ARM::QSUBREG_3;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::SSUBREG_0) {
+      // 2 S registers -> 1 D register.
+      if (SubIndices[1] == ARM::SSUBREG_1) {
+        // For a 64-bit class the two S registers are the whole register, so
+        // NewSubIdx stays 0.
+        if (Size >= 128)
+          NewSubIdx = ARM::DSUBREG_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::SSUBREG_2) {
+      // 2 S registers -> 1 D register (2nd).
+      if (Size >= 128 && SubIndices[1] == ARM::SSUBREG_3) {
+        NewSubIdx = ARM::DSUBREG_1;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
const TargetRegisterClass *
ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return ARM::GPRRegisterClass;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 456c39237d..2c9c82d031 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -81,6 +81,15 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
+ /// canCombinedSubRegIndex - Given a register class and a list of sub-register
+ /// indices, return true if it's possible to combine the sub-register indices
+ /// into one that corresponds to a larger sub-register. Return the new sub-
+ /// register index by reference. Note the new index may be zero if the given
+ /// sub-registers combine to form the whole register.
+ virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
+
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index efc0cbb065..62514c5c92 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -31,7 +31,8 @@ namespace ARM {
SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4,
DSUBREG_0 = 5, DSUBREG_1 = 6, DSUBREG_2 = 7, DSUBREG_3 = 8,
DSUBREG_4 = 9, DSUBREG_5 = 10, DSUBREG_6 = 11, DSUBREG_7 = 12,
- QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16
+ QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16,
+ QQSUBREG_0= 17, QQSUBREG_1= 18
};
}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index ae2b95bef6..80325ae54e 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -465,6 +465,10 @@ def arm_qsubreg_1 : PatLeaf<(i32 14)>;
def arm_qsubreg_2 : PatLeaf<(i32 15)>;
def arm_qsubreg_3 : PatLeaf<(i32 16)>;
+def arm_qqsubreg_0 : PatLeaf<(i32 17)>; // Sub-register index 17 (QQSUBREG_0).
+def arm_qqsubreg_1 : PatLeaf<(i32 18)>; // Sub-register index 18 (QQSUBREG_1).
+
+
// S sub-registers of D registers.
def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15],
@@ -552,3 +556,10 @@ def : SubRegSet<15, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
[Q2, Q6, Q10, Q14]>;
def : SubRegSet<16, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
[Q3, Q7, Q11, Q15]>;
+
+// QQ sub-registers of QQQQ registers.
+def : SubRegSet<17, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
+ [QQ0, QQ2, QQ4, QQ6]>;
+def : SubRegSet<18, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
+ [QQ1, QQ3, QQ5, QQ7]>;
+
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 017e6f7443..77375e52d1 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -414,7 +414,9 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
return false;
LastSrcReg = VirtReg;
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- if (RC != ARM::QPRRegisterClass && RC != ARM::QQPRRegisterClass)
+ if (RC != ARM::QPRRegisterClass &&
+ RC != ARM::QQPRRegisterClass &&
+ RC != ARM::QQQQPRRegisterClass)
return false;
unsigned SubIdx = DefMI->getOperand(2).getImm();
if (LastSubIdx) {
@@ -432,7 +434,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
// FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
// currently required for correctness. e.g.
- // %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+ // %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
// %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
// %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
// VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,