summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/llvm/Target/TargetRegisterInfo.h 8
-rw-r--r-- lib/CodeGen/RegAllocLinearScan.cpp 34
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.cpp 23
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.h 2
-rw-r--r-- test/CodeGen/ARM/fabss.ll 2
-rw-r--r-- test/CodeGen/ARM/fadds.ll 2
-rw-r--r-- test/CodeGen/ARM/fdivs.ll 2
-rw-r--r-- test/CodeGen/ARM/fmuls.ll 2
-rw-r--r-- test/CodeGen/ARM/fp_convert.ll 8
9 files changed, 65 insertions, 18 deletions
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index d06cdc4e8e..0fa3b38561 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -624,6 +624,14 @@ public:
return 0;
}
+ /// avoidWriteAfterWrite - Return true if the register allocator should avoid
+ /// writing the same register from RC in two consecutive instructions.
+ /// Doing so can avoid pipeline stalls on certain architectures, at the cost
+ /// of increased register pressure. The default implementation returns false.
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ return false;
+ }
+
/// UpdateRegAllocHint - A callback to allow target a chance to update
/// register allocation hints when a register is "changed" (e.g. coalesced)
/// to another register. e.g. On ARM, some virtual registers should target
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index b8cb5a7c92..b91f312b72 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -67,6 +67,11 @@ TrivCoalesceEnds("trivial-coalesce-ends",
cl::desc("Attempt trivial coalescing of interval ends"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+AvoidWAWHazard("avoid-waw-hazard",
+ cl::desc("Avoid write-write hazards for some register classes"),
+ cl::init(false), cl::Hidden);
+
static RegisterRegAlloc
linearscanRegAlloc("linearscan", "linear scan register allocator",
createLinearScanRegisterAllocator);
@@ -110,6 +115,7 @@ namespace {
if (NumRecentlyUsedRegs > 0)
RecentRegs.resize(NumRecentlyUsedRegs, 0);
RecentNext = RecentRegs.begin();
+ avoidWAW_ = 0;
}
typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
@@ -180,6 +186,9 @@ namespace {
SmallVector<unsigned, 4> RecentRegs;
SmallVector<unsigned, 4>::iterator RecentNext;
+ // Most recent physreg assigned from a WAW-hazard register class (initially 0).
+ unsigned avoidWAW_;
+
// Record that we just picked this register.
void recordRecentlyUsed(unsigned reg) {
assert(reg != 0 && "Recently used register is NOREG!");
@@ -227,8 +236,8 @@ namespace {
// Determine if we skip this register due to its being recently used.
bool isRecentlyUsed(unsigned reg) const {
- return std::find(RecentRegs.begin(), RecentRegs.end(), reg) !=
- RecentRegs.end();
+ return reg == avoidWAW_ ||
+ std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
}
private:
@@ -1116,6 +1125,12 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
active_.push_back(std::make_pair(cur, cur->begin()));
handled_.push_back(cur);
+ // Remember physReg for avoiding a write-after-write hazard in the next
+ // instruction.
+ if (AvoidWAWHazard &&
+ tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
+ avoidWAW_ = physReg;
+
// "Upgrade" the physical register since it has been allocated.
UpgradeRegister(physReg);
if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
@@ -1446,7 +1461,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
if (reservedRegs_.test(Reg))
continue;
// Skip recently allocated registers.
- if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
+ if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
FreeReg = Reg;
if (FreeReg < inactiveCounts.size())
FreeRegInactiveCount = inactiveCounts[FreeReg];
@@ -1477,7 +1492,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
if (reservedRegs_.test(Reg))
continue;
if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
- FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
+ FreeRegInactiveCount < inactiveCounts[Reg] &&
+ (!SkipDGRegs || !isRecentlyUsed(Reg))) {
FreeReg = Reg;
FreeRegInactiveCount = inactiveCounts[Reg];
if (FreeRegInactiveCount == MaxInactiveCount)
@@ -1528,12 +1544,10 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
return Preference;
}
- if (!DowngradedRegs.empty()) {
- unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
- true);
- if (FreeReg)
- return FreeReg;
- }
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
+ true);
+ if (FreeReg)
+ return FreeReg;
return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
}
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 3d1eaf0891..6eb9002df8 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -554,6 +554,29 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
+bool
+ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ // Cortex-A9 has a write-after-write hazard for NEON registers.
+ if (!STI.isCortexA9())
+ return false;
+
+ switch (RC->getID()) {
+ case ARM::DPRRegClassID:
+ case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::SPR_8RegClassID:
+ // Avoid back-to-back reuse of S, D, and Q registers.
+ // QQ and QQQQ fall through to the default to limit register pressure.
+ return true;
+ default:
+ return false;
+ }
+}
+
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 0507396f2c..480892ed3e 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -142,6 +142,8 @@ public:
void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const;
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index f03282bdab..51efe51bf1 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -24,4 +24,4 @@ declare float @fabsf(float)
; CORTEXA8: test:
; CORTEXA8: vabs.f32 d1, d1
; CORTEXA9: test:
-; CORTEXA9: vabs.f32 s1, s1
+; CORTEXA9: vabs.f32 s{{[0-9]+}}, s{{[0-9]+}}
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 749690e98d..e35103c045 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -20,4 +20,4 @@ entry:
; CORTEXA8: test:
; CORTEXA8: vadd.f32 d0, d1, d0
; CORTEXA9: test:
-; CORTEXA9: vadd.f32 s0, s1, s0
+; CORTEXA9: vadd.f32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 0c31495792..31c1ca9405 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -20,4 +20,4 @@ entry:
; CORTEXA8: test:
; CORTEXA8: vdiv.f32 s0, s1, s0
; CORTEXA9: test:
-; CORTEXA9: vdiv.f32 s0, s1, s0
+; CORTEXA9: vdiv.f32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index ef4e3e5281..bc118b8cb2 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -20,4 +20,4 @@ entry:
; CORTEXA8: test:
; CORTEXA8: vmul.f32 d0, d1, d0
; CORTEXA9: test:
-; CORTEXA9: vmul.f32 s0, s1, s0
+; CORTEXA9: vmul.f32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 1ef9f7f321..86c06f1ddd 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -5,7 +5,7 @@
define i32 @test1(float %a, float %b) {
; VFP2: test1:
-; VFP2: vcvt.s32.f32 s0, s0
+; VFP2: vcvt.s32.f32 s{{[0-9]+}}, s{{[0-9]+}}
; NEON: test1:
; NEON: vcvt.s32.f32 d0, d0
entry:
@@ -16,7 +16,7 @@ entry:
define i32 @test2(float %a, float %b) {
; VFP2: test2:
-; VFP2: vcvt.u32.f32 s0, s0
+; VFP2: vcvt.u32.f32 s{{[0-9]+}}, s{{[0-9]+}}
; NEON: test2:
; NEON: vcvt.u32.f32 d0, d0
entry:
@@ -27,7 +27,7 @@ entry:
define float @test3(i32 %a, i32 %b) {
; VFP2: test3:
-; VFP2: vcvt.f32.u32 s0, s0
+; VFP2: vcvt.f32.u32 s{{[0-9]+}}, s{{[0-9]+}}
; NEON: test3:
; NEON: vcvt.f32.u32 d0, d0
entry:
@@ -38,7 +38,7 @@ entry:
define float @test4(i32 %a, i32 %b) {
; VFP2: test4:
-; VFP2: vcvt.f32.s32 s0, s0
+; VFP2: vcvt.f32.s32 s{{[0-9]+}}, s{{[0-9]+}}
; NEON: test4:
; NEON: vcvt.f32.s32 d0, d0
entry: