summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Goodwin <david_goodwin@apple.com> 2009-10-22 23:19:17 +0000
committer: David Goodwin <david_goodwin@apple.com> 2009-10-22 23:19:17 +0000
commit: 4c3715c2e5e17d7216a96ac2baf9720630f04408 (patch)
tree: 2370e0e0b38f4750b9fa314960a88a88d5e976bf
parent: 8201ebd40b0270eb6d36ff95dc69fb5ca6bae1db (diff)
download: llvm-4c3715c2e5e17d7216a96ac2baf9720630f04408.tar.gz
download: llvm-4c3715c2e5e17d7216a96ac2baf9720630f04408.tar.bz2
download: llvm-4c3715c2e5e17d7216a96ac2baf9720630f04408.tar.xz
Allow the target to select the level of anti-dependence breaking that should be performed by the post-RA scheduler. The default is none.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84911 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/Target/TargetSubtarget.h 10
-rw-r--r-- lib/CodeGen/PostRASchedulerList.cpp 21
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h 4
-rw-r--r-- lib/Target/X86/X86Subtarget.h 4
-rw-r--r-- test/CodeGen/X86/2007-01-08-InstrSched.ll 6
-rw-r--r-- test/CodeGen/X86/sse2.ll 8
-rw-r--r-- test/CodeGen/X86/sse3.ll 4
-rw-r--r-- test/CodeGen/X86/vshift-1.ll 2
-rw-r--r-- test/CodeGen/X86/vshift-2.ll 2
-rw-r--r-- test/CodeGen/X86/vshift-3.ll 2
-rw-r--r-- test/CodeGen/X86/vshift-5.ll 4
11 files changed, 44 insertions, 23 deletions
diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h
index 5edb86f770..fd107e074c 100644
--- a/include/llvm/Target/TargetSubtarget.h
+++ b/include/llvm/Target/TargetSubtarget.h
@@ -33,6 +33,10 @@ class TargetSubtarget {
protected: // Can only create subclasses...
TargetSubtarget();
public:
+ // AntiDepBreakMode - Type of anti-dependence breaking that should
+ // be performed before post-RA scheduling.
+ typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode;
+
virtual ~TargetSubtarget();
/// getSpecialAddressLatency - For targets where it is beneficial to
@@ -43,8 +47,10 @@ public:
// enablePostRAScheduler - If the target can benefit from post-regalloc
// scheduling and the specified optimization level meets the requirement
- // return true to enable post-register-allocation scheduling.
- virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel) const {
+ // return true to enable post-register-allocation scheduling.
+ virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& mode) const {
+ mode = ANTIDEP_NONE;
return false;
}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 4da5496c07..8fdbe9b1d7 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -128,6 +128,9 @@ namespace {
/// AA - AliasAnalysis for making memory reference queries.
AliasAnalysis *AA;
+ /// AntiDepMode - Anti-dependence breaking mode
+ TargetSubtarget::AntiDepBreakMode AntiDepMode;
+
/// Classes - For live regs that are only used in one register class in a
/// live range, the register class. If the register is not live, the
/// corresponding value is null. If the register is live but used in
@@ -156,10 +159,11 @@ namespace {
const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
ScheduleHazardRecognizer *HR,
- AliasAnalysis *aa)
+ AliasAnalysis *aa,
+ TargetSubtarget::AntiDepBreakMode adm)
: ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
AllocatableSet(TRI->getAllocatableSet(MF)),
- HazardRec(HR), AA(aa) {}
+ HazardRec(HR), AA(aa), AntiDepMode(adm) {}
~SchedulePostRATDList() {
delete HazardRec;
@@ -234,16 +238,23 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
AA = &getAnalysis<AliasAnalysis>();
// Check for explicit enable/disable of post-ra scheduling.
+ TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
} else {
// Check that post-RA scheduling is enabled for this target.
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
- if (!ST.enablePostRAScheduler(OptLevel))
+ if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode))
return false;
}
+ // Check for antidep breaking override...
+ if (EnableAntiDepBreaking.getPosition() > 0) {
+ AntiDepMode = (EnableAntiDepBreaking) ?
+ TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
+ }
+
DEBUG(errs() << "PostRAScheduler\n");
const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
@@ -253,7 +264,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
(ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
(ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA);
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA, AntiDepMode);
// Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
@@ -393,7 +404,7 @@ void SchedulePostRATDList::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(AA);
- if (EnableAntiDepBreaking) {
+ if (AntiDepMode != TargetSubtarget::ANTIDEP_NONE) {
if (BreakAntiDependencies()) {
// We made changes. Update the dependency graph.
// Theoretically we could update the graph in place:
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index bc5768e63a..74781593a0 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -128,7 +128,9 @@ protected:
/// enablePostRAScheduler - True at 'More' optimization except
/// for Thumb1.
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel) const {
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& mode) const {
+ mode = TargetSubtarget::ANTIDEP_NONE;
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 27e43333f8..0e4cfde778 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -218,7 +218,9 @@ public:
/// enablePostRAScheduler - X86 target is enabling post-alloc scheduling
/// at 'More' optimization level.
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel) const {
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& mode) const {
+ mode = TargetSubtarget::ANTIDEP_NONE;
return OptLevel >= CodeGenOpt::Default;
}
};
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 81f0a1d724..1eae2d2f20 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -11,11 +11,11 @@ define float @foo(float %x) nounwind {
%tmp14 = fadd float %tmp12, %tmp7
ret float %tmp14
-; CHECK: mulss LCPI1_3(%rip)
-; CHECK-NEXT: mulss LCPI1_0(%rip)
+; CHECK: mulss LCPI1_0(%rip)
; CHECK-NEXT: mulss LCPI1_1(%rip)
-; CHECK-NEXT: mulss LCPI1_2(%rip)
; CHECK-NEXT: addss
+; CHECK: mulss LCPI1_3(%rip)
+; CHECK-NEXT: mulss LCPI1_2(%rip)
; CHECK-NEXT: addss
; CHECK-NEXT: addss
; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 58fe28b09f..a3faada661 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -10,10 +10,10 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK: t1:
; CHECK: movl 8(%esp), %eax
-; CHECK-NEXT: movl 4(%esp), %ecx
; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK-NEXT: movl 4(%esp), %eax
; CHECK-NEXT: movlpd 12(%esp), %xmm0
-; CHECK-NEXT: movapd %xmm0, (%ecx)
+; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: ret
}
@@ -26,9 +26,9 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK: t2:
; CHECK: movl 8(%esp), %eax
-; CHECK-NEXT: movl 4(%esp), %ecx
; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK-NEXT: movl 4(%esp), %eax
; CHECK-NEXT: movhpd 12(%esp), %xmm0
-; CHECK-NEXT: movapd %xmm0, (%ecx)
+; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: ret
}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 6319cb887a..f43e5a34ae 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -168,11 +168,11 @@ define internal void @t10() nounwind {
ret void
; X64: t10:
; X64: pextrw $4, %xmm0, %eax
-; X64: pextrw $6, %xmm0, %edx
; X64: movlhps %xmm1, %xmm1
; X64: pshuflw $8, %xmm1, %xmm1
; X64: pinsrw $2, %eax, %xmm1
-; X64: pinsrw $3, %edx, %xmm1
+; X64: pextrw $6, %xmm0, %eax
+; X64: pinsrw $3, %eax, %xmm1
}
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index ae845e0a33..edcff6a7e7 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -63,7 +63,7 @@ entry:
; CHECK: shift3b:
; CHECK: movzwl
; CHECK: movd
-; CHECK-NEXT: psllw
+; CHECK: psllw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 36feb11603..be8972f2f4 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -63,7 +63,7 @@ entry:
; CHECK: shift3b:
; CHECK: movzwl
; CHECK: movd
-; CHECK-NEXT: psrlw
+; CHECK: psrlw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index 20d3f48a1a..bdb5af133e 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -52,7 +52,7 @@ entry:
; CHECK: shift3b:
; CHECK: movzwl
; CHECK: movd
-; CHECK-NEXT: psraw
+; CHECK: psraw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index a543f382b5..675e33ffb3 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -6,7 +6,7 @@ define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
entry:
; CHECK: shift5a:
; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
%amt = load i32* %pamt
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -20,7 +20,7 @@ define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
entry:
; CHECK: shift5b:
; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
%amt = load i32* %pamt
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer