author     Hal Finkel <hfinkel@anl.gov>   2014-03-27 23:12:31 +0000
committer  Hal Finkel <hfinkel@anl.gov>   2014-03-27 23:12:31 +0000
commit     e2ee98ab169fe8d1d4bd39fe0ecb89274eceb438 (patch)
tree       d4f276d3816f78e5f52764f62813ead41fb29df0
parent     d9524d66cd6a69ded63b29bed5413217444b162a (diff)
[PowerPC] Use a small cleanup pass to remove VSX self copies
As explained in r204976, because of how the allocation of VSX registers interacts with the call-lowering code, we sometimes end up generating self VSX copies. Specifically, things like this:

  %VSL2<def> = COPY %F2, %VSL2<imp-use,kill>

(where %F2 is really a sub-register of %VSL2, and so this copy is a nop)

This adds a small cleanup pass to remove these prior to post-RA scheduling.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204980 91177308-0d34-0410-b5e6-96231b3b80d8
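For readers skimming the patch: by the time the new pass runs, the COPY shown above has already been lowered to an XXLOR, so the whole change reduces to one predicate, namely that an XXLOR whose destination and both sources name the same register is a no-op. A minimal sketch of that check as a free function (the helper name is illustrative and not part of the patch; it assumes the PPC backend headers already included by PPCInstrInfo.cpp are in scope):

  // Sketch only: mirrors the test added in PPCVSXCopyCleanup::processBlock below.
  // XXLOR vD, vA, vB computes vA | vB, so when vD == vA == vB the instruction
  // rewrites a register with its own value and can be erased safely.
  static bool isVSXSelfCopy(const MachineInstr *MI) {
    return MI->getOpcode() == PPC::XXLOR &&
           MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
           MI->getOperand(0).getReg() == MI->getOperand(2).getReg();
  }

The pass below collects every instruction matching this predicate while scanning a block and erases them in a second loop, so the block iterator is never invalidated mid-scan.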
-rw-r--r--  lib/Target/PowerPC/PPC.h                 |  1
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp      | 74
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp  |  3
-rw-r--r--  test/CodeGen/PowerPC/vsx-self-copy.ll    | 27
4 files changed, 105 insertions(+), 0 deletions(-)
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index df8dd8f28b..c42c5be14b 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -37,6 +37,7 @@ namespace llvm {
#endif
FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXCopyCleanupPass();
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index be4dafa0e3..0f79901791 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1970,6 +1970,80 @@ FunctionPass*
llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy-cleanup"
+
+namespace llvm {
+ void initializePPCVSXCopyCleanupPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of VSX
+ // registers (mostly because the ABI code still places all values into the
+ // "traditional" floating-point and vector registers). Remove them here.
+ struct PPCVSXCopyCleanup : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopyCleanup() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ SmallVector<MachineInstr *, 4> ToDelete;
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (MI->getOpcode() == PPC::XXLOR &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(2).getReg())
+ ToDelete.push_back(MI);
+ }
+
+ if (!ToDelete.empty())
+ Changed = true;
+
+ for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) {
+ DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]);
+ ToDelete[i]->eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE,
+ "PowerPC VSX Copy Cleanup", false, false)
+
+char PPCVSXCopyCleanup::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); }
+
+#undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-early-ret"
STATISTIC(NumBCLR, "Number of early conditional returns");
STATISTIC(NumBLR, "Number of early returns");
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index cb869bd91e..e7438f394c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -182,6 +182,9 @@ bool PPCPassConfig::addPreRegAlloc() {
}
bool PPCPassConfig::addPreSched2() {
+ if (getPPCSubtarget().hasVSX())
+ addPass(createPPCVSXCopyCleanupPass());
+
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
diff --git a/test/CodeGen/PowerPC/vsx-self-copy.ll b/test/CodeGen/PowerPC/vsx-self-copy.ll
new file mode 100644
index 0000000000..23615ca10c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-self-copy.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @takFP(double %x, double %y, double %z) #0 {
+entry:
+ br i1 undef, label %if.then, label %return
+
+if.then: ; preds = %if.then, %entry
+ %x.tr16 = phi double [ %call, %if.then ], [ %x, %entry ]
+ %call = tail call double @takFP(double undef, double undef, double undef)
+ %call4 = tail call double @takFP(double undef, double %x.tr16, double undef)
+ %cmp = fcmp olt double undef, %call
+ br i1 %cmp, label %if.then, label %return
+
+return: ; preds = %if.then, %entry
+ %z.tr.lcssa = phi double [ %z, %entry ], [ %call4, %if.then ]
+ ret double %z.tr.lcssa
+
+; CHECK: @takFP
+; CHECK-NOT: xxlor 0, 0, 0
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+