summaryrefslogtreecommitdiff
path: root/lib/Target/ARM/ARMExpandPseudoInsts.cpp
diff options
context:
space:
mode:
authorJames Molloy <james.molloy@arm.com>2012-09-06 09:16:01 +0000
committerJames Molloy <james.molloy@arm.com>2012-09-06 09:16:01 +0000
commit6c822eea47dbef96940819b1ea085fabc49a1e71 (patch)
treee9d2318a4f16d3ae149ba9e4f2794ba5f28c5f07 /lib/Target/ARM/ARMExpandPseudoInsts.cpp
parent7859f438e198fe441abef3d2c95c1cb9517f575b (diff)
downloadllvm-6c822eea47dbef96940819b1ea085fabc49a1e71.tar.gz
llvm-6c822eea47dbef96940819b1ea085fabc49a1e71.tar.bz2
llvm-6c822eea47dbef96940819b1ea085fabc49a1e71.tar.xz
Optimize codegen for VSETLNi{8,16,32} operating on Q registers. Degenerate to a VSETLN on D registers, instead of an (INSERT_SUBREG (VSETLN (EXTRACT_SUBREG ))) sequence to help the register coalescer.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163298 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM/ARMExpandPseudoInsts.cpp')
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp51
1 files changed, 51 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 15bb32eb14..8ed6b751f3 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1208,6 +1208,57 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
+ case ARM::VSETLNi8Q:
+ case ARM::VSETLNi16Q: {
+ // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting
+ // on the respective D register.
+
+ unsigned QReg = MI.getOperand(1).getReg();
+ unsigned QLane = MI.getOperand(3).getImm();
+
+ unsigned NewOpcode, DLane, DSubReg;
+ switch (Opcode) {
+ default: llvm_unreachable("Invalid opcode!");
+ case ARM::VSETLNi8Q:
+ // 8 possible 8-bit lanes per DPR.
+ NewOpcode = ARM::VSETLNi8;
+ DLane = QLane % 8;
+ DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0;
+ break;
+ case ARM::VSETLNi16Q:
+ // 4 possible 16-bit lanes per DPR.
+ NewOpcode = ARM::VSETLNi16;
+ DLane = QLane % 4;
+ DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0;
+ break;
+ }
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode));
+
+ unsigned DReg = TRI->getSubReg(QReg, DSubReg);
+
+ MIB.addReg(DReg, RegState::Define); // Output DPR
+ MIB.addReg(DReg); // Input DPR
+ MIB.addOperand(MI.getOperand(2)); // Input GPR
+ MIB.addImm(DLane); // Lane
+
+ // Add the predicate operands.
+ MIB.addOperand(MI.getOperand(4));
+ MIB.addOperand(MI.getOperand(5));
+
+ if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register.
+ MIB->addRegisterKilled(QReg, TRI, true);
+ // And an implicit def of the output register (which should always be the
+ // same as the input register).
+ MIB->addRegisterDefined(QReg, TRI);
+
+ TransferImpOps(MI, MIB, MIB);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;