diff options
author | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-08-01 10:39:40 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-08-01 10:39:40 +0000 |
commit | 8f0ad5ae8f2699f6ab13a229941a0b192273cae8 (patch) | |
tree | 9d0bb8af50d25824ea6549b972bee2930b765bba /lib | |
parent | 3237f88882eed8a67fa679f7071a5441c4306ac3 (diff) | |
download | llvm-8f0ad5ae8f2699f6ab13a229941a0b192273cae8.tar.gz llvm-8f0ad5ae8f2699f6ab13a229941a0b192273cae8.tar.bz2 llvm-8f0ad5ae8f2699f6ab13a229941a0b192273cae8.tar.xz |
[SystemZ] Reuse CC results for integer comparisons with zero
This also fixes a bug in the predication of LR to LOCR: I'd forgotten
that with these in-place instruction builds, the implicit operands need
to be added manually. I think this was latent until now, but is tested
by int-cmp-45.c. It also adds a CC valid mask to STOC, again tested by
int-cmp-45.c.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187573 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/SystemZ/SystemZISelLowering.cpp | 3 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZInstrFormats.td | 49 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZInstrInfo.cpp | 3 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZInstrInfo.h | 24 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZInstrInfo.td | 71 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZLongBranch.cpp | 185 |
6 files changed, 277 insertions, 58 deletions
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index ffd842d49a..6acdcd4bef 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1813,7 +1813,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, if (Invert) CCMask ^= CCValid; BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) - .addReg(SrcReg).addOperand(Base).addImm(Disp).addImm(CCMask); + .addReg(SrcReg).addOperand(Base).addImm(Disp) + .addImm(CCValid).addImm(CCMask); MI->eraseFromParent(); return MBB; } diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 915891d09d..9883714903 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -61,12 +61,41 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, // The access size of all memory operands in bytes, or 0 if not known. bits<5> AccessBytes = 0; - let TSFlags{0} = SimpleBDXLoad; - let TSFlags{1} = SimpleBDXStore; - let TSFlags{2} = Has20BitOffset; - let TSFlags{3} = HasIndex; - let TSFlags{4} = Is128Bit; - let TSFlags{9-5} = AccessBytes; + // If the instruction sets CC to a useful value, this gives the mask + // of all possible CC results. The mask has the same form as + // SystemZ::CCMASK_*. + bits<4> CCValues = 0; + + // True if the instruction sets CC to 0 when the result is 0. + bit CCHasZero = 0; + + // True if the instruction sets CC to 1 when the result is less than 0 + // and to 2 when the result is greater than 0. + bit CCHasOrder = 0; + + // True if the instruction is conditional and if the CC mask operand + // comes first (as for BRC, etc.). + bit CCMaskFirst = 0; + + // Similar, but true if the CC mask operand comes last (as for LOC, etc.). + bit CCMaskLast = 0; + + // True if the instruction is the "logical" rather than "arithmetic" form, + // in cases where a distinction exists. + bit IsLogical = 0; + + let TSFlags{0} = SimpleBDXLoad; + let TSFlags{1} = SimpleBDXStore; + let TSFlags{2} = Has20BitOffset; + let TSFlags{3} = HasIndex; + let TSFlags{4} = Is128Bit; + let TSFlags{9-5} = AccessBytes; + let TSFlags{13-10} = CCValues; + let TSFlags{14} = CCHasZero; + let TSFlags{15} = CCHasOrder; + let TSFlags{16} = CCMaskFirst; + let TSFlags{17} = CCMaskLast; + let TSFlags{18} = IsLogical; } //===----------------------------------------------------------------------===// @@ -623,11 +652,12 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class CondStoreRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdaddr20only> - : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$R3), + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), mnemonic#"$R3\t$R1, $BD2", []>, Requires<[FeatureLoadStoreOnCond]> { let mayStore = 1; let AccessBytes = bytes; + let CCMaskLast = 1; } // Like CondStoreRSY, but used for the raw assembly form. The condition-code @@ -686,7 +716,9 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3), mnemonic#"r$R3\t$R1, $R2", []>, - Requires<[FeatureLoadStoreOnCond]>; + Requires<[FeatureLoadStoreOnCond]> { + let CCMaskLast = 1; +} // Like CondUnaryRRF, but used for the raw assembly form. The condition-code // mask is the third operand rather than being part of the mnemonic. @@ -748,6 +780,7 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode, let DisableEncoding = "$R1src"; let mayLoad = 1; let AccessBytes = bytes; + let CCMaskLast = 1; } // Like CondUnaryRSY, but used for the raw assembly form. The condition-code diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 2b604a99fd..9913db7b0e 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -341,7 +341,8 @@ PredicateInstruction(MachineInstr *MI, if (unsigned CondOpcode = getConditionalMove(Opcode)) { MI->setDesc(get(CondOpcode)); MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addImm(CCValid).addImm(CCMask); + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit);; return true; } } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 917ac6e348..763a3956fc 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -28,17 +28,27 @@ class SystemZTargetMachine; namespace SystemZII { enum { // See comments in SystemZInstrFormats.td. - SimpleBDXLoad = (1 << 0), - SimpleBDXStore = (1 << 1), - Has20BitOffset = (1 << 2), - HasIndex = (1 << 3), - Is128Bit = (1 << 4), - AccessSizeMask = (31 << 5), - AccessSizeShift = 5 + SimpleBDXLoad = (1 << 0), + SimpleBDXStore = (1 << 1), + Has20BitOffset = (1 << 2), + HasIndex = (1 << 3), + Is128Bit = (1 << 4), + AccessSizeMask = (31 << 5), + AccessSizeShift = 5, + CCValuesMask = (15 << 10), + CCValuesShift = 10, + CCHasZero = (1 << 14), + CCHasOrder = (1 << 15), + CCMaskFirst = (1 << 16), + CCMaskLast = (1 << 17), + IsLogical = (1 << 18) }; static inline unsigned getAccessSize(unsigned int Flags) { return (Flags & AccessSizeMask) >> AccessSizeShift; } + static inline unsigned getCCValues(unsigned int Flags) { + return (Flags & CCValuesMask) >> CCValuesShift; + } // SystemZ MachineOperand target flags. enum { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 341eb90404..748539aa5b 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -59,7 +59,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { // the first operand. It seems friendlier to use mnemonic forms like // JE and JLH when writing out the assembly though. let isBranch = 1, isTerminator = 1, Uses = [CC] in { - let isCodeGenOnly = 1 in { + let isCodeGenOnly = 1, CCMaskFirst = 1 in { def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1, brtarget16:$I2), "j$R1\t$I2", [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>; @@ -195,7 +195,7 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store, // The definitions here are for the call-clobbered registers. let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, - F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D], + F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC], R1 = 14, isCodeGenOnly = 1 in { def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops), "bras\t%r14, $I2", []>; @@ -512,9 +512,12 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, //===----------------------------------------------------------------------===// let Defs = [CC] in { - def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; - def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; - def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; + let CCValues = 0xF, CCHasZero = 1 in { + def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; + } + let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in + def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; } defm : SXU<ineg, LCGFR>; @@ -566,7 +569,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), //===----------------------------------------------------------------------===// // Plain addition. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in { // Addition of a register. let isCommutable = 1 in { defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>; @@ -637,7 +640,7 @@ let Defs = [CC], Uses = [CC] in { // Plain substraction. Although immediate forms exist, we use the // add-immediate instruction instead. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in { // Subtraction of a register. defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>; def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>; @@ -687,13 +690,14 @@ let Defs = [CC], Uses = [CC] in { let Defs = [CC] in { // ANDs of a register. - let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>; defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>; } let isConvertibleToThreeAddress = 1 in { // ANDs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. let isCodeGenOnly = 1 in { def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; @@ -704,15 +708,19 @@ let Defs = [CC] in { def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; // ANDs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; } // ANDs of memory. - defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + } // AND to memory defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; @@ -726,12 +734,13 @@ defm : RMWIByte<and, bdaddr20pair, NIY>; let Defs = [CC] in { // ORs of a register. - let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>; defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>; } // ORs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. let isCodeGenOnly = 1 in { def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; @@ -742,14 +751,18 @@ let Defs = [CC] in { def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>; // ORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>; def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; // ORs of memory. - defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + } // OR to memory defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; @@ -763,20 +776,24 @@ defm : RMWIByte<or, bdaddr20pair, OIY>; let Defs = [CC] in { // XORs of a register. - let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>; defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>; } // XORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>; def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; // XORs of memory. - defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + } // XOR to memory defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; @@ -849,7 +866,7 @@ let neverHasSideEffects = 1 in { } // Arithmetic shift right. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in { defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>; } @@ -862,11 +879,12 @@ let neverHasSideEffects = 1 in { // Rotate second operand left and inserted selected bits into first operand. // These can act like 32-bit operands provided that the constant start and -// end bits (operands 2 and 3) are in the range [32, 64) +// end bits (operands 2 and 3) are in the range [32, 64). let Defs = [CC] in { let isCodeGenOnly = 1 in - def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; - def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; + def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; + let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in + def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; } // Forms of RISBG that only affect one word of the destination register. @@ -880,7 +898,8 @@ def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>, Requires<[FeatureHighWord]>; // Rotate second operand left and perform a logical operation with selected -// bits of the first operand. +// bits of the first operand. The CC result only describes the selected bits, +// so isn't useful for a full comparison against zero. let Defs = [CC] in { def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>; def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>; @@ -892,7 +911,7 @@ let Defs = [CC] in { //===----------------------------------------------------------------------===// // Signed comparisons. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE in { // Comparison with a register. def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>; def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>; @@ -926,7 +945,7 @@ let Defs = [CC] in { defm : SXB<z_cmp, GR64, CGFR>; // Unsigned comparisons. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { // Comparison with a register. def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>; def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>; diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 9b637c01c6..f0ea3e20be 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -7,18 +7,36 @@ // //===----------------------------------------------------------------------===// // -// This pass does two things: -// (1) fuse compares and branches into COMPARE AND BRANCH instructions -// (2) make sure that all branches are in range. +// This pass does three things: +// (1) try to remove compares if CC already contains the required information +// (2) fuse compares and branches into COMPARE AND BRANCH instructions +// (3) make sure that all branches are in range. // -// We do (1) here rather than earlier because the fused form prevents -// predication. +// We do (1) here rather than earlier because some transformations can +// change the set of available CC values and we generally want those +// transformations to have priority over (1). This is especially true in +// the commonest case where the CC value is used by a single in-range branch +// instruction, since (2) will then be able to fuse the compare and the +// branch instead. // -// Doing it so late makes it more likely that a register will be reused +// For example, two-address NILF can sometimes be converted into +// three-address RISBLG. NILF produces a CC value that indicates whether +// the low word is zero, but RISBLG does not modify CC at all. On the +// other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. +// The CC value produced by NILL isn't useful for our purposes, but the +// value produced by RISBG can be used for any comparison with zero +// (not just equality). So there are some transformations that lose +// CC values (while still being worthwhile) and others that happen to make +// the CC result more useful than it was originally. +// +// We do (2) here rather than earlier because the fused form prevents +// predication. It also has to happen after (1). +// +// Doing (2) so late makes it more likely that a register will be reused // between the compare and the branch, but it isn't clear whether preventing // that would be a win or not. // -// There are several ways in which (2) could be done. One aggressive +// There are several ways in which (3) could be done. One aggressive // approach is to assume that all branches are in range and successively // replace those that turn out not to be in range with a longer form // (branch relaxation). A simple implementation is to continually walk @@ -156,6 +174,7 @@ namespace { void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, bool AssumeRelaxed); TerminatorInfo describeTerminator(MachineInstr *MI); + bool optimizeCompareZero(MachineInstr *PrevCCSetter, MachineInstr *Compare); bool fuseCompareAndBranch(MachineInstr *Compare); uint64_t initMBBInfo(); bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); @@ -254,6 +273,15 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { return Terminator; } +// Return true if CC is live out of MBB. +static bool isCCLiveOut(MachineBasicBlock *MBB) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + // Return true if CC is live after MBBI. static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, const TargetRegisterInfo *TRI) { @@ -269,12 +297,130 @@ static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, return false; } - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) + return isCCLiveOut(MBB); +} + +// Return true if all uses of the CC value produced by MBBI could make do +// with the CC values in ReusableCCMask. When returning true, point AlterMasks +// to the "CC valid" and "CC mask" operands for each condition. +static bool canRestrictCCMask(MachineBasicBlock::iterator MBBI, + unsigned ReusableCCMask, + SmallVectorImpl<MachineOperand *> &AlterMasks, + const TargetRegisterInfo *TRI) { + MachineBasicBlock *MBB = MBBI->getParent(); + MachineBasicBlock::iterator MBBE = MBB->end(); + for (++MBBI; MBBI != MBBE; ++MBBI) { + if (MBBI->readsRegister(SystemZ::CC, TRI)) { + // Fail if this isn't a use of CC that we understand. + unsigned MBBIFlags = MBBI->getDesc().TSFlags; + unsigned FirstOpNum; + if (MBBIFlags & SystemZII::CCMaskFirst) + FirstOpNum = 0; + else if (MBBIFlags & SystemZII::CCMaskLast) + FirstOpNum = MBBI->getNumExplicitOperands() - 2; + else + return false; + + // Check whether the instruction predicate treats all CC values + // outside of ReusableCCMask in the same way. In that case it + // doesn't matter what those CC values mean. + unsigned CCValid = MBBI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = MBBI->getOperand(FirstOpNum + 1).getImm(); + unsigned OutValid = ~ReusableCCMask & CCValid; + unsigned OutMask = ~ReusableCCMask & CCMask; + if (OutMask != 0 && OutMask != OutValid) + return false; + + AlterMasks.push_back(&MBBI->getOperand(FirstOpNum)); + AlterMasks.push_back(&MBBI->getOperand(FirstOpNum + 1)); + + // Succeed if this was the final use of the CC value. + if (MBBI->killsRegister(SystemZ::CC, TRI)) + return true; + } + // Succeed if the instruction redefines CC. + if (MBBI->definesRegister(SystemZ::CC, TRI)) return true; + } + // Fail if there are other uses of CC that we didn't see. + return !isCCLiveOut(MBB); +} - return false; +// Try to make Compare redundant with PrevCCSetter, the previous setter of CC, +// by looking for cases where Compare compares the result of PrevCCSetter +// against zero. Return true on success and if Compare can therefore +// be deleted. +bool SystemZLongBranch::optimizeCompareZero(MachineInstr *PrevCCSetter, + MachineInstr *Compare) { + if (MF->getTarget().getOptLevel() == CodeGenOpt::None) + return false; + + // Check whether this is a comparison against zero. + if (Compare->getNumExplicitOperands() != 2 || + !Compare->getOperand(1).isImm() || + Compare->getOperand(1).getImm() != 0) + return false; + + // See which compare-style condition codes are available after PrevCCSetter. + unsigned PrevFlags = PrevCCSetter->getDesc().TSFlags; + unsigned ReusableCCMask = 0; + if (PrevFlags & SystemZII::CCHasZero) + ReusableCCMask |= SystemZ::CCMASK_CMP_EQ; + + // For unsigned comparisons with zero, only equality makes sense. + unsigned CompareFlags = Compare->getDesc().TSFlags; + if (!(CompareFlags & SystemZII::IsLogical) && + (PrevFlags & SystemZII::CCHasOrder)) + ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT; + + if (ReusableCCMask == 0) + return false; + + // Make sure that PrevCCSetter sets the value being compared. + unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcSubReg = Compare->getOperand(0).getSubReg(); + if (!PrevCCSetter->getOperand(0).isReg() || + !PrevCCSetter->getOperand(0).isDef() || + PrevCCSetter->getOperand(0).getReg() != SrcReg || + PrevCCSetter->getOperand(0).getSubReg() != SrcSubReg) + return false; + + // Make sure that SrcReg survives until Compare. + MachineBasicBlock::iterator MBBI = PrevCCSetter, MBBE = Compare; + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + for (++MBBI; MBBI != MBBE; ++MBBI) + if (MBBI->modifiesRegister(SrcReg, TRI)) + return false; + + // See whether all uses of Compare's CC value could make do with + // the values produced by PrevCCSetter. + SmallVector<MachineOperand *, 4> AlterMasks; + if (!canRestrictCCMask(Compare, ReusableCCMask, AlterMasks, TRI)) + return false; + + // Alter the CC masks that canRestrictCCMask says need to be altered. + unsigned CCValues = SystemZII::getCCValues(PrevFlags); + assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); + for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { + AlterMasks[I]->setImm(CCValues); + unsigned CCMask = AlterMasks[I + 1]->getImm(); + if (CCMask & ~ReusableCCMask) + AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | + (CCValues & ~ReusableCCMask)); + } + + // CC is now live after PrevCCSetter. + int CCDef = PrevCCSetter->findRegisterDefOperandIdx(SystemZ::CC, false, + true, TRI); + assert(CCDef >= 0 && "Couldn't find CC set"); + PrevCCSetter->getOperand(CCDef).setIsDead(false); + + // Clear any intervening kills of CC. + MBBI = PrevCCSetter; + for (++MBBI; MBBI != MBBE; ++MBBI) + MBBI->clearRegisterKills(SystemZ::CC, TRI); + + return true; } // Try to fuse compare instruction Compare into a later branch. Return @@ -345,6 +491,8 @@ bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) { // that no branches need relaxation. Return the size of the function under // this assumption. uint64_t SystemZLongBranch::initMBBInfo() { + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + MF->RenumberBlocks(); unsigned NumBlocks = MF->size(); @@ -365,13 +513,20 @@ uint64_t SystemZLongBranch::initMBBInfo() { // Calculate the size of the fixed part of the block. MachineBasicBlock::iterator MI = MBB->begin(); MachineBasicBlock::iterator End = MBB->end(); + MachineInstr *PrevCCSetter = 0; while (MI != End && !MI->isTerminator()) { MachineInstr *Current = MI; ++MI; - if (Current->isCompare() && fuseCompareAndBranch(Current)) - Current->removeFromParent(); - else - Block.Size += TII->getInstSizeInBytes(Current); + if (Current->isCompare()) { + if ((PrevCCSetter && optimizeCompareZero(PrevCCSetter, Current)) || + fuseCompareAndBranch(Current)) { + Current->removeFromParent(); + continue; + } + } + if (Current->modifiesRegister(SystemZ::CC, TRI)) + PrevCCSetter = Current; + Block.Size += TII->getInstSizeInBytes(Current); } skipNonTerminators(Position, Block); |