author     Richard Sandiford <rsandifo@linux.vnet.ibm.com>   2013-08-01 10:39:40 +0000
committer  Richard Sandiford <rsandifo@linux.vnet.ibm.com>   2013-08-01 10:39:40 +0000
commit     8f0ad5ae8f2699f6ab13a229941a0b192273cae8 (patch)
tree       9d0bb8af50d25824ea6549b972bee2930b765bba /lib
parent     3237f88882eed8a67fa679f7071a5441c4306ac3 (diff)
[SystemZ] Reuse CC results for integer comparisons with zero
This also fixes a bug in the predication of LR to LOCR: I'd forgotten that with these in-place instruction builds, the implicit operands need to be added manually. I think this was latent until now, but is tested by int-cmp-45.c.

It also adds a CC valid mask to STOC, again tested by int-cmp-45.c.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187573 91177308-0d34-0410-b5e6-96231b3b80d8
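A minimal sketch of the in-place predication fix mentioned above, distilled from the SystemZInstrInfo.cpp hunk below (illustrative only, not additional code from the patch): rewriting an existing MachineInstr bypasses BuildMI, so the implicit use of CC has to be appended by hand. The fragment assumes the surrounding context of SystemZInstrInfo::PredicateInstruction.

  // Predicate LR in place by switching it to LOCR.  Because the
  // MachineInstr already exists, BuildMI is not involved and the
  // implicit CC operand must be added explicitly.
  if (unsigned CondOpcode = getConditionalMove(Opcode)) {
    MI->setDesc(get(CondOpcode));
    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
        .addImm(CCValid).addImm(CCMask)
        .addReg(SystemZ::CC, RegState::Implicit);
    return true;
  }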
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp  |   3
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFormats.td   |  49
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp     |   3
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h       |  24
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td      |  71
-rw-r--r--  lib/Target/SystemZ/SystemZLongBranch.cpp    | 185
6 files changed, 277 insertions, 58 deletions
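As an aid to reading the hunks that follow, here is a hedged sketch of how the new TSFlags bits (CCValues, CCHasZero, CCHasOrder, IsLogical; see the SystemZInstrInfo.h hunk) are interpreted when deciding whether a compare with zero can reuse an earlier CC result. The helper name getReusableCCMask is hypothetical; in the patch this logic is inlined in SystemZLongBranch::optimizeCompareZero.

// Sketch: which CC values produced by the previous CC-setting instruction
// could a following compare-with-zero reuse?  PrevFlags are the TSFlags of
// the CC setter, CompareFlags those of the compare.
static unsigned getReusableCCMask(unsigned PrevFlags, unsigned CompareFlags) {
  unsigned ReusableCCMask = 0;
  // The setter produces CC 0 exactly when its result is zero, so an
  // equality test against zero is already answered by CC.
  if (PrevFlags & SystemZII::CCHasZero)
    ReusableCCMask |= SystemZ::CCMASK_CMP_EQ;
  // CC 1/2 for negative/positive results only helps signed comparisons;
  // an unsigned ("logical") compare with zero can only usefully test
  // equality.
  if (!(CompareFlags & SystemZII::IsLogical) &&
      (PrevFlags & SystemZII::CCHasOrder))
    ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT;
  return ReusableCCMask;
}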
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index ffd842d49a..6acdcd4bef 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1813,7 +1813,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
if (Invert)
CCMask ^= CCValid;
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
- .addReg(SrcReg).addOperand(Base).addImm(Disp).addImm(CCMask);
+ .addReg(SrcReg).addOperand(Base).addImm(Disp)
+ .addImm(CCValid).addImm(CCMask);
MI->eraseFromParent();
return MBB;
}
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 915891d09d..9883714903 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -61,12 +61,41 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
// The access size of all memory operands in bytes, or 0 if not known.
bits<5> AccessBytes = 0;
- let TSFlags{0} = SimpleBDXLoad;
- let TSFlags{1} = SimpleBDXStore;
- let TSFlags{2} = Has20BitOffset;
- let TSFlags{3} = HasIndex;
- let TSFlags{4} = Is128Bit;
- let TSFlags{9-5} = AccessBytes;
+ // If the instruction sets CC to a useful value, this gives the mask
+ // of all possible CC results. The mask has the same form as
+ // SystemZ::CCMASK_*.
+ bits<4> CCValues = 0;
+
+ // True if the instruction sets CC to 0 when the result is 0.
+ bit CCHasZero = 0;
+
+ // True if the instruction sets CC to 1 when the result is less than 0
+ // and to 2 when the result is greater than 0.
+ bit CCHasOrder = 0;
+
+ // True if the instruction is conditional and if the CC mask operand
+ // comes first (as for BRC, etc.).
+ bit CCMaskFirst = 0;
+
+ // Similar, but true if the CC mask operand comes last (as for LOC, etc.).
+ bit CCMaskLast = 0;
+
+ // True if the instruction is the "logical" rather than "arithmetic" form,
+ // in cases where a distinction exists.
+ bit IsLogical = 0;
+
+ let TSFlags{0} = SimpleBDXLoad;
+ let TSFlags{1} = SimpleBDXStore;
+ let TSFlags{2} = Has20BitOffset;
+ let TSFlags{3} = HasIndex;
+ let TSFlags{4} = Is128Bit;
+ let TSFlags{9-5} = AccessBytes;
+ let TSFlags{13-10} = CCValues;
+ let TSFlags{14} = CCHasZero;
+ let TSFlags{15} = CCHasOrder;
+ let TSFlags{16} = CCMaskFirst;
+ let TSFlags{17} = CCMaskLast;
+ let TSFlags{18} = IsLogical;
}
//===----------------------------------------------------------------------===//
@@ -623,11 +652,12 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
class CondStoreRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$R3),
+ : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3),
mnemonic#"$R3\t$R1, $BD2", []>,
Requires<[FeatureLoadStoreOnCond]> {
let mayStore = 1;
let AccessBytes = bytes;
+ let CCMaskLast = 1;
}
// Like CondStoreRSY, but used for the raw assembly form. The condition-code
@@ -686,7 +716,9 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3),
mnemonic#"r$R3\t$R1, $R2", []>,
- Requires<[FeatureLoadStoreOnCond]>;
+ Requires<[FeatureLoadStoreOnCond]> {
+ let CCMaskLast = 1;
+}
// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
@@ -748,6 +780,7 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
let DisableEncoding = "$R1src";
let mayLoad = 1;
let AccessBytes = bytes;
+ let CCMaskLast = 1;
}
// Like CondUnaryRSY, but used for the raw assembly form. The condition-code
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2b604a99fd..9913db7b0e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -341,7 +341,8 @@ PredicateInstruction(MachineInstr *MI,
if (unsigned CondOpcode = getConditionalMove(Opcode)) {
MI->setDesc(get(CondOpcode));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
- .addImm(CCValid).addImm(CCMask);
+ .addImm(CCValid).addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);;
return true;
}
}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 917ac6e348..763a3956fc 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -28,17 +28,27 @@ class SystemZTargetMachine;
namespace SystemZII {
enum {
// See comments in SystemZInstrFormats.td.
- SimpleBDXLoad = (1 << 0),
- SimpleBDXStore = (1 << 1),
- Has20BitOffset = (1 << 2),
- HasIndex = (1 << 3),
- Is128Bit = (1 << 4),
- AccessSizeMask = (31 << 5),
- AccessSizeShift = 5
+ SimpleBDXLoad = (1 << 0),
+ SimpleBDXStore = (1 << 1),
+ Has20BitOffset = (1 << 2),
+ HasIndex = (1 << 3),
+ Is128Bit = (1 << 4),
+ AccessSizeMask = (31 << 5),
+ AccessSizeShift = 5,
+ CCValuesMask = (15 << 10),
+ CCValuesShift = 10,
+ CCHasZero = (1 << 14),
+ CCHasOrder = (1 << 15),
+ CCMaskFirst = (1 << 16),
+ CCMaskLast = (1 << 17),
+ IsLogical = (1 << 18)
};
static inline unsigned getAccessSize(unsigned int Flags) {
return (Flags & AccessSizeMask) >> AccessSizeShift;
}
+ static inline unsigned getCCValues(unsigned int Flags) {
+ return (Flags & CCValuesMask) >> CCValuesShift;
+ }
// SystemZ MachineOperand target flags.
enum {
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 341eb90404..748539aa5b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -59,7 +59,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
// the first operand. It seems friendlier to use mnemonic forms like
// JE and JLH when writing out the assembly though.
let isBranch = 1, isTerminator = 1, Uses = [CC] in {
- let isCodeGenOnly = 1 in {
+ let isCodeGenOnly = 1, CCMaskFirst = 1 in {
def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1,
brtarget16:$I2), "j$R1\t$I2",
[(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>;
@@ -195,7 +195,7 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
// The definitions here are for the call-clobbered registers.
let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
- F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D],
+ F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC],
R1 = 14, isCodeGenOnly = 1 in {
def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops),
"bras\t%r14, $I2", []>;
@@ -512,9 +512,12 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
//===----------------------------------------------------------------------===//
let Defs = [CC] in {
- def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
- def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
- def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
+ let CCValues = 0xF, CCHasZero = 1 in {
+ def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
+ def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
+ }
+ let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
+ def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
}
defm : SXU<ineg, LCGFR>;
@@ -566,7 +569,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
//===----------------------------------------------------------------------===//
// Plain addition.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
// Addition of a register.
let isCommutable = 1 in {
defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
@@ -637,7 +640,7 @@ let Defs = [CC], Uses = [CC] in {
// Plain substraction. Although immediate forms exist, we use the
// add-immediate instruction instead.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
// Subtraction of a register.
defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>;
def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>;
@@ -687,13 +690,14 @@ let Defs = [CC], Uses = [CC] in {
let Defs = [CC] in {
// ANDs of a register.
- let isCommutable = 1 in {
+ let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>;
defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>;
}
let isConvertibleToThreeAddress = 1 in {
// ANDs of a 16-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 16-bit field, not the full register.
let isCodeGenOnly = 1 in {
def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
@@ -704,15 +708,19 @@ let Defs = [CC] in {
def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
// ANDs of a 32-bit immediate, leaving other bits unaffected.
- let isCodeGenOnly = 1 in
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
}
// ANDs of memory.
- defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
- def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ let CCValues = 0xC, CCHasZero = 1 in {
+ defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
+ def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ }
// AND to memory
defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
@@ -726,12 +734,13 @@ defm : RMWIByte<and, bdaddr20pair, NIY>;
let Defs = [CC] in {
// ORs of a register.
- let isCommutable = 1 in {
+ let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>;
defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>;
}
// ORs of a 16-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 16-bit field, not the full register.
let isCodeGenOnly = 1 in {
def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
@@ -742,14 +751,18 @@ let Defs = [CC] in {
def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
// ORs of a 32-bit immediate, leaving other bits unaffected.
- let isCodeGenOnly = 1 in
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
// ORs of memory.
- defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
- def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+ let CCValues = 0xC, CCHasZero = 1 in {
+ defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
+ def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+ }
// OR to memory
defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
@@ -763,20 +776,24 @@ defm : RMWIByte<or, bdaddr20pair, OIY>;
let Defs = [CC] in {
// XORs of a register.
- let isCommutable = 1 in {
+ let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>;
defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>;
}
// XORs of a 32-bit immediate, leaving other bits unaffected.
- let isCodeGenOnly = 1 in
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
// XORs of memory.
- defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
- def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+ let CCValues = 0xC, CCHasZero = 1 in {
+ defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
+ def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+ }
// XOR to memory
defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
@@ -849,7 +866,7 @@ let neverHasSideEffects = 1 in {
}
// Arithmetic shift right.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in {
defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
}
@@ -862,11 +879,12 @@ let neverHasSideEffects = 1 in {
// Rotate second operand left and inserted selected bits into first operand.
// These can act like 32-bit operands provided that the constant start and
-// end bits (operands 2 and 3) are in the range [32, 64)
+// end bits (operands 2 and 3) are in the range [32, 64).
let Defs = [CC] in {
let isCodeGenOnly = 1 in
- def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
- def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
+ def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
+ let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
+ def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
}
// Forms of RISBG that only affect one word of the destination register.
@@ -880,7 +898,8 @@ def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>,
Requires<[FeatureHighWord]>;
// Rotate second operand left and perform a logical operation with selected
-// bits of the first operand.
+// bits of the first operand. The CC result only describes the selected bits,
+// so isn't useful for a full comparison against zero.
let Defs = [CC] in {
def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>;
def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>;
@@ -892,7 +911,7 @@ let Defs = [CC] in {
//===----------------------------------------------------------------------===//
// Signed comparisons.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xE in {
// Comparison with a register.
def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>;
def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
@@ -926,7 +945,7 @@ let Defs = [CC] in {
defm : SXB<z_cmp, GR64, CGFR>;
// Unsigned comparisons.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
// Comparison with a register.
def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>;
def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 9b637c01c6..f0ea3e20be 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -7,18 +7,36 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass does two things:
-// (1) fuse compares and branches into COMPARE AND BRANCH instructions
-// (2) make sure that all branches are in range.
+// This pass does three things:
+// (1) try to remove compares if CC already contains the required information
+// (2) fuse compares and branches into COMPARE AND BRANCH instructions
+// (3) make sure that all branches are in range.
//
-// We do (1) here rather than earlier because the fused form prevents
-// predication.
+// We do (1) here rather than earlier because some transformations can
+// change the set of available CC values and we generally want those
+// transformations to have priority over (1). This is especially true in
+// the commonest case where the CC value is used by a single in-range branch
+// instruction, since (2) will then be able to fuse the compare and the
+// branch instead.
//
-// Doing it so late makes it more likely that a register will be reused
+// For example, two-address NILF can sometimes be converted into
+// three-address RISBLG. NILF produces a CC value that indicates whether
+// the low word is zero, but RISBLG does not modify CC at all. On the
+// other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG.
+// The CC value produced by NILL isn't useful for our purposes, but the
+// value produced by RISBG can be used for any comparison with zero
+// (not just equality). So there are some transformations that lose
+// CC values (while still being worthwhile) and others that happen to make
+// the CC result more useful than it was originally.
+//
+// We do (2) here rather than earlier because the fused form prevents
+// predication. It also has to happen after (1).
+//
+// Doing (2) so late makes it more likely that a register will be reused
// between the compare and the branch, but it isn't clear whether preventing
// that would be a win or not.
//
-// There are several ways in which (2) could be done. One aggressive
+// There are several ways in which (3) could be done. One aggressive
// approach is to assume that all branches are in range and successively
// replace those that turn out not to be in range with a longer form
// (branch relaxation). A simple implementation is to continually walk
@@ -156,6 +174,7 @@ namespace {
void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
bool AssumeRelaxed);
TerminatorInfo describeTerminator(MachineInstr *MI);
+ bool optimizeCompareZero(MachineInstr *PrevCCSetter, MachineInstr *Compare);
bool fuseCompareAndBranch(MachineInstr *Compare);
uint64_t initMBBInfo();
bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
@@ -254,6 +273,15 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
return Terminator;
}
+// Return true if CC is live out of MBB.
+static bool isCCLiveOut(MachineBasicBlock *MBB) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(SystemZ::CC))
+ return true;
+ return false;
+}
+
// Return true if CC is live after MBBI.
static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
const TargetRegisterInfo *TRI) {
@@ -269,12 +297,130 @@ static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
return false;
}
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(SystemZ::CC))
+ return isCCLiveOut(MBB);
+}
+
+// Return true if all uses of the CC value produced by MBBI could make do
+// with the CC values in ReusableCCMask. When returning true, point AlterMasks
+// to the "CC valid" and "CC mask" operands for each condition.
+static bool canRestrictCCMask(MachineBasicBlock::iterator MBBI,
+ unsigned ReusableCCMask,
+ SmallVectorImpl<MachineOperand *> &AlterMasks,
+ const TargetRegisterInfo *TRI) {
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineBasicBlock::iterator MBBE = MBB->end();
+ for (++MBBI; MBBI != MBBE; ++MBBI) {
+ if (MBBI->readsRegister(SystemZ::CC, TRI)) {
+ // Fail if this isn't a use of CC that we understand.
+ unsigned MBBIFlags = MBBI->getDesc().TSFlags;
+ unsigned FirstOpNum;
+ if (MBBIFlags & SystemZII::CCMaskFirst)
+ FirstOpNum = 0;
+ else if (MBBIFlags & SystemZII::CCMaskLast)
+ FirstOpNum = MBBI->getNumExplicitOperands() - 2;
+ else
+ return false;
+
+ // Check whether the instruction predicate treats all CC values
+ // outside of ReusableCCMask in the same way. In that case it
+ // doesn't matter what those CC values mean.
+ unsigned CCValid = MBBI->getOperand(FirstOpNum).getImm();
+ unsigned CCMask = MBBI->getOperand(FirstOpNum + 1).getImm();
+ unsigned OutValid = ~ReusableCCMask & CCValid;
+ unsigned OutMask = ~ReusableCCMask & CCMask;
+ if (OutMask != 0 && OutMask != OutValid)
+ return false;
+
+ AlterMasks.push_back(&MBBI->getOperand(FirstOpNum));
+ AlterMasks.push_back(&MBBI->getOperand(FirstOpNum + 1));
+
+ // Succeed if this was the final use of the CC value.
+ if (MBBI->killsRegister(SystemZ::CC, TRI))
+ return true;
+ }
+ // Succeed if the instruction redefines CC.
+ if (MBBI->definesRegister(SystemZ::CC, TRI))
return true;
+ }
+ // Fail if there are other uses of CC that we didn't see.
+ return !isCCLiveOut(MBB);
+}
- return false;
+// Try to make Compare redundant with PrevCCSetter, the previous setter of CC,
+// by looking for cases where Compare compares the result of PrevCCSetter
+// against zero. Return true on success and if Compare can therefore
+// be deleted.
+bool SystemZLongBranch::optimizeCompareZero(MachineInstr *PrevCCSetter,
+ MachineInstr *Compare) {
+ if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+ return false;
+
+ // Check whether this is a comparison against zero.
+ if (Compare->getNumExplicitOperands() != 2 ||
+ !Compare->getOperand(1).isImm() ||
+ Compare->getOperand(1).getImm() != 0)
+ return false;
+
+ // See which compare-style condition codes are available after PrevCCSetter.
+ unsigned PrevFlags = PrevCCSetter->getDesc().TSFlags;
+ unsigned ReusableCCMask = 0;
+ if (PrevFlags & SystemZII::CCHasZero)
+ ReusableCCMask |= SystemZ::CCMASK_CMP_EQ;
+
+ // For unsigned comparisons with zero, only equality makes sense.
+ unsigned CompareFlags = Compare->getDesc().TSFlags;
+ if (!(CompareFlags & SystemZII::IsLogical) &&
+ (PrevFlags & SystemZII::CCHasOrder))
+ ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT;
+
+ if (ReusableCCMask == 0)
+ return false;
+
+ // Make sure that PrevCCSetter sets the value being compared.
+ unsigned SrcReg = Compare->getOperand(0).getReg();
+ unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
+ if (!PrevCCSetter->getOperand(0).isReg() ||
+ !PrevCCSetter->getOperand(0).isDef() ||
+ PrevCCSetter->getOperand(0).getReg() != SrcReg ||
+ PrevCCSetter->getOperand(0).getSubReg() != SrcSubReg)
+ return false;
+
+ // Make sure that SrcReg survives until Compare.
+ MachineBasicBlock::iterator MBBI = PrevCCSetter, MBBE = Compare;
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ if (MBBI->modifiesRegister(SrcReg, TRI))
+ return false;
+
+ // See whether all uses of Compare's CC value could make do with
+ // the values produced by PrevCCSetter.
+ SmallVector<MachineOperand *, 4> AlterMasks;
+ if (!canRestrictCCMask(Compare, ReusableCCMask, AlterMasks, TRI))
+ return false;
+
+ // Alter the CC masks that canRestrictCCMask says need to be altered.
+ unsigned CCValues = SystemZII::getCCValues(PrevFlags);
+ assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
+ for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
+ AlterMasks[I]->setImm(CCValues);
+ unsigned CCMask = AlterMasks[I + 1]->getImm();
+ if (CCMask & ~ReusableCCMask)
+ AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
+ (CCValues & ~ReusableCCMask));
+ }
+
+ // CC is now live after PrevCCSetter.
+ int CCDef = PrevCCSetter->findRegisterDefOperandIdx(SystemZ::CC, false,
+ true, TRI);
+ assert(CCDef >= 0 && "Couldn't find CC set");
+ PrevCCSetter->getOperand(CCDef).setIsDead(false);
+
+ // Clear any intervening kills of CC.
+ MBBI = PrevCCSetter;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ MBBI->clearRegisterKills(SystemZ::CC, TRI);
+
+ return true;
}
// Try to fuse compare instruction Compare into a later branch. Return
@@ -345,6 +491,8 @@ bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) {
// that no branches need relaxation. Return the size of the function under
// this assumption.
uint64_t SystemZLongBranch::initMBBInfo() {
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+
MF->RenumberBlocks();
unsigned NumBlocks = MF->size();
@@ -365,13 +513,20 @@ uint64_t SystemZLongBranch::initMBBInfo() {
// Calculate the size of the fixed part of the block.
MachineBasicBlock::iterator MI = MBB->begin();
MachineBasicBlock::iterator End = MBB->end();
+ MachineInstr *PrevCCSetter = 0;
while (MI != End && !MI->isTerminator()) {
MachineInstr *Current = MI;
++MI;
- if (Current->isCompare() && fuseCompareAndBranch(Current))
- Current->removeFromParent();
- else
- Block.Size += TII->getInstSizeInBytes(Current);
+ if (Current->isCompare()) {
+ if ((PrevCCSetter && optimizeCompareZero(PrevCCSetter, Current)) ||
+ fuseCompareAndBranch(Current)) {
+ Current->removeFromParent();
+ continue;
+ }
+ }
+ if (Current->modifiesRegister(SystemZ::CC, TRI))
+ PrevCCSetter = Current;
+ Block.Size += TII->getInstSizeInBytes(Current);
}
skipNonTerminators(Position, Block);