-rw-r--r--  include/llvm/Target/TargetLowering.h       7
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp   94
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp          5
-rw-r--r--  lib/Target/X86/X86ISelLowering.h            5
-rw-r--r--  test/CodeGen/X86/narrow_op-1.ll            23
-rw-r--r--  test/CodeGen/X86/narrow_op-2.ll            23
6 files changed, 156 insertions, 1 deletion
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 0576e3e1a8..dc66e55a1f 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1420,6 +1420,13 @@ public:
return false;
}
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const {
+ return true;
+ }
+
//===--------------------------------------------------------------------===//
// Div utility functions
//
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a866cb5629..6a47aa52a2 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -41,6 +41,7 @@ using namespace llvm;
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
namespace {
static cl::opt<bool>
@@ -222,6 +223,7 @@ namespace {
SDValue BuildUDIV(SDNode *N);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -4900,6 +4902,96 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
return SDValue();
}
+
+/// ReduceLoadOpStoreWidth - Look for a sequence of load / op / store where op
+/// is one of 'or', 'xor', or 'and' with an immediate. If 'op' only touches
+/// some of the loaded bits, try narrowing the load / op / store if doing so
+/// would be a win for performance or code size.
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ MVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue(0, 0);
+
+ unsigned Opc = Value.getOpcode();
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue(0, 0);
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr/* || Chain != N0.getValue(1)*/)
+ return SDValue(0, 0);
+
+  // Find the type to narrow the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ MVT NewVT = MVT::getIntegerVT(NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isTypeLegal(NewVT) &&
+ TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = MVT::getIntegerVT(NewBW);
+ }
+ if (NewBW == BitWidth)
+ return SDValue(0, 0);
+
+  // If the lowest changed bit does not start on a boundary of the new bit
+  // width, move the shift amount down to the previous boundary.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+  // For big-endian targets, adjust the pointer offset so the narrowed access
+  // still addresses the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue(0, 0);
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -5086,7 +5178,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->isVolatile(), ST->getAlignment());
}
- return SDValue();
+ return ReduceLoadOpStoreWidth(N);
}
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
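The width selection above is easiest to follow on concrete immediates. The sketch below is plain standard C++, not LLVM code: the helpers NextPow2 and NarrowOp are illustrative, and it assumes a little-endian x86-like target where only an 8-bit type counts as a legal and profitable narrow type for an i32 (i16 is rejected by isNarrowingProfitable). It reproduces the arithmetic for the constants used in the new tests.

// Standalone sketch of the width-selection arithmetic in ReduceLoadOpStoreWidth,
// specialized to a 32-bit value; illustration only.
#include <cstdint>
#include <cstdio>

static unsigned NextPow2(unsigned A) {   // smallest power of two strictly > A
  unsigned P = 1;
  while (P <= A) P <<= 1;
  return P;
}

// Returns true and fills NewBW / PtrOff / NewImm if the 32-bit load/op/store
// with immediate Imm can be narrowed; IsAnd selects the AND (mask) handling.
static bool NarrowOp(bool IsAnd, uint32_t Imm, unsigned &NewBW,
                     unsigned &PtrOff, uint32_t &NewImm) {
  const unsigned BitWidth = 32;
  if (IsAnd)                         // for AND, look at the bits being cleared
    Imm = ~Imm;
  if (Imm == 0)
    return false;                    // the op changes nothing

  unsigned ShAmt = __builtin_ctz(Imm);                 // lowest changed bit
  unsigned MSB   = BitWidth - __builtin_clz(Imm) - 1;  // highest changed bit
  NewBW = NextPow2(MSB - ShAmt);
  while (NewBW < BitWidth && NewBW != 8)   // only the 8-bit type is acceptable
    NewBW = NextPow2(NewBW);
  if (NewBW == BitWidth)
    return false;                    // no narrower width worked out

  if (ShAmt % NewBW)                 // snap the start bit to a NewBW boundary
    ShAmt = (ShAmt / NewBW) * NewBW;
  uint64_t Mask = ((1ULL << NewBW) - 1) << ShAmt;  // bits [ShAmt, ShAmt+NewBW)
  if ((Imm & ~Mask) != 0)
    return false;                    // the changed bits do not fit in one chunk

  NewImm = uint32_t((Imm & Mask) >> ShAmt);
  if (IsAnd)                         // complement back into an AND mask
    NewImm = ~NewImm & ((1u << NewBW) - 1);
  PtrOff = ShAmt / 8;                // little-endian byte offset of the chunk
  return true;
}

int main() {
  unsigned BW, Off;
  uint32_t NI;

  // or i32 %x, 65536: only bit 16 changes, so the combine shrinks the
  // load/or/store to one byte at offset 2 (the "orb $1" narrow_op-1.ll expects).
  if (NarrowOp(false, 65536u, BW, Off, NI))
    printf("or 65536    -> %u-bit op, imm 0x%02x, byte offset %u\n",
           BW, (unsigned)NI, Off);

  // or i32 %x, 16842752 (0x01010000): bits 16 and 24 do not fit in one byte
  // and i16 is unprofitable, so the 32-bit "orl" survives.
  if (!NarrowOp(false, 16842752u, BW, Off, NI))
    printf("or 16842752 -> kept as a 32-bit op\n");

  // and i32 %x, -65537 clears only bit 16 -> "andb" of 0xfe (254) at offset 2,
  // matching narrow_op-2.ll.
  if (NarrowOp(true, uint32_t(-65537), BW, Off, NI))
    printf("and -65537  -> %u-bit op, imm 0x%02x, byte offset %u\n",
           BW, (unsigned)NI, Off);
  return 0;
}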
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b89eef0fb2..0136f90ec4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6877,6 +6877,11 @@ bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const {
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}
+bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const {
+ // i16 instructions are longer (0x66 prefix) and potentially slower.
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+}
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
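(Background on that override, as an aside: a 16-bit x86 ALU instruction carries the 0x66 operand-size prefix, which adds a byte, and on some Intel cores the prefix combined with a 16-bit immediate triggers a length-changing-prefix decoder stall; an 8-bit operation needs neither, which is why only the i32 -> i16 narrowing is rejected here.)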
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index badbd2462f..550f8bdf9b 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -466,6 +466,11 @@ namespace llvm {
virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
virtual bool isZExtFree(MVT VT1, MVT VT2) const;
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const;
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
diff --git a/test/CodeGen/X86/narrow_op-1.ll b/test/CodeGen/X86/narrow_op-1.ll
new file mode 100644
index 0000000000..0ee11b4955
--- /dev/null
+++ b/test/CodeGen/X86/narrow_op-1.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | grep 1
+; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | grep 16842752
+
+	%struct.bf = type { i64, i16, i16, i32 }
+@bfi = common global %struct.bf zeroinitializer, align 16
+
+define void @t1() nounwind optsize ssp {
+entry:
+ %0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ %1 = or i32 %0, 65536
+ store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ ret void
+}
+
+define void @t2() nounwind optsize ssp {
+entry:
+ %0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ %1 = or i32 %0, 16842752
+ store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ ret void
+}
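(In t1 only bit 16 of the i32 is set by the OR, so, assuming the usual x86-64 codegen, the combine rewrites the load/or/store as a single byte access and the RUN lines look for one "orb" with immediate 1. In t2 the constant 16842752 is 0x01010000: bits 16 and 24 do not fit in one byte and i32 -> i16 narrowing is rejected, so the 32-bit "orl" with the original immediate must remain.)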
diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll
new file mode 100644
index 0000000000..b441794f42
--- /dev/null
+++ b/test/CodeGen/X86/narrow_op-2.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 254
+; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 253
+
+	%struct.bf = type { i64, i16, i16, i32 }
+@bfi = external global %struct.bf*
+
+define void @t1() nounwind ssp {
+entry:
+ %0 = load %struct.bf** @bfi, align 8
+ %1 = getelementptr %struct.bf* %0, i64 0, i32 1
+ %2 = bitcast i16* %1 to i32*
+ %3 = load i32* %2, align 1
+ %4 = and i32 %3, -65537
+ store i32 %4, i32* %2, align 1
+ %5 = load %struct.bf** @bfi, align 8
+ %6 = getelementptr %struct.bf* %5, i64 0, i32 1
+ %7 = bitcast i16* %6 to i32*
+ %8 = load i32* %7, align 1
+ %9 = and i32 %8, -131073
+ store i32 %9, i32* %7, align 1
+ ret void
+}
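(These are AND masks, so the combiner complements them first: -65537 clears only bit 16 and -131073 clears only bit 17. Both changed bits sit in the third byte of the i32, so, assuming the same x86-64 codegen, each load/and/store is narrowed to a byte operation and the RUN lines expect two "andb"s whose immediates are the complemented byte masks 254 and 253.)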