author     Benjamin Kramer <benny.kra@googlemail.com>   2013-02-04 15:19:18 +0000
committer  Benjamin Kramer <benny.kra@googlemail.com>   2013-02-04 15:19:18 +0000
commit     4969310052f45b1e2e5d21735e38641a20be0e21 (patch)
tree       04e90cfba49332cd63f2bb6fbf495761d54f5a5c
parent     17174e59004d4df12f9037f0dc99ae530d71eab9 (diff)
SelectionDAG: Teach FoldConstantArithmetic how to deal with vectors.

This required disabling a PowerPC optimization that did the following:

  input:       x = BUILD_VECTOR <i32 16, i32 16, i32 16, i32 16>
  lowered to:  tmp = BUILD_VECTOR <i32 8, i32 8, i32 8, i32 8>
               x = ADD tmp, tmp

The add now gets folded immediately and we're back at the BUILD_VECTOR we
started from. I don't see a way to fix this currently, so I left it
disabled for now.

Fix some trivially foldable X86 tests too.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174325 91177308-0d34-0410-b5e6-96231b3b80d8
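For illustration, a minimal standalone C++ sketch of the per-lane strategy the
new FoldConstantArithmetic uses: collect the scalar constants from both operand
vectors, fold each lane with the requested opcode, and rebuild the result,
refusing to fold on division by zero just as the real code returns an empty
SDValue. The names here (foldConstantVectors, Opcode) are hypothetical
analogues, not SelectionDAG API.

#include <cstdint>
#include <optional>
#include <vector>

// Hypothetical analogue of the per-lane fold, for i32 lanes only.
enum class Opcode { Add, Sub, Mul, UDiv };

std::optional<std::vector<uint32_t>>
foldConstantVectors(Opcode Op, const std::vector<uint32_t> &A,
                    const std::vector<uint32_t> &B) {
  if (A.size() != B.size())
    return std::nullopt;             // Operand lanes out of sync: bail.
  std::vector<uint32_t> Out;
  for (size_t I = 0, E = A.size(); I != E; ++I) {
    switch (Op) {
    case Opcode::Add: Out.push_back(A[I] + B[I]); break;
    case Opcode::Sub: Out.push_back(A[I] - B[I]); break;
    case Opcode::Mul: Out.push_back(A[I] * B[I]); break;
    case Opcode::UDiv:
      if (B[I] == 0)
        return std::nullopt;         // Division by zero: refuse to fold.
      Out.push_back(A[I] / B[I]);
      break;
    }
  }
  return Out;  // The caller rebuilds a BUILD_VECTOR from the folded lanes.
}

A scalar operation is simply the single-lane case of the same loop, which is
how the rewritten function in the patch below folds both forms through one
code path.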
-rw-r--r--  include/llvm/CodeGen/SelectionDAG.h                 |   6
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp           | 159
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp              |   5
-rw-r--r--  test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll  |   1
-rw-r--r--  test/CodeGen/X86/2011-11-30-or.ll                   |   4
-rw-r--r--  test/CodeGen/X86/blend-msb.ll                       |   6
-rw-r--r--  test/CodeGen/X86/vector-gep.ll                      |   3
7 files changed, 129 insertions, 55 deletions
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 69bc3bb57f..c25497aa97 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -992,10 +992,8 @@ public:
SDValue CreateStackTemporary(EVT VT1, EVT VT2);
/// FoldConstantArithmetic -
- SDValue FoldConstantArithmetic(unsigned Opcode,
- EVT VT,
- ConstantSDNode *Cst1,
- ConstantSDNode *Cst2);
+ SDValue FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2);
/// FoldSetCC - Constant fold a setcc to true or false.
SDValue FoldSetCC(EVT VT, SDValue N1,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 57241227d9..09885d8dea 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2680,44 +2680,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return SDValue(N, 0);
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
- EVT VT,
- ConstantSDNode *Cst1,
- ConstantSDNode *Cst2) {
- const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2) {
+ SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
+ SmallVector<SDValue, 4> Outputs;
+ EVT SVT = VT.getScalarType();
+
+ ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
+ ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
+ if (Scalar1 && Scalar2) {
+ // Scalar instruction.
+ Inputs.push_back(std::make_pair(Scalar1, Scalar2));
+ } else {
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
+ return SDValue();
+
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
+
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ Inputs.push_back(std::make_pair(V1, V2));
+ }
+ }
- switch (Opcode) {
- case ISD::ADD: return getConstant(C1 + C2, VT);
- case ISD::SUB: return getConstant(C1 - C2, VT);
- case ISD::MUL: return getConstant(C1 * C2, VT);
- case ISD::UDIV:
- if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
- break;
- case ISD::UREM:
- if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
- break;
- case ISD::SDIV:
- if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
- break;
- case ISD::SREM:
- if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
- break;
- case ISD::AND: return getConstant(C1 & C2, VT);
- case ISD::OR: return getConstant(C1 | C2, VT);
- case ISD::XOR: return getConstant(C1 ^ C2, VT);
- case ISD::SHL: return getConstant(C1 << C2, VT);
- case ISD::SRL: return getConstant(C1.lshr(C2), VT);
- case ISD::SRA: return getConstant(C1.ashr(C2), VT);
- case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
- case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
- default: break;
+ // We have a number of constant values, constant fold them element by element.
+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
+ const APInt &C1 = Inputs[I].first->getAPIntValue();
+ const APInt &C2 = Inputs[I].second->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD:
+ Outputs.push_back(getConstant(C1 + C2, SVT));
+ break;
+ case ISD::SUB:
+ Outputs.push_back(getConstant(C1 - C2, SVT));
+ break;
+ case ISD::MUL:
+ Outputs.push_back(getConstant(C1 * C2, SVT));
+ break;
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.udiv(C2), SVT));
+ break;
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.urem(C2), SVT));
+ break;
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
+ break;
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.srem(C2), SVT));
+ break;
+ case ISD::AND:
+ Outputs.push_back(getConstant(C1 & C2, SVT));
+ break;
+ case ISD::OR:
+ Outputs.push_back(getConstant(C1 | C2, SVT));
+ break;
+ case ISD::XOR:
+ Outputs.push_back(getConstant(C1 ^ C2, SVT));
+ break;
+ case ISD::SHL:
+ Outputs.push_back(getConstant(C1 << C2, SVT));
+ break;
+ case ISD::SRL:
+ Outputs.push_back(getConstant(C1.lshr(C2), SVT));
+ break;
+ case ISD::SRA:
+ Outputs.push_back(getConstant(C1.ashr(C2), SVT));
+ break;
+ case ISD::ROTL:
+ Outputs.push_back(getConstant(C1.rotl(C2), SVT));
+ break;
+ case ISD::ROTR:
+ Outputs.push_back(getConstant(C1.rotr(C2), SVT));
+ break;
+ default:
+ return SDValue();
+ }
}
- return SDValue();
+ // Handle the scalar case first.
+ if (Outputs.size() == 1)
+ return Outputs.back();
+
+ // Otherwise build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),
+ Outputs.size());
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
- SDValue N1, SDValue N2) {
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
+ SDValue N2) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
@@ -3013,16 +3086,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
}
- if (N1C) {
- if (N2C) {
- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
- if (SV.getNode()) return SV;
- } else { // Cannonicalize constant to RHS if commutative
- if (isCommutativeBinOp(Opcode)) {
- std::swap(N1C, N2C);
- std::swap(N1, N2);
- }
- }
+ // Perform trivial constant folding.
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
+ if (SV.getNode()) return SV;
+
+ // Canonicalize constant to RHS if commutative.
+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
}
// Constant fold FP operations.
@@ -3030,7 +3101,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
if (N1CFP) {
if (!N2CFP && isCommutativeBinOp(Opcode)) {
- // Cannonicalize constant to RHS if commutative
+ // Canonicalize constant to RHS if commutative.
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
} else if (N2CFP) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 9a68927103..4cd0fc484a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5032,9 +5032,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// If this value is in the range [-32,30] and is even, use:
// tmp = VSPLTI[bhw], result = add tmp, tmp
if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ // FIXME: This is currently disabled because the ADD will be folded back
+ // into an invalid BUILD_VECTOR immediately.
+ return SDValue();
+#if 0
SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+#endif
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
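To make the disabled trick concrete: vspltis[bhw] encodes only a 5-bit signed
splat immediate in [-16,15], so an even value in [-32,30] used to be
materialized as half the value splatted and then added to itself. A hedged
scalar sketch of that lowering in plain C++ (materializeEvenSplat is a made-up
name, not the actual DAG code):

#include <cassert>
#include <cstdint>

// Scalar analogue of the now-disabled PowerPC splat-via-add lowering.
int32_t materializeEvenSplat(int32_t SextVal) {
  assert(SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0);
  int32_t Tmp = SextVal >> 1;  // vsplti* with an in-range immediate
  return Tmp + Tmp;            // vaddu[bhw]m tmp, tmp
}

With the new fold, that constant ADD is collapsed straight back into the
out-of-range BUILD_VECTOR it was meant to avoid, which is why the hunk above
now returns an empty SDValue before reaching this pattern.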
diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
index 8802b97d2a..00a402e0e4 100644
--- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
+++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
+; XFAIL: *
define <4 x i32> @test() nounwind {
ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722>
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index f66248bc5a..8ac4632329 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -8,9 +8,9 @@ target triple = "x86_64-apple-macosx10.6.6"
; CHECK: pblendvb %xmm1, %xmm2
; CHECK: ret
-define void @select_func() {
+define void @select_func(<8 x i16> %in) {
entry:
- %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
%and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
%neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 34445428ea..2b6b9ef8f3 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -5,7 +5,8 @@
; shifting the needed bit to the MSB, and not using shl+sra.
;CHECK: vsel_float
-;CHECK: pslld
+;CHECK: movl $-2147483648
+;CHECK-NEXT: movd
;CHECK-NEXT: blendvps
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
@@ -14,7 +15,8 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
}
;CHECK: vsel_4xi8
-;CHECK: pslld
+;CHECK: movl $-2147483648
+;CHECK-NEXT: movd
;CHECK-NEXT: blendvps
;CHECK: ret
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index d08e2a0746..ec93ce0761 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -8,10 +8,8 @@ entry:
%vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
%vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
%vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
-;CHECK: pslld $2
;CHECK: padd
%A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-;CHECK: pslld $2
;CHECK: padd
%A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
ret <4 x i32*> %A3
@@ -21,7 +19,6 @@ entry:
;CHECK: AGEP1:
define i32 @AGEP1(<4 x i32*> %param) nounwind {
entry:
-;CHECK: pslld $2
;CHECK: padd
%A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
%k = extractelement <4 x i32*> %A2, i32 3