summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-05-08 21:50:26 +0000
committerChris Lattner <sabre@nondot.org>2010-05-08 21:50:26 +0000
commit6745191070503f669b07bc14cd6f7e518bd4d5b0 (patch)
tree5afeb2bbf1b18c6edcbf2c110169b1353dbdb388
parent7944c21cae06c5124cf1ee1f811d2a13f8e1b939 (diff)
downloadllvm-6745191070503f669b07bc14cd6f7e518bd4d5b0.tar.gz
llvm-6745191070503f669b07bc14cd6f7e518bd4d5b0.tar.bz2
llvm-6745191070503f669b07bc14cd6f7e518bd4d5b0.tar.xz
Teach instcombine to transform a bitcast/(zext|trunc)/bitcast sequence
with a vector input and output into a shuffle vector. This sort of sequence happens when the input code stores with one type and reloads with another type and then SROA promotes to i96 integers, which make everyone sad. This fixes rdar://7896024 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103354 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp70
-rw-r--r--test/Transforms/InstCombine/cast.ll33
2 files changed, 103 insertions, 0 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index eb7628e741..01f49d2ca5 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1252,6 +1252,64 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
return commonPointerCastTransforms(CI);
}
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type. Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+ InstCombiner &IC) {
+ // We can only do this optimization if the output is a multiple of the input
+ // element size, or the input is a multiple of the output element size.
+ // Convert the input type to have the same element type as the output.
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+ if (SrcTy->getElementType() != DestTy->getElementType()) {
+ // The input types don't need to be identical, but for now they must be the
+ // same size. There is no specific reason we couldn't handle things like
+ // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+ // there yet.
+ if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+ DestTy->getElementType()->getPrimitiveSizeInBits())
+ return 0;
+
+ SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ }
+
+ // Now that the element types match, get the shuffle mask and RHS of the
+ // shuffle to use, which depends on whether we're increasing or decreasing the
+ // size of the input.
+ SmallVector<Constant*, 16> ShuffleMask;
+ Value *V2;
+ const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+
+ if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+ // If we're shrinking the number of elements, just shuffle in the low
+ // elements from the input and use undef as the second shuffle input.
+ V2 = UndefValue::get(SrcTy);
+ for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ } else {
+ // If we're increasing the number of elements, shuffle in all of the
+ // elements from InVal and fill the rest of the result elements with zeros
+ // from a constant zero.
+ V2 = Constant::getNullValue(SrcTy);
+ unsigned SrcElts = SrcTy->getNumElements();
+ for (unsigned i = 0, e = SrcElts; i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ // The excess elements reference the first element of the zero input.
+ ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+ ConstantInt::get(Int32Ty, SrcElts));
+ }
+
+ Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
+ return new ShuffleVectorInst(InVal, V2, Mask);
+}
+
+
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
@@ -1310,6 +1368,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
+
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ cast<VectorType>(DestTy), *this))
+ return I;
+ }
}
if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 878da6894f..77fccdfa52 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -605,3 +605,36 @@ define i64 @test59(i8 %A, i8 %B) nounwind {
; CHECK-NOT: i32
; CHECK: ret i64 %H
}
+
+define <3 x i32> @test60(<4 x i32> %call4) nounwind {
+ %tmp11 = bitcast <4 x i32> %call4 to i128
+ %tmp9 = trunc i128 %tmp11 to i96
+ %tmp10 = bitcast i96 %tmp9 to <3 x i32>
+ ret <3 x i32> %tmp10
+
+; CHECK: @test60
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test61(<3 x i32> %call4) nounwind {
+ %tmp11 = bitcast <3 x i32> %call4 to i96
+ %tmp9 = zext i96 %tmp11 to i128
+ %tmp10 = bitcast i128 %tmp9 to <4 x i32>
+ ret <4 x i32> %tmp10
+; CHECK: @test61
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test62(<3 x float> %call4) nounwind {
+ %tmp11 = bitcast <3 x float> %call4 to i96
+ %tmp9 = zext i96 %tmp11 to i128
+ %tmp10 = bitcast i128 %tmp9 to <4 x i32>
+ ret <4 x i32> %tmp10
+; CHECK: @test62
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+