From 8b77d958fd90f18a58b97547c0abcd9e2a13f483 Mon Sep 17 00:00:00 2001
From: Evgeniy Stepanov
Date: Mon, 3 Mar 2014 13:47:42 +0000
Subject: [msan] Handle X86 SIMD bitshift intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202712 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Instrumentation/MemorySanitizer.cpp | 119 +++++++++++++++++++++
 .../MemorySanitizer/vector_shift.ll                | 100 +++++++++++++++++
 2 files changed, 219 insertions(+)
 create mode 100644 test/Instrumentation/MemorySanitizer/vector_shift.ll

diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 27fd9d4d49..95129d2df1 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1827,6 +1827,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
   }
 
+  // Given a scalar or vector, extract lower 64 bits (or less), and return all
+  // zeroes if it is zero, and all ones otherwise.
+  Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
+    if (S->getType()->isVectorTy())
+      S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
+    assert(S->getType()->getPrimitiveSizeInBits() <= 64);
+    Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
+    return CreateShadowCast(IRB, S2, T, /* Signed */ true);
+  }
+
+  Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
+    Type *T = S->getType();
+    assert(T->isVectorTy());
+    Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
+    return IRB.CreateSExt(S2, T);
+  }
+
+  // \brief Instrument vector shift intrinsic.
+  //
+  // This function instruments intrinsics like int_x86_avx2_psll_w.
+  // Intrinsic shifts %In by %ShiftSize bits.
+  // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
+  // size, and the rest is ignored. Behavior is defined even if shift size is
+  // greater than register (or field) width.
+  void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
+    assert(I.getNumArgOperands() == 2);
+    IRBuilder<> IRB(&I);
+    // If any of the S2 bits are poisoned, the whole thing is poisoned.
+    // Otherwise perform the same shift on S1.
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *S2Conv = Variable ?
VariableShadowExtend(IRB, S2) + : Lower64ShadowExtend(IRB, S2, getShadowTy(&I)); + Value *V1 = I.getOperand(0); + Value *V2 = I.getOperand(1); + Value *Shift = IRB.CreateCall2(I.getCalledValue(), + IRB.CreateBitCast(S1, V1->getType()), V2); + Shift = IRB.CreateBitCast(Shift, getShadowTy(&I)); + setShadow(&I, IRB.CreateOr(Shift, S2Conv)); + setOriginForNaryOp(I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case llvm::Intrinsic::bswap: @@ -1866,6 +1908,83 @@ struct MemorySanitizerVisitor : public InstVisitor { case llvm::Intrinsic::x86_sse_cvttps2pi: handleVectorConvertIntrinsic(I, 2); break; + case llvm::Intrinsic::x86_avx512_psll_dq: + case llvm::Intrinsic::x86_avx512_psrl_dq: + case llvm::Intrinsic::x86_avx2_psll_w: + case llvm::Intrinsic::x86_avx2_psll_d: + case llvm::Intrinsic::x86_avx2_psll_q: + case llvm::Intrinsic::x86_avx2_pslli_w: + case llvm::Intrinsic::x86_avx2_pslli_d: + case llvm::Intrinsic::x86_avx2_pslli_q: + case llvm::Intrinsic::x86_avx2_psll_dq: + case llvm::Intrinsic::x86_avx2_psrl_w: + case llvm::Intrinsic::x86_avx2_psrl_d: + case llvm::Intrinsic::x86_avx2_psrl_q: + case llvm::Intrinsic::x86_avx2_psra_w: + case llvm::Intrinsic::x86_avx2_psra_d: + case llvm::Intrinsic::x86_avx2_psrli_w: + case llvm::Intrinsic::x86_avx2_psrli_d: + case llvm::Intrinsic::x86_avx2_psrli_q: + case llvm::Intrinsic::x86_avx2_psrai_w: + case llvm::Intrinsic::x86_avx2_psrai_d: + case llvm::Intrinsic::x86_avx2_psrl_dq: + case llvm::Intrinsic::x86_sse2_psll_w: + case llvm::Intrinsic::x86_sse2_psll_d: + case llvm::Intrinsic::x86_sse2_psll_q: + case llvm::Intrinsic::x86_sse2_pslli_w: + case llvm::Intrinsic::x86_sse2_pslli_d: + case llvm::Intrinsic::x86_sse2_pslli_q: + case llvm::Intrinsic::x86_sse2_psll_dq: + case llvm::Intrinsic::x86_sse2_psrl_w: + case llvm::Intrinsic::x86_sse2_psrl_d: + case llvm::Intrinsic::x86_sse2_psrl_q: + case llvm::Intrinsic::x86_sse2_psra_w: + case llvm::Intrinsic::x86_sse2_psra_d: + case llvm::Intrinsic::x86_sse2_psrli_w: + case llvm::Intrinsic::x86_sse2_psrli_d: + case llvm::Intrinsic::x86_sse2_psrli_q: + case llvm::Intrinsic::x86_sse2_psrai_w: + case llvm::Intrinsic::x86_sse2_psrai_d: + case llvm::Intrinsic::x86_sse2_psrl_dq: + case llvm::Intrinsic::x86_mmx_psll_w: + case llvm::Intrinsic::x86_mmx_psll_d: + case llvm::Intrinsic::x86_mmx_psll_q: + case llvm::Intrinsic::x86_mmx_pslli_w: + case llvm::Intrinsic::x86_mmx_pslli_d: + case llvm::Intrinsic::x86_mmx_pslli_q: + case llvm::Intrinsic::x86_mmx_psrl_w: + case llvm::Intrinsic::x86_mmx_psrl_d: + case llvm::Intrinsic::x86_mmx_psrl_q: + case llvm::Intrinsic::x86_mmx_psra_w: + case llvm::Intrinsic::x86_mmx_psra_d: + case llvm::Intrinsic::x86_mmx_psrli_w: + case llvm::Intrinsic::x86_mmx_psrli_d: + case llvm::Intrinsic::x86_mmx_psrli_q: + case llvm::Intrinsic::x86_mmx_psrai_w: + case llvm::Intrinsic::x86_mmx_psrai_d: + handleVectorShiftIntrinsic(I, /* Variable */ false); + break; + case llvm::Intrinsic::x86_avx2_psllv_d: + case llvm::Intrinsic::x86_avx2_psllv_d_256: + case llvm::Intrinsic::x86_avx2_psllv_q: + case llvm::Intrinsic::x86_avx2_psllv_q_256: + case llvm::Intrinsic::x86_avx2_psrlv_d: + case llvm::Intrinsic::x86_avx2_psrlv_d_256: + case llvm::Intrinsic::x86_avx2_psrlv_q: + case llvm::Intrinsic::x86_avx2_psrlv_q_256: + case llvm::Intrinsic::x86_avx2_psrav_d: + case llvm::Intrinsic::x86_avx2_psrav_d_256: + handleVectorShiftIntrinsic(I, /* Variable */ true); + break; + + // Byte shifts are not implemented. 
+ // case llvm::Intrinsic::x86_avx512_psll_dq_bs: + // case llvm::Intrinsic::x86_avx512_psrl_dq_bs: + // case llvm::Intrinsic::x86_avx2_psll_dq_bs: + // case llvm::Intrinsic::x86_avx2_psrl_dq_bs: + // case llvm::Intrinsic::x86_sse2_psll_dq_bs: + // case llvm::Intrinsic::x86_sse2_psrl_dq_bs: + default: if (!handleUnknownIntrinsic(I)) visitInstruction(I); diff --git a/test/Instrumentation/MemorySanitizer/vector_shift.ll b/test/Instrumentation/MemorySanitizer/vector_shift.ll new file mode 100644 index 0000000000..d32f51b0b9 --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/vector_shift.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s + +; Test instrumentation of vector shift instructions. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) +declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) +declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) +declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) +declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) + +define i64 @test_mmx(i64 %x.coerce, i64 %y.coerce) { +entry: + %0 = bitcast i64 %x.coerce to <2 x i32> + %1 = bitcast <2 x i32> %0 to x86_mmx + %2 = bitcast i64 %y.coerce to x86_mmx + %3 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to <2 x i32> + %5 = bitcast <2 x i32> %4 to <1 x i64> + %6 = extractelement <1 x i64> %5, i32 0 + ret i64 %6 +} + +; CHECK: @test_mmx +; CHECK: = icmp ne i64 {{.*}}, 0 +; CHECK: [[C:%.*]] = sext i1 {{.*}} to i64 +; CHECK: [[A:%.*]] = call x86_mmx @llvm.x86.mmx.psll.d( +; CHECK: [[B:%.*]] = bitcast x86_mmx {{.*}}[[A]] to i64 +; CHECK: = or i64 {{.*}}[[B]], {{.*}}[[C]] +; CHECK: call x86_mmx @llvm.x86.mmx.psll.d( +; CHECK: ret i64 + + +define <8 x i16> @test_sse2_scalar(<8 x i16> %x, i32 %y) { +entry: + %0 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %x, i32 %y) + ret <8 x i16> %0 +} + +; CHECK: @test_sse2_scalar +; CHECK: = icmp ne i32 {{.*}}, 0 +; CHECK: = sext i1 {{.*}} to i128 +; CHECK: = bitcast i128 {{.*}} to <8 x i16> +; CHECK: = call <8 x i16> @llvm.x86.sse2.pslli.w( +; CHECK: = or <8 x i16> +; CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w( +; CHECK: ret <8 x i16> + + +define <8 x i16> @test_sse2(<8 x i16> %x, <8 x i16> %y) { +entry: + %0 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %x, <8 x i16> %y) + ret <8 x i16> %0 +} + +; CHECK: @test_sse2 +; CHECK: = bitcast <8 x i16> {{.*}} to i128 +; CHECK: = trunc i128 {{.*}} to i64 +; CHECK: = icmp ne i64 {{.*}}, 0 +; CHECK: = sext i1 {{.*}} to i128 +; CHECK: = bitcast i128 {{.*}} to <8 x i16> +; CHECK: = call <8 x i16> @llvm.x86.sse2.psrl.w( +; CHECK: = or <8 x i16> +; CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w( +; CHECK: ret <8 x i16> + + +; Test variable shift (i.e. vector by vector). 
+ +define <4 x i32> @test_avx2(<4 x i32> %x, <4 x i32> %y) { +entry: + %0 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %x, <4 x i32> %y) + ret <4 x i32> %0 +} + +; CHECK: @test_avx2 +; CHECK: = icmp ne <4 x i32> {{.*}}, zeroinitializer +; CHECK: = sext <4 x i1> {{.*}} to <4 x i32> +; CHECK: = call <4 x i32> @llvm.x86.avx2.psllv.d( +; CHECK: = or <4 x i32> +; CHECK: = tail call <4 x i32> @llvm.x86.avx2.psllv.d( +; CHECK: ret <4 x i32> + +define <8 x i32> @test_avx2_256(<8 x i32> %x, <8 x i32> %y) { +entry: + %0 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %x, <8 x i32> %y) + ret <8 x i32> %0 +} + +; CHECK: @test_avx2_256 +; CHECK: = icmp ne <8 x i32> {{.*}}, zeroinitializer +; CHECK: = sext <8 x i1> {{.*}} to <8 x i32> +; CHECK: = call <8 x i32> @llvm.x86.avx2.psllv.d.256( +; CHECK: = or <8 x i32> +; CHECK: = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256( +; CHECK: ret <8 x i32> -- cgit v1.2.3
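An illustrative user-level repro, separate from the patch itself: a minimal C++ sketch of the kind of bug the new shift handling is meant to catch. It assumes a clang build that includes this change on x86_64, a hypothetical file name vector_shift_repro.cpp, and a compile line along the lines of clang++ -fsanitize=memory -O1 vector_shift_repro.cpp; the exact report wording will vary.

// vector_shift_repro.cpp -- hypothetical repro, not part of the patch.
// Build (assumed): clang++ -fsanitize=memory -O1 vector_shift_repro.cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128i x = _mm_set1_epi16(0x7f); // data operand, fully initialized
  __m128i n;                        // shift count, intentionally uninitialized
  // _mm_srl_epi16 lowers to llvm.x86.sse2.psrl.w, one of the intrinsics now
  // routed through handleVectorShiftIntrinsic.
  __m128i r = _mm_srl_epi16(x, n);
  // Branching on the result makes the poison observable: the shift-count
  // shadow is extended over every lane of r, so MSan is expected to report
  // a use-of-uninitialized-value here.
  if (_mm_cvtsi128_si32(r) == 0)
    printf("low lane is zero\n");
  return 0;
}

The CHECK lines in vector_shift.ll above verify the same shape at the IR level: an icmp ne of the shift-count shadow against zero, a sext to the result's shadow type, the original shift replayed on the data shadow, and an or that folds the two together.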