From 8258d0b4bfd5fe40c29fa19e24c23ba3ac157e23 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 30 Mar 2010 18:49:01 +0000 Subject: Remove the pmulld intrinsic and auto-upgrade it to a vector multiply. Rewrite the pmulld patterns, and make sure that they fold loads of their arguments into the instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99910 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IntrinsicsX86.td | 3 --- lib/Target/X86/X86InstrInfo.cpp | 1 - lib/Target/X86/X86InstrSSE.td | 24 ++++++++++++++++++++++-- lib/VMCore/AutoUpgrade.cpp | 17 +++++++++++++++++ test/Bitcode/sse41_pmulld.ll | 2 ++ test/Bitcode/sse41_pmulld.ll.bc | Bin 0 -> 560 bytes test/CodeGen/X86/pmul.ll | 2 +- test/CodeGen/X86/pmulld.ll | 16 ++++++++++++++++ 8 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 test/Bitcode/sse41_pmulld.ll create mode 100644 test/Bitcode/sse41_pmulld.ll.bc create mode 100644 test/CodeGen/X86/pmulld.ll diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index d6e1db4299..6be6eb16df 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -810,9 +810,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>; - def int_x86_sse41_pmulld : GCCBuiltin<"__builtin_ia32_pmulld128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem, Commutative]>; } // Vector extract diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5def78737f..614a21182b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -597,7 +597,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, { X86::PMULHWrr, X86::PMULHWrm, 16 }, { X86::PMULLDrr, X86::PMULLDrm, 16 }, - { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 }, { X86::PMULLWrr, X86::PMULLWrm, 16 }, { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, { X86::PORrr, X86::PORrm, 16 }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 077d92de4f..e207598144 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3448,8 +3448,28 @@ let Constraints = "$src1 = $dst" in { OpSize; } } -defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul, - int_x86_sse41_pmulld, 1>; + +/// SS48I_binop_rm - Simple SSE41 binary operator. 
+let Constraints = "$src1 = $dst" in { +multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, bit Commutable = 0> { + def rr : SS48I, + OpSize { + let isCommutable = Commutable; + } + def rm : SS48I, + OpSize; +} +} + +defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>; /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate let Constraints = "$src1 = $dst" in { diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 5e4c9fb766..b9aa5c3467 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -225,7 +225,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // Calls to these intrinsics are transformed into ShuffleVector's. NewFn = 0; return true; + } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) { + // Calls to these intrinsics are transformed into vector multiplies. + NewFn = 0; + return true; } + break; } @@ -355,6 +360,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Clean up the old call now that it has been completely upgraded. CI->eraseFromParent(); + } else if (F->getName() == "llvm.x86.sse41.pmulld") { + // Upgrade this set of intrinsics into vector multiplies. + Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1), + CI->getOperand(2), + CI->getName(), + CI); + // Fix up all the uses with our new multiply. + if (!CI->use_empty()) + CI->replaceAllUsesWith(Mul); + + // Remove the old intrinsic call now that it has been upgraded. 
+ CI->eraseFromParent(); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } diff --git a/test/Bitcode/sse41_pmulld.ll b/test/Bitcode/sse41_pmulld.ll new file mode 100644 index 0000000000..caf85479bb --- /dev/null +++ b/test/Bitcode/sse41_pmulld.ll @@ -0,0 +1,2 @@ +; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.pmulld} +; RUN: llvm-dis < %s.bc | grep mul \ No newline at end of file diff --git a/test/Bitcode/sse41_pmulld.ll.bc b/test/Bitcode/sse41_pmulld.ll.bc new file mode 100644 index 0000000000..bd66f0a05c Binary files /dev/null and b/test/Bitcode/sse41_pmulld.ll.bc differ diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll index e2746a8c06..bf5229aa1e 100644 --- a/test/CodeGen/X86/pmul.ll +++ b/test/CodeGen/X86/pmul.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t ; RUN: grep pmul %t | count 12 -; RUN: grep mov %t | count 12 +; RUN: grep mov %t | count 11 define <4 x i32> @a(<4 x i32> %i) nounwind { %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > diff --git a/test/CodeGen/X86/pmulld.ll b/test/CodeGen/X86/pmulld.ll new file mode 100644 index 0000000000..3ef594112b --- /dev/null +++ b/test/CodeGen/X86/pmulld.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse41 -asm-verbose=0 | FileCheck %s + +define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test1: +; CHECK-NEXT: pmulld + %C = mul <4 x i32> %A, %B + ret <4 x i32> %C +} + +define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind { +; CHECK: test1a: +; CHECK-NEXT: pmulld + %B = load <4 x i32>* %Bp + %C = mul <4 x i32> %A, %B + ret <4 x i32> %C +} -- cgit v1.2.3