summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/IntrinsicsX86.td77
-rw-r--r--lib/Target/X86/X86.td15
-rw-r--r--lib/Target/X86/X86Instr3DNow.td109
-rw-r--r--test/CodeGen/X86/3dnow-intrinsics.ll297
4 files changed, 449 insertions, 49 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 06b482e2f5..b44101a11c 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -18,6 +18,83 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
//===----------------------------------------------------------------------===//
+// 3DNow!
+
+let TargetPrefix = "x86" in {
+ def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// 3DNow! extensions
+
+let TargetPrefix = "x86" in {
+ def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnowa_pswapd :
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
// SSE1
// Arithmetic ops
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 00ff97ea4e..912dff0f1d 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -50,7 +50,8 @@ def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
- "Enable 3DNow! instructions">;
+ "Enable 3DNow! instructions",
+ [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
@@ -125,10 +126,10 @@ def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
-def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
-def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
-def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
@@ -156,8 +157,8 @@ def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
-def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
def : Proc<"c3-2", [FeatureSSE1]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 45d1c6bc9d..dd4f6a5a85 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -12,66 +12,91 @@
//
//===----------------------------------------------------------------------===//
-// FIXME: We don't support any intrinsics for these instructions yet.
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+ : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
-class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src1 = $dst";
}
-class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
- : I<o, F, (outs VR64:$dst), ins,
- !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
- TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+ Has3DNow0F0FOpcode {
// FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
let isAsmParserOnly = 1;
}
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
-let Constraints = "$src1 = $dst" in {
- // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
- // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
- multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
- def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
- def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
- }
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
-defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
-defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
-defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
-defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
-defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
-defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
-defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
-defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
-defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
-defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
-defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
-defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
-defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
-defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
"prefetch $addr", []>;
-
+
// FIXME: Diassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in
def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
"prefetchw $addr", []>;
-}
// "3DNowA" instructions
-defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
-defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
-defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
-defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
-defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
diff --git a/test/CodeGen/X86/3dnow-intrinsics.ll b/test/CodeGen/X86/3dnow-intrinsics.ll
new file mode 100644
index 0000000000..0b27bf2d18
--- /dev/null
+++ b/test/CodeGen/X86/3dnow-intrinsics.ll
@@ -0,0 +1,297 @@
+; RUN: llc < %s -march=x86 -mattr=+3dnow | FileCheck %s
+
+define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
+; CHECK: pavgusb
+entry:
+ %0 = bitcast x86_mmx %a.coerce to <8 x i8>
+ %1 = bitcast x86_mmx %b.coerce to <8 x i8>
+ %2 = bitcast <8 x i8> %0 to x86_mmx
+ %3 = bitcast <8 x i8> %1 to x86_mmx
+ %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ ret <8 x i8> %5
+}
+
+declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
+; CHECK: pf2id
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfacc
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfadd
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpeq
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpge
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpgt
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmax
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmin
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmul
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
+; CHECK: pfrcp
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
+ %2 = bitcast x86_mmx %1 to <2 x float>
+ ret <2 x float> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrcpit1
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrcpit2
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
+; CHECK: pfrsqrt
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
+ %2 = bitcast x86_mmx %1 to <2 x float>
+ ret <2 x float> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrsqit1
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfsub
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfsubr
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
+; CHECK: pi2fd
+entry:
+ %0 = bitcast x86_mmx %a.coerce to <2 x i32>
+ %1 = bitcast <2 x i32> %0 to x86_mmx
+ %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
+
+define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
+; CHECK: pmulhrw
+entry:
+ %0 = bitcast x86_mmx %a.coerce to <4 x i16>
+ %1 = bitcast x86_mmx %b.coerce to <4 x i16>
+ %2 = bitcast <4 x i16> %0 to x86_mmx
+ %3 = bitcast <4 x i16> %1 to x86_mmx
+ %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ ret <4 x i16> %5
+}
+
+declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
+; CHECK: pf2iw
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfnacc
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfpnacc
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = bitcast <2 x float> %b to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
+; CHECK: pi2fw
+entry:
+ %0 = bitcast x86_mmx %a.coerce to <2 x i32>
+ %1 = bitcast <2 x i32> %0 to x86_mmx
+ %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
+ %3 = bitcast x86_mmx %2 to <2 x float>
+ ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
+; CHECK: pswapd
+entry:
+ %0 = bitcast <2 x float> %a to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
+ %2 = bitcast x86_mmx %1 to <2 x float>
+ ret <2 x float> %2
+}
+
+define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
+; CHECK: pswapd
+entry:
+ %0 = bitcast <2 x i32> %a to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone