summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2012-01-01 19:40:22 +0000
committerCraig Topper <craig.topper@gmail.com>2012-01-01 19:40:22 +0000
commitde9e4c728e9016d8d701c2e78a96647fbb98779a (patch)
tree6e1682b693fb46098a8eb3d908c00d05741e0d18
parent8943574b44bdc9470df67d91cec39ece85b7332f (diff)
downloadllvm-de9e4c728e9016d8d701c2e78a96647fbb98779a.tar.gz
llvm-de9e4c728e9016d8d701c2e78a96647fbb98779a.tar.bz2
llvm-de9e4c728e9016d8d701c2e78a96647fbb98779a.tar.xz
Fix sfence, lfence, mfence, and clflush to be able to be selected when AVX is enabled. Fix monitor and mwait to require SSE3 or AVX, previously they worked even if SSE3 was disabled. Make prefetch instructions not set the execution domain since they don't use XMM registers.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147409 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrFormats.td4
-rw-r--r--lib/Target/X86/X86InstrInfo.td1
-rw-r--r--lib/Target/X86/X86InstrSSE.td36
-rw-r--r--test/CodeGen/X86/apm.ll4
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll48
5 files changed, 71 insertions, 22 deletions
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 58fd1f3f8d..2da9b4baeb 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -339,10 +339,6 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
-class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
- Requires<[HasXMM]>;
// SSE2 Instruction Templates:
//
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0bc3afa77b..fe83ae9ddd 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -475,6 +475,7 @@ def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasXMM : Predicate<"Subtarget->hasXMM()">;
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+def HasSSE3orAVX : Predicate<"Subtarget->hasSSE3orAVX()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 78143f6977..d6ae3af0d4 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3253,19 +3253,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
-def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
-def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
-def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
+let Predicates = [HasXMM] in {
+def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
+def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
+def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
+def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
+}
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
+ TB, Requires<[HasXMMInt]>;
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
@@ -3273,11 +3275,11 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
- "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+ "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasXMM]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasXMMInt]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasXMMInt]>;
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
@@ -5463,17 +5465,19 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
- [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
+ Requires<[HasSSE3orAVX]>;
def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
- [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
+ Requires<[HasSSE3orAVX]>;
}
let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
- Requires<[HasSSE3]>;
+ Requires<[HasSSE3orAVX]>;
let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
- Requires<[HasSSE3]>;
+ Requires<[HasSSE3orAVX]>;
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index b514cf6427..aaedf18481 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
; PR8573
; CHECK: foo:
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index f58391469c..b4f04ceb8f 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2481,4 +2481,52 @@ define void @test_x86_avx_vzeroupper() {
}
declare void @llvm.x86.avx.vzeroupper() nounwind
+; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work
+; CHECK: monitor
+define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+ ret void
+}
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: mwait
+define void @mwait(i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+ ret void
+}
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
+
+; CHECK: sfence
+define void @sfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse.sfence()
+ ret void
+}
+declare void @llvm.x86.sse.sfence() nounwind
+
+; CHECK: lfence
+define void @lfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse2.lfence()
+ ret void
+}
+declare void @llvm.x86.sse2.lfence() nounwind
+
+; CHECK: mfence
+define void @mfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse2.mfence()
+ ret void
+}
+declare void @llvm.x86.sse2.mfence() nounwind
+
+; CHECK: clflush
+define void @clflush(i8* %p) nounwind {
+entry:
+ tail call void @llvm.x86.sse2.clflush(i8* %p)
+ ret void
+}
+declare void @llvm.x86.sse2.clflush(i8*) nounwind