// Web-view navigation text and line-number gutter removed.
// path: root/lib/Target/X86/X86InstrFPStack.td
// blob: 848d370db4d06969f1e0accd6da7cdc3b2572a2f (plain)
//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=//
// 
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
// 
//===----------------------------------------------------------------------===//
//
// This file describes the X86 x87 FPU instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profiles shared by the FP stack DAG nodes below.

// One f64 result, no operands.
def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>;

// No result; the single operand is a floating point value.
def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>;

// f64 result; operands are a pointer and an OtherVT value (the patterns in
// this file pass a value type such as f32 or f64 there).
def SDTX86Fld : SDTypeProfile<1, 2, [
  SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;

// No result; operands are a floating point value, a pointer and an OtherVT
// value.
def SDTX86Fst : SDTypeProfile<0, 3, [
  SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;

// Same constraints as SDTX86Fld; used by the FILD nodes.
def SDTX86Fild : SDTypeProfile<1, 2, [
  SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;

// No result; operands are a floating point value and a pointer.
def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;

// DAG nodes for the X86ISD opcodes used by the FP stack.  Each node names its
// opcode, its type profile, and its node properties (chain / flag usage).

// Reading and setting the FP result.
def X86fpget : SDNode<"X86ISD::FP_GET_RESULT", SDTX86FpGet,
                      [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
def X86fpset : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet,
                      [SDNPHasChain, SDNPOutFlag]>;

// Floating point load and store.
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, [SDNPHasChain]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, [SDNPHasChain, SDNPInFlag]>;

// Integer loads; the _FLAG variant also has an output flag.
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, [SDNPHasChain]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
                         [SDNPHasChain, SDNPOutFlag]>;

// FP to 16/32/64-bit integer conversions that go through memory.
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
                             [SDNPHasChain]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
                             [SDNPHasChain]>;
def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
                             [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// FPStack pattern fragments
//===----------------------------------------------------------------------===//

// Leaf predicates over f64 floating point immediates.  Each one accepts the
// node only when isExactlyValue() reports the listed constant.

// Matches exactly +0.0.
def fp64imm0 : PatLeaf<(f64 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;

// Matches exactly -0.0.
def fp64immneg0 : PatLeaf<(f64 fpimm), [{
  return N->isExactlyValue(-0.0);
}]>;

// Matches exactly +1.0.
def fp64imm1 : PatLeaf<(f64 fpimm), [{
  return N->isExactlyValue(+1.0);
}]>;

// Matches exactly -1.0.
def fp64immneg1 : PatLeaf<(f64 fpimm), [{
  return N->isExactlyValue(-1.0);
}]>;

// An extloadf32 from $ptr whose result is typed f64.
def extloadf64f32  : PatFrag<(ops node:$ptr), (f64 (extloadf32 node:$ptr))>;

// Some 'special' instructions: pseudos that store an RFP value to memory as a
// 16-, 32- or 64-bit integer.  usesCustomDAGSchedInserter is set on each one
// because they are expanded by the scheduler.
let usesCustomDAGSchedInserter = 1 in
def FP_TO_INT16_IN_MEM
  : I<0, Pseudo, (ops i16mem:$dst, RFP:$src),
      "#FP_TO_INT16_IN_MEM PSEUDO!",
      [(X86fp_to_i16mem RFP:$src, addr:$dst)]>;

let usesCustomDAGSchedInserter = 1 in
def FP_TO_INT32_IN_MEM
  : I<0, Pseudo, (ops i32mem:$dst, RFP:$src),
      "#FP_TO_INT32_IN_MEM PSEUDO!",
      [(X86fp_to_i32mem RFP:$src, addr:$dst)]>;

let usesCustomDAGSchedInserter = 1 in
def FP_TO_INT64_IN_MEM
  : I<0, Pseudo, (ops i64mem:$dst, RFP:$src),
      "#FP_TO_INT64_IN_MEM PSEUDO!",
      [(X86fp_to_i64mem RFP:$src, addr:$dst)]>;

// FP_REG_KILL - Terminator pseudo with no operands that lists FP0 through FP6
// as implicit definitions.
let isTerminator = 1,
    Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
def FP_REG_KILL : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;

// All FP Stack operations are represented with two instructions here.  The
// first instruction, generated by the instruction selector, uses "RFP"
// registers: a traditional register file to reference floating point values.
// These instructions are all pseudo instructions and use the "Fp" prefix.
// The second instruction is defined with FPI, which is the actual instruction
// emitted by the assembler.  The FP stackifier pass converts one to the other
// after register allocation occurs.
//
// Note that the FpI instruction should have instruction selection info (e.g.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).

// FPI - Floating Point Instruction template.  Forwards the opcode, format,
// operand list and asm string to I and always passes an empty pattern list.
class FPI<bits<8> o, Format F, dag ops, string asm>
  : I<o, F, ops, asm, []>;

// FpI_ - Floating Point Pseudo Instruction template.  Not Predicated.
// Built on X86Inst with opcode 0, the Pseudo format, no immediate and an empty
// asm string.
class FpI_<dag ops, FPFormat fp, list<dag> pattern>
    : X86Inst<0, Pseudo, NoImm, ops, ""> {
  // FP format of the instruction and the encoded value derived from it.
  let FPForm     = fp;
  let FPFormBits = FPForm.Value;

  // Instruction selection pattern(s).
  let Pattern    = pattern;
}

// Random Pseudo Instructions.

// FPR = ST(0)
def FpGETRESULT
  : FpI_<(ops RFP:$dst), SpecialFP, [(set RFP:$dst, X86fpget)]>;

// ST(0) = FPR
let noResults = 1 in
def FpSETRESULT
  : FpI_<(ops RFP:$src), SpecialFP, [(X86fpset RFP:$src)]>,
    Imp<[], [ST0]>;

// FpI - Floating Point Pseudo Instruction template.  Identical to FpI_ except
// that it is predicated on FPStack.
class FpI<dag ops, FPFormat fp, list<dag> pattern>
  : FpI_<ops, fp, pattern>,
    Requires<[FPStack]>;


// f1 = fmov f2.  No selection pattern.
def FpMOV : FpI<(ops RFP:$dst, RFP:$src), SpecialFP, []>;

// Arithmetic
// Add, Sub, Mul, Div on two RFP registers: $dst = $src1 <op> $src2.
def FpADD
  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
        [(set RFP:$dst, (fadd RFP:$src1, RFP:$src2))]>;
def FpSUB
  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
        [(set RFP:$dst, (fsub RFP:$src1, RFP:$src2))]>;
def FpMUL
  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
        [(set RFP:$dst, (fmul RFP:$src1, RFP:$src2))]>;
def FpDIV
  : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
        [(set RFP:$dst, (fdiv RFP:$src1, RFP:$src2))]>;

// Templates for the register forms of the arithmetic instructions.  All three
// take one RST operand in AddRegFrm format; they differ only in the D8 / DC /
// DE class they mix in.
class FPST0rInst<bits<8> o, string asm> :
  FPI<o, AddRegFrm, (ops RST:$op), asm>, D8;

class FPrST0Inst<bits<8> o, string asm> :
  FPI<o, AddRegFrm, (ops RST:$op), asm>, DC;

class FPrST0PInst<bits<8> o, string asm> :
  FPI<o, AddRegFrm, (ops RST:$op), asm>, DE;

// Binary Ops with a memory source.  The 32-bit forms match an extending f32
// load, the 64-bit forms a plain f64 load.  The "R" forms have the memory
// value as the left-hand operand.

// ST(0) = ST(0) + [mem32]
def FpADD32m
  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fadd RFP:$src1, (extloadf64f32 addr:$src2)))]>;
// ST(0) = ST(0) + [mem64]
def FpADD64m
  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fadd RFP:$src1, (loadf64 addr:$src2)))]>;

// ST(0) = ST(0) * [mem32]
def FpMUL32m
  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fmul RFP:$src1, (extloadf64f32 addr:$src2)))]>;
// ST(0) = ST(0) * [mem64]
def FpMUL64m
  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fmul RFP:$src1, (loadf64 addr:$src2)))]>;

// ST(0) = ST(0) - [mem32]
def FpSUB32m
  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fsub RFP:$src1, (extloadf64f32 addr:$src2)))]>;
// ST(0) = ST(0) - [mem64]
def FpSUB64m
  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fsub RFP:$src1, (loadf64 addr:$src2)))]>;

// ST(0) = [mem32] - ST(0)
def FpSUBR32m
  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fsub (extloadf64f32 addr:$src2), RFP:$src1))]>;
// ST(0) = [mem64] - ST(0)
def FpSUBR64m
  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fsub (loadf64 addr:$src2), RFP:$src1))]>;

// ST(0) = ST(0) / [mem32]
def FpDIV32m
  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fdiv RFP:$src1, (extloadf64f32 addr:$src2)))]>;
// ST(0) = ST(0) / [mem64]
def FpDIV64m
  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fdiv RFP:$src1, (loadf64 addr:$src2)))]>;

// ST(0) = [mem32] / ST(0)
def FpDIVR32m
  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fdiv (extloadf64f32 addr:$src2), RFP:$src1))]>;
// ST(0) = [mem64] / ST(0)
def FpDIVR64m
  : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fdiv (loadf64 addr:$src2), RFP:$src1))]>;


// Real instructions for the FP memory-operand arithmetic.  Opcode 0xD8 is
// used with an f32 operand and 0xDC with an f64 operand; the MRMnm format
// differs per operation.

// Add (MRM0m) and multiply (MRM1m).
def FADD32m : FPI<0xD8, MRM0m, (ops f32mem:$src), "fadd{s} $src">;
def FADD64m : FPI<0xDC, MRM0m, (ops f64mem:$src), "fadd{l} $src">;
def FMUL32m : FPI<0xD8, MRM1m, (ops f32mem:$src), "fmul{s} $src">;
def FMUL64m : FPI<0xDC, MRM1m, (ops f64mem:$src), "fmul{l} $src">;

// Subtract (MRM4m) and reverse subtract (MRM5m).
def FSUB32m : FPI<0xD8, MRM4m, (ops f32mem:$src), "fsub{s} $src">;
def FSUB64m : FPI<0xDC, MRM4m, (ops f64mem:$src), "fsub{l} $src">;
def FSUBR32m : FPI<0xD8, MRM5m, (ops f32mem:$src), "fsubr{s} $src">;
def FSUBR64m : FPI<0xDC, MRM5m, (ops f64mem:$src), "fsubr{l} $src">;

// Divide (MRM6m) and reverse divide (MRM7m).
def FDIV32m : FPI<0xD8, MRM6m, (ops f32mem:$src), "fdiv{s} $src">;
def FDIV64m : FPI<0xDC, MRM6m, (ops f64mem:$src), "fdiv{l} $src">;
def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">;
def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">;

// Binary ops whose memory operand is a 16- or 32-bit integer.  The integer is
// brought in through an X86fild node; the "R" forms have it as the left-hand
// operand.

// ST(0) = ST(0) + [mem16int]
def FpIADD16m
  : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fadd RFP:$src1, (X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) + [mem32int]
def FpIADD32m
  : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fadd RFP:$src1, (X86fild addr:$src2, i32)))]>;

// ST(0) = ST(0) * [mem16int]
def FpIMUL16m
  : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fmul RFP:$src1, (X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) * [mem32int]
def FpIMUL32m
  : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fmul RFP:$src1, (X86fild addr:$src2, i32)))]>;

// ST(0) = ST(0) - [mem16int]
def FpISUB16m
  : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fsub RFP:$src1, (X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) - [mem32int]
def FpISUB32m
  : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fsub RFP:$src1, (X86fild addr:$src2, i32)))]>;

// ST(0) = [mem16int] - ST(0)
def FpISUBR16m
  : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fsub (X86fild addr:$src2, i16), RFP:$src1))]>;
// ST(0) = [mem32int] - ST(0)
def FpISUBR32m
  : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fsub (X86fild addr:$src2, i32), RFP:$src1))]>;

// ST(0) = ST(0) / [mem16int]
def FpIDIV16m
  : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fdiv RFP:$src1, (X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) / [mem32int]
def FpIDIV32m
  : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fdiv RFP:$src1, (X86fild addr:$src2, i32)))]>;

// ST(0) = [mem16int] / ST(0)
def FpIDIVR16m
  : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16), RFP:$src1))]>;
// ST(0) = [mem32int] / ST(0)
def FpIDIVR32m
  : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
        [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32), RFP:$src1))]>;

// Real instructions for the integer memory-operand arithmetic.  Opcode 0xDE
// is used with an i16 operand and 0xDA with an i32 operand; the MRMnm format
// differs per operation.

// Add (MRM0m) and multiply (MRM1m).
def FIADD16m : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">;
def FIADD32m : FPI<0xDA, MRM0m, (ops i32mem:$src), "fiadd{l} $src">;
def FIMUL16m : FPI<0xDE, MRM1m, (ops i16mem:$src), "fimul{s} $src">;
def FIMUL32m : FPI<0xDA, MRM1m, (ops i32mem:$src), "fimul{l} $src">;

// Subtract (MRM4m) and reverse subtract (MRM5m).
def FISUB16m : FPI<0xDE, MRM4m, (ops i16mem:$src), "fisub{s} $src">;
def FISUB32m : FPI<0xDA, MRM4m, (ops i32mem:$src), "fisub{l} $src">;
def FISUBR16m : FPI<0xDE, MRM5m, (ops i16mem:$src), "fisubr{s} $src">;
def FISUBR32m : FPI<0xDA, MRM5m, (ops i32mem:$src), "fisubr{l} $src">;

// Divide (MRM6m) and reverse divide (MRM7m).
def FIDIV16m : FPI<0xDE, MRM6m, (ops i16mem:$src), "fidiv{s} $src">;
def FIDIV32m : FPI<0xDA, MRM6m, (ops i32mem:$src), "fidiv{l} $src">;
def FIDIVR16m : FPI<0xDE, MRM7m, (ops i16mem:$src), "fidivr{s} $src">;
def FIDIVR32m : FPI<0xDA, MRM7m, (ops i32mem:$src), "fidivr{l} $src">;

// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
// of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
// we have to put some 'r's in and take them out of weird places.
//
// Each operation comes in three register forms, one per template class:
//   FPST0rInst  (D8) - e.g. FADDST0r
//   FPrST0Inst  (DC) - e.g. FADDrST0
//   FPrST0PInst (DE) - e.g. FADDPrST0
// NOTE(review): the {a|b} groups in the asm strings presumably select between
// the AT&T and Intel spellings; "{r}" and "{|r}" are where the 'r' is added or
// dropped for one of the two.  Confirm against the asm printer.

// Add.
def FADDST0r   : FPST0rInst <0xC0, "fadd $op">;
def FADDrST0   : FPrST0Inst <0xC0, "fadd {%st(0), $op|$op, %ST(0)}">;
def FADDPrST0  : FPrST0PInst<0xC0, "faddp $op">;
// Subtract, opcode 0xE8.
def FSUBRST0r  : FPST0rInst <0xE8, "fsubr $op">;
def FSUBrST0   : FPrST0Inst <0xE8, "fsub{r} {%st(0), $op|$op, %ST(0)}">;
def FSUBPrST0  : FPrST0PInst<0xE8, "fsub{r}p $op">;
// Subtract, opcode 0xE0.
def FSUBST0r   : FPST0rInst <0xE0, "fsub $op">;
def FSUBRrST0  : FPrST0Inst <0xE0, "fsub{|r} {%st(0), $op|$op, %ST(0)}">;
def FSUBRPrST0 : FPrST0PInst<0xE0, "fsub{|r}p $op">;
// Multiply.
def FMULST0r   : FPST0rInst <0xC8, "fmul $op">;
def FMULrST0   : FPrST0Inst <0xC8, "fmul {%st(0), $op|$op, %ST(0)}">;
def FMULPrST0  : FPrST0PInst<0xC8, "fmulp $op">;
// Divide, opcode 0xF8.
def FDIVRST0r  : FPST0rInst <0xF8, "fdivr $op">;
def FDIVrST0   : FPrST0Inst <0xF8, "fdiv{r} {%st(0), $op|$op, %ST(0)}">;
def FDIVPrST0  : FPrST0PInst<0xF8, "fdiv{r}p $op">;
// Divide, opcode 0xF0.
def FDIVST0r   : FPST0rInst <0xF0, "fdiv $op">;
def FDIVRrST0  : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">;
def FDIVRPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">;


// Unary operations.  Each of the first five computes $dst from $src with the
// DAG operator named in its pattern.
def FpCHS
  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
        [(set RFP:$dst, (fneg RFP:$src))]>;
def FpABS
  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
        [(set RFP:$dst, (fabs RFP:$src))]>;
def FpSQRT
  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
        [(set RFP:$dst, (fsqrt RFP:$src))]>;
def FpSIN
  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
        [(set RFP:$dst, (fsin RFP:$src))]>;
def FpCOS
  : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
        [(set RFP:$dst, (fcos RFP:$src))]>;

// FpTST only reads $src and has no selection pattern.
def FpTST : FpI<(ops RFP:$src), OneArgFP, []>;

// Real instructions for the unary operations.  None takes an explicit
// operand; all use RawFrm with the D9 class.
def FCHS : FPI<0xE0, RawFrm, (ops), "fchs">, D9;
def FABS : FPI<0xE1, RawFrm, (ops), "fabs">, D9;
def FSQRT : FPI<0xFA, RawFrm, (ops), "fsqrt">, D9;
def FSIN : FPI<0xFE, RawFrm, (ops), "fsin">, D9;
def FCOS : FPI<0xFF, RawFrm, (ops), "fcos">, D9;
def FTST : FPI<0xE4, RawFrm, (ops), "ftst">, D9;


// Floating point cmovs.  Every pseudo is two-address and matches X86cmov on
// $src1 / $src2 with the condition code listed in its pattern.
let isTwoAddress = 1 in {
  // Below / below-or-equal / equal / parity.
  def FpCMOVB
    : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
          [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, X86_COND_B))]>;
  def FpCMOVBE
    : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
          [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, X86_COND_BE))]>;
  def FpCMOVE
    : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
          [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, X86_COND_E))]>;
  def FpCMOVP
    : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
          [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, X86_COND_P))]>;

  // The negated conditions.
  def FpCMOVNB
    : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
          [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, X86_COND_AE))]>;
  def FpCMOVNBE
    : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
          [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, X86_COND_A))]>;
  def FpCMOVNE
    : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
          [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, X86_COND_NE))]>;
  def FpCMOVNP
    : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
          [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, X86_COND_NP))]>;
}

// Real fcmov instructions.  All take one RST operand in AddRegFrm format.
// The positive conditions use the DA class, the negated ones the DB class.
def FCMOVB
  : FPI<0xC0, AddRegFrm, (ops RST:$op),
        "fcmovb {$op, %st(0)|%ST(0), $op}">, DA;
def FCMOVBE
  : FPI<0xD0, AddRegFrm, (ops RST:$op),
        "fcmovbe {$op, %st(0)|%ST(0), $op}">, DA;
def FCMOVE
  : FPI<0xC8, AddRegFrm, (ops RST:$op),
        "fcmove {$op, %st(0)|%ST(0), $op}">, DA;
// Printed as fcmovu.
def FCMOVP
  : FPI<0xD8, AddRegFrm, (ops RST:$op),
        "fcmovu  {$op, %st(0)|%ST(0), $op}">, DA;

def FCMOVNB
  : FPI<0xC0, AddRegFrm, (ops RST:$op),
        "fcmovnb {$op, %st(0)|%ST(0), $op}">, DB;
def FCMOVNBE
  : FPI<0xD0, AddRegFrm, (ops RST:$op),
        "fcmovnbe {$op, %st(0)|%ST(0), $op}">, DB;
def FCMOVNE
  : FPI<0xC8, AddRegFrm, (ops RST:$op),
        "fcmovne {$op, %st(0)|%ST(0), $op}">, DB;
// Printed as fcmovnu.
def FCMOVNP
  : FPI<0xD8, AddRegFrm, (ops RST:$op),
        "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB;

// Floating point loads & stores.

// FP loads: f32 (extended) and f64.
def FpLD32m
  : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP,
        [(set RFP:$dst, (extloadf64f32 addr:$src))]>;
def FpLD64m
  : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP,
        [(set RFP:$dst, (loadf64 addr:$src))]>;

// Integer loads of 16, 32 and 64 bits through X86fild.
def FpILD16m
  : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP,
        [(set RFP:$dst, (X86fild addr:$src, i16))]>;
def FpILD32m
  : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP,
        [(set RFP:$dst, (X86fild addr:$src, i32))]>;
def FpILD64m
  : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP,
        [(set RFP:$dst, (X86fild addr:$src, i64))]>;

// FP stores: truncating to f32, or a plain store.
def FpST32m
  : FpI<(ops f32mem:$op, RFP:$src), OneArgFP,
        [(truncstoref32 RFP:$src, addr:$op)]>;
def FpST64m
  : FpI<(ops f64mem:$op, RFP:$src), OneArgFP,
        [(store RFP:$src, addr:$op)]>;

// Store forms without a selection pattern.
def FpSTP32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, []>;
def FpSTP64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, []>;
def FpIST16m : FpI<(ops i16mem:$op, RFP:$src), OneArgFP, []>;
def FpIST32m : FpI<(ops i32mem:$op, RFP:$src), OneArgFP, []>;
def FpIST64m : FpI<(ops i64mem:$op, RFP:$src), OneArgFP, []>;

// Real load and store instructions.

// FP loads.
def FLD32m : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">;
def FLD64m : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">;

// Integer loads.
def FILD16m : FPI<0xDF, MRM0m, (ops i16mem:$src), "fild{s} $src">;
def FILD32m : FPI<0xDB, MRM0m, (ops i32mem:$src), "fild{l} $src">;
def FILD64m : FPI<0xDF, MRM5m, (ops i64mem:$src), "fild{ll} $src">;

// FP stores, plain and "p" forms.
def FST32m : FPI<0xD9, MRM2m, (ops f32mem:$dst), "fst{s} $dst">;
def FST64m : FPI<0xDD, MRM2m, (ops f64mem:$dst), "fst{l} $dst">;
def FSTP32m : FPI<0xD9, MRM3m, (ops f32mem:$dst), "fstp{s} $dst">;
def FSTP64m : FPI<0xDD, MRM3m, (ops f64mem:$dst), "fstp{l} $dst">;

// Integer stores, plain and "p" forms.  Only the "p" form has a 64-bit
// variant here.
def FIST16m : FPI<0xDF, MRM2m, (ops i16mem:$dst), "fist{s} $dst">;
def FIST32m : FPI<0xDB, MRM2m, (ops i32mem:$dst), "fist{l} $dst">;
def FISTP16m : FPI<0xDF, MRM3m, (ops i16mem:$dst), "fistp{s} $dst">;
def FISTP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">;
def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">;

// FISTTP requires SSE3 even though it's a FPStack op.  The pseudos therefore
// use the unpredicated FpI_ template and add Requires<[HasSSE3]> themselves.
def FpISTT16m
  : FpI_<(ops i16mem:$op, RFP:$src), OneArgFP,
         [(X86fp_to_i16mem RFP:$src, addr:$op)]>,
    Requires<[HasSSE3]>;
def FpISTT32m
  : FpI_<(ops i32mem:$op, RFP:$src), OneArgFP,
         [(X86fp_to_i32mem RFP:$src, addr:$op)]>,
    Requires<[HasSSE3]>;
def FpISTT64m
  : FpI_<(ops i64mem:$op, RFP:$src), OneArgFP,
         [(X86fp_to_i64mem RFP:$src, addr:$op)]>,
    Requires<[HasSSE3]>;

// Real fisttp instructions for 16-, 32- and 64-bit destinations.
def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">;
def FISTTP32m : FPI<0xDB, MRM1m, (ops i32mem:$dst), "fisttp{l} $dst">;
def FISTTP64m : FPI<0xDD, MRM1m, (ops i64mem:$dst), "fisttp{ll} $dst">;

// FP Stack manipulation instructions.  Each takes one RST operand in
// AddRegFrm format.
def FLDrr : FPI<0xC0, AddRegFrm, (ops RST:$op), "fld $op">, D9;
def FSTrr : FPI<0xD0, AddRegFrm, (ops RST:$op), "fst $op">, DD;
def FSTPrr : FPI<0xD8, AddRegFrm, (ops RST:$op), "fstp $op">, DD;
def FXCH : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9;

// Floating point constant loads.  Both pseudos are marked rematerializable.

// $dst = +0.0
let isReMaterializable = 1 in
def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP, [(set RFP:$dst, fp64imm0)]>;

// $dst = +1.0
let isReMaterializable = 1 in
def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP, [(set RFP:$dst, fp64imm1)]>;

// Real instructions: fldz and fld1.
def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9;
def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9;


// Floating point compares.

// FPSW = cmp ST(0) with ST(i).  No selection pattern.
def FpUCOMr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP, []>;

// CC = cmp ST(0) with ST(i).  Selected for X86cmp.
def FpUCOMIr
  : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
        [(X86cmp RFP:$lhs, RFP:$rhs)]>;

// Real compare instructions.  Every one lists ST0 as an implicit use.

// FPSW = cmp ST(0) with ST(i)
def FUCOMr
  : FPI<0xE0, AddRegFrm, (ops RST:$reg), "fucom $reg">,
    DD, Imp<[ST0],[]>;

// FPSW = cmp ST(0) with ST(i), pop
def FUCOMPr
  : FPI<0xE8, AddRegFrm, (ops RST:$reg), "fucomp $reg">,
    DD, Imp<[ST0],[]>;

// cmp ST(0) with ST(1), pop, pop
def FUCOMPPr
  : FPI<0xE9, RawFrm, (ops), "fucompp">,
    DA, Imp<[ST0],[]>;

// CC = cmp ST(0) with ST(i)
def FUCOMIr
  : FPI<0xE8, AddRegFrm, (ops RST:$reg),
        "fucomi {$reg, %st(0)|%ST(0), $reg}">,
    DB, Imp<[ST0],[]>;

// CC = cmp ST(0) with ST(i), pop
def FUCOMIPr
  : FPI<0xE8, AddRegFrm, (ops RST:$reg),
        "fucomip {$reg, %st(0)|%ST(0), $reg}">,
    DF, Imp<[ST0],[]>;


// Floating point flag ops.  These use the plain I template with an empty
// pattern list.

// AX = fp flags (AX is listed as an implicit def).
def FNSTSW8r
  : I<0xE0, RawFrm, (ops), "fnstsw", []>, DF, Imp<[],[AX]>;

// [mem16] = X87 control word
def FNSTCW16m
  : I<0xD9, MRM7m, (ops i16mem:$dst), "fnstcw $dst", []>;

// X87 control word = [mem16]
def FLDCW16m
  : I<0xD9, MRM5m, (ops i16mem:$dst), "fldcw $dst", []>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// Required for RET of f32 / f64 values: X86fld selects the load pseudo that
// matches the value type operand.
def : Pat<(X86fld addr:$src, f32),
          (FpLD32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64),
          (FpLD64m addr:$src)>;

// Required for CALL which return f32 / f64 values: X86fst selects the store
// pseudo that matches the value type operand.
def : Pat<(X86fst RFP:$src, addr:$op, f32),
          (FpST32m addr:$op, RFP:$src)>;
def : Pat<(X86fst RFP:$src, addr:$op, f64),
          (FpST64m addr:$op, RFP:$src)>;

// Floating point constant -0.0 and -1.0: load +0.0 / +1.0, then FpCHS.
def : Pat<(f64 fp64immneg0),
          (FpCHS (FpLD0))>, Requires<[FPStack]>;
def : Pat<(f64 fp64immneg1),
          (FpCHS (FpLD1))>, Requires<[FPStack]>;

// Used to conv. i64 to f64 since there isn't a SSE version.
def : Pat<(X86fildflag addr:$src, i64),
          (FpILD64m addr:$src)>;