summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2004-02-12 17:53:22 +0000
committer: Chris Lattner <sabre@nondot.org> 2004-02-12 17:53:22 +0000
commit: 915e5e56d7cc8e140d33202eed6244ed0356ed1f (patch)
tree: 4591d2b30c5a36850de5ccb513f9ccc15ce8ee16
parent: 33aec9efa926690c1cbd92314a92a8aec563b329 (diff)
download: llvm-915e5e56d7cc8e140d33202eed6244ed0356ed1f.tar.gz
download: llvm-915e5e56d7cc8e140d33202eed6244ed0356ed1f.tar.bz2
download: llvm-915e5e56d7cc8e140d33202eed6244ed0356ed1f.tar.xz
Add support for the rep movs[bwd] instructions, and emit them when code
generating the llvm.memcpy intrinsic.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11351 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/InstSelectSimple.cpp 53
-rw-r--r-- lib/Target/X86/X86CodeEmitter.cpp 4
-rw-r--r-- lib/Target/X86/X86ISelSimple.cpp 53
-rw-r--r-- lib/Target/X86/X86InstrInfo.h 18
-rw-r--r-- lib/Target/X86/X86InstrInfo.td 25
5 files changed, 138 insertions, 15 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index 9df7697344..67849e2edb 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -1157,6 +1157,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
case Intrinsic::va_start:
case Intrinsic::va_copy:
case Intrinsic::va_end:
+ case Intrinsic::memcpy:
// We directly implement these intrinsics
break;
default:
@@ -1188,6 +1189,58 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
return;
case Intrinsic::va_end: return; // Noop on X86
+ case Intrinsic::memcpy: {
+ assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
+ unsigned Align = 1;
+ if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
+ Align = AlignC->getRawValue();
+ if (Align == 0) Align = 1;
+ }
+
+ // Turn the byte count into # iterations
+ unsigned ByteReg = getReg(CI.getOperand(3));
+ unsigned CountReg;
+
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ CountReg = makeAnotherReg(Type::IntTy);
+ BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
+ break;
+ case 0: // DWORD aligned
+ CountReg = makeAnotherReg(Type::IntTy);
+ BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
+ break;
+ case 1: // BYTE aligned
+ case 3: // BYTE aligned
+ CountReg = ByteReg;
+ break;
+ }
+
+ // No matter what the alignment is, we put the source in ESI, the
+ // destination in EDI, and the count in ECX.
+ TmpReg1 = getReg(CI.getOperand(1));
+ TmpReg2 = getReg(CI.getOperand(2));
+ BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
+ BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
+ BuildMI(BB, X86::MOVrr32, 1, X86::ESI).addReg(TmpReg2);
+
+ unsigned Bytes = getReg(CI.getOperand(3));
+ switch (Align & 3) {
+ case 1: // BYTE aligned
+ case 3: // BYTE aligned
+ BuildMI(BB, X86::REP_MOVSB, 0);
+ break;
+ case 2: // WORD aligned
+ BuildMI(BB, X86::REP_MOVSW, 0);
+ break;
+ case 0: // DWORD aligned
+ BuildMI(BB, X86::REP_MOVSD, 0);
+ break;
+ }
+
+ return;
+ }
+
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
}
}
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index be6319cd0c..83e5e102d5 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -470,6 +470,9 @@ void Emitter::emitInstruction(MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
const TargetInstrDescriptor &Desc = II->get(Opcode);
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc.TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
+
// Emit instruction prefixes if necessary
if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);// Operand size...
@@ -477,6 +480,7 @@ void Emitter::emitInstruction(MachineInstr &MI) {
case X86II::TB:
MCE.emitByte(0x0F); // Two-byte opcode prefix
break;
+ case X86II::REP: break; // already handled.
case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
MCE.emitByte(0xD8+
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp
index 9df7697344..67849e2edb 100644
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -1157,6 +1157,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
case Intrinsic::va_start:
case Intrinsic::va_copy:
case Intrinsic::va_end:
+ case Intrinsic::memcpy:
// We directly implement these intrinsics
break;
default:
@@ -1188,6 +1189,58 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
return;
case Intrinsic::va_end: return; // Noop on X86
+ case Intrinsic::memcpy: {
+ assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
+ unsigned Align = 1;
+ if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
+ Align = AlignC->getRawValue();
+ if (Align == 0) Align = 1;
+ }
+
+ // Turn the byte count into # iterations
+ unsigned ByteReg = getReg(CI.getOperand(3));
+ unsigned CountReg;
+
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ CountReg = makeAnotherReg(Type::IntTy);
+ BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
+ break;
+ case 0: // DWORD aligned
+ CountReg = makeAnotherReg(Type::IntTy);
+ BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
+ break;
+ case 1: // BYTE aligned
+ case 3: // BYTE aligned
+ CountReg = ByteReg;
+ break;
+ }
+
+ // No matter what the alignment is, we put the source in ESI, the
+ // destination in EDI, and the count in ECX.
+ TmpReg1 = getReg(CI.getOperand(1));
+ TmpReg2 = getReg(CI.getOperand(2));
+ BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
+ BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
+ BuildMI(BB, X86::MOVrr32, 1, X86::ESI).addReg(TmpReg2);
+
+ unsigned Bytes = getReg(CI.getOperand(3));
+ switch (Align & 3) {
+ case 1: // BYTE aligned
+ case 3: // BYTE aligned
+ BuildMI(BB, X86::REP_MOVSB, 0);
+ break;
+ case 2: // WORD aligned
+ BuildMI(BB, X86::REP_MOVSW, 0);
+ break;
+ case 0: // DWORD aligned
+ BuildMI(BB, X86::REP_MOVSD, 0);
+ break;
+ }
+
+ return;
+ }
+
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
}
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 98f9fe68d7..c6e3b76176 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -86,9 +86,9 @@ namespace X86II {
OpSize = 1 << 5,
// Op0Mask - There are several prefix bytes that are used to form two byte
- // opcodes. These are currently 0x0F, and 0xD8-0xDF. This mask is used to
- // obtain the setting of this field. If no bits in this field is set, there
- // is no prefix byte for obtaining a multibyte opcode.
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field are
+ // set, there is no prefix byte for obtaining a multibyte opcode.
//
Op0Shift = 6,
Op0Mask = 0xF << Op0Shift,
@@ -97,12 +97,16 @@ namespace X86II {
// starts with a 0x0F byte before the real opcode.
TB = 1 << Op0Shift,
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
// D8-DF - These escape opcodes are used by the floating point unit. These
// values must remain sequential.
- D8 = 2 << Op0Shift, D9 = 3 << Op0Shift,
- DA = 4 << Op0Shift, DB = 5 << Op0Shift,
- DC = 6 << Op0Shift, DD = 7 << Op0Shift,
- DE = 8 << Op0Shift, DF = 9 << Op0Shift,
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
//===------------------------------------------------------------------===//
// This three-bit field describes the size of a memory operand. Zero is
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 92f193f985..4bb1a9550a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -92,14 +92,15 @@ class Pattern<dag P> {
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
class TB { bits<4> Prefix = 1; }
-class D8 { bits<4> Prefix = 2; }
-class D9 { bits<4> Prefix = 3; }
-class DA { bits<4> Prefix = 4; }
-class DB { bits<4> Prefix = 5; }
-class DC { bits<4> Prefix = 6; }
-class DD { bits<4> Prefix = 7; }
-class DE { bits<4> Prefix = 8; }
-class DF { bits<4> Prefix = 9; }
+class REP { bits<4> Prefix = 2; }
+class D8 { bits<4> Prefix = 3; }
+class D9 { bits<4> Prefix = 4; }
+class DA { bits<4> Prefix = 5; }
+class DB { bits<4> Prefix = 6; }
+class DC { bits<4> Prefix = 7; }
+class DD { bits<4> Prefix = 8; }
+class DE { bits<4> Prefix = 9; }
+class DF { bits<4> Prefix = 10; }
@@ -172,6 +173,14 @@ def XCHGrr32 : X86Inst<"xchg", 0x87, MRMDestReg, Arg32>; // xchg R32, R32
def LEAr16 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg16>, OpSize; // R16 = lea [mem]
def LEAr32 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg32>; // R32 = lea [mem]
+
+def REP_MOVSB : X86Inst<"rep movsb", 0xA4, RawFrm, NoArg>, REP,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+def REP_MOVSW : X86Inst<"rep movsw", 0xA5, RawFrm, NoArg>, REP, OpSize,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+def REP_MOVSD : X86Inst<"rep movsd", 0xA5, RawFrm, NoArg>, REP,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+
//===----------------------------------------------------------------------===//
// Move Instructions...
//