lib/Bytecode/Writer/InstructionWriter.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290

//===-- InstructionWriter.cpp - Functions for writing instructions --------===//
//
// This file implements the routines for encoding instruction opcodes to a 
// bytecode stream.
//
//===----------------------------------------------------------------------===//

#include "WriterInternals.h"
#include "llvm/Module.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "Support/Statistic.h"
#include <algorithm>

static Statistic<> 
NumInstrs("bytecodewriter", "Number of instructions");

typedef unsigned char uchar;

// outputInstructionFormat0 - Output those wierd instructions that have a large
// number of operands or have large operands themselves...
//
// Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>]
//
static void outputInstructionFormat0(const Instruction *I, unsigned Opcode,
				     const SlotCalculator &Table,
				     unsigned Type, std::deque<uchar> &Out) {
  // Opcode must have top two bits clear...
  output_vbr(Opcode << 2, Out);                  // Instruction Opcode ID
  output_vbr(Type, Out);                         // Result type

  unsigned NumArgs = I->getNumOperands();
  output_vbr(NumArgs + (isa<CastInst>(I) || isa<VANextInst>(I) ||
                        isa<VAArgInst>(I)), Out);

  for (unsigned i = 0; i < NumArgs; ++i) {
    int Slot = Table.getSlot(I->getOperand(i));
    assert(Slot >= 0 && "No slot number for value!?!?");      
    output_vbr((unsigned)Slot, Out);
  }

  if (isa<CastInst>(I) || isa<VAArgInst>(I)) {
    int Slot = Table.getSlot(I->getType());
    assert(Slot != -1 && "Cast return type unknown?");
    output_vbr((unsigned)Slot, Out);
  } else if (const VANextInst *VAI = dyn_cast<VANextInst>(I)) {
    int Slot = Table.getSlot(VAI->getArgType());
    assert(Slot != -1 && "VarArg argument type unknown?");
    output_vbr((unsigned)Slot, Out);
  }

  align32(Out);    // We must maintain correct alignment!
}


// outputInstrVarArgsCall - Output the absurdly annoying varargs function calls.
// This are more annoying than most because the signature of the call does not
// tell us anything about the types of the arguments in the varargs portion.
// Because of this, we encode (as type 0) all of the argument types explicitly
// before the argument value.  This really sucks, but you shouldn't be using
// varargs functions in your code! *death to printf*!
//
// Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>]
//
static void outputInstrVarArgsCall(const Instruction *I, unsigned Opcode,
				   const SlotCalculator &Table, unsigned Type,
				   std::deque<uchar> &Out) {
  assert(isa<CallInst>(I) || isa<InvokeInst>(I));
  // Opcode must have top two bits clear...
  output_vbr(Opcode << 2, Out);                  // Instruction Opcode ID
  output_vbr(Type, Out);                         // Result type (varargs type)

  const PointerType *PTy = cast<PointerType>(I->getOperand(0)->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  unsigned NumParams = FTy->getNumParams();

  unsigned NumFixedOperands;
  if (isa<CallInst>(I)) {
    // Output an operand for the callee and each fixed argument, then two for
    // each variable argument.
    NumFixedOperands = 1+NumParams;
  } else {
    assert(isa<InvokeInst>(I) && "Not call or invoke??");
    // Output an operand for the callee and destinations, then two for each
    // variable argument.
    NumFixedOperands = 3+NumParams;
  }
  output_vbr(2 * I->getNumOperands()-NumFixedOperands, Out);

  // The type for the function has already been emitted in the type field of the
  // instruction.  Just emit the slot # now.
  for (unsigned i = 0; i != NumFixedOperands; ++i) {
    int Slot = Table.getSlot(I->getOperand(i));
    assert(Slot >= 0 && "No slot number for value!?!?");      
    output_vbr((unsigned)Slot, Out);
  }

  for (unsigned i = NumFixedOperands, e = I->getNumOperands(); i != e; ++i) {
    // Output Arg Type ID
    int Slot = Table.getSlot(I->getOperand(i)->getType());
    assert(Slot >= 0 && "No slot number for value!?!?");      
    output_vbr((unsigned)Slot, Out);
    
    // Output arg ID itself
    Slot = Table.getSlot(I->getOperand(i));
    assert(Slot >= 0 && "No slot number for value!?!?");      
    output_vbr((unsigned)Slot, Out);
  }
  align32(Out);    // We must maintain correct alignment!
}


// outputInstructionFormat1 - Output one operand instructions, knowing that no
// operand index is >= 2^12.
//
static void outputInstructionFormat1(const Instruction *I, unsigned Opcode,
				     const SlotCalculator &Table, int *Slots,
				     unsigned Type, std::deque<uchar> &Out) {
  // bits   Instruction format:
  // --------------------------
  // 01-00: Opcode type, fixed to 1.
  // 07-02: Opcode
  // 19-08: Resulting type plane
  // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
  //
  unsigned Bits = 1 | (Opcode << 2) | (Type << 8) | (Slots[0] << 20);
  //  cerr << "1 " << IType << " " << Type << " " << Slots[0] << endl;
  output(Bits, Out);
}


// outputInstructionFormat2 - Output two operand instructions, knowing that no
// operand index is >= 2^8.
//
static void outputInstructionFormat2(const Instruction *I, unsigned Opcode,
				     const SlotCalculator &Table, int *Slots,
				     unsigned Type, std::deque<uchar> &Out) {
  // bits   Instruction format:
  // --------------------------
  // 01-00: Opcode type, fixed to 2.
  // 07-02: Opcode
  // 15-08: Resulting type plane
  // 23-16: Operand #1
  // 31-24: Operand #2  
  //
  unsigned Bits = 2 | (Opcode << 2) | (Type << 8) |
                    (Slots[0] << 16) | (Slots[1] << 24);
  //  cerr << "2 " << IType << " " << Type << " " << Slots[0] << " " 
  //       << Slots[1] << endl;
  output(Bits, Out);
}


// outputInstructionFormat3 - Output three operand instructions, knowing that no
// operand index is >= 2^6.
//
static void outputInstructionFormat3(const Instruction *I, unsigned Opcode,
				     const SlotCalculator &Table, int *Slots,
				     unsigned Type, std::deque<uchar> &Out) {
  // bits   Instruction format:
  // --------------------------
  // 01-00: Opcode type, fixed to 3.
  // 07-02: Opcode
  // 13-08: Resulting type plane
  // 19-14: Operand #1
  // 25-20: Operand #2
  // 31-26: Operand #3
  //
  unsigned Bits = 3 | (Opcode << 2) | (Type << 8) |
          (Slots[0] << 14) | (Slots[1] << 20) | (Slots[2] << 26);
  //cerr << "3 " << IType << " " << Type << " " << Slots[0] << " " 
  //     << Slots[1] << " " << Slots[2] << endl;
  output(Bits, Out);
}

void BytecodeWriter::processInstruction(const Instruction &I) {
  assert(I.getOpcode() < 62 && "Opcode too big???");
  unsigned Opcode = I.getOpcode();

  // Encode 'volatile load' as 62 and 'volatile store' as 63.
  if (isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile())
    Opcode = 62;
  if (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile())
    Opcode = 63;

  unsigned NumOperands = I.getNumOperands();
  int MaxOpSlot = 0;
  int Slots[3]; Slots[0] = (1 << 12)-1;   // Marker to signify 0 operands

  for (unsigned i = 0; i < NumOperands; ++i) {
    const Value *Def = I.getOperand(i);
    int slot = Table.getSlot(Def);
    assert(slot != -1 && "Broken bytecode!");
    if (slot > MaxOpSlot) MaxOpSlot = slot;
    if (i < 3) Slots[i] = slot;
  }

  // Figure out which type to encode with the instruction.  Typically we want
  // the type of the first parameter, as opposed to the type of the instruction
  // (for example, with setcc, we always know it returns bool, but the type of
  // the first param is actually interesting).  But if we have no arguments
  // we take the type of the instruction itself.  
  //
  const Type *Ty;
  switch (I.getOpcode()) {
  case Instruction::Malloc:
  case Instruction::Alloca:
    Ty = I.getType();  // Malloc & Alloca ALWAYS want to encode the return type
    break;
  case Instruction::Store:
    Ty = I.getOperand(1)->getType();  // Encode the pointer type...
    assert(isa<PointerType>(Ty) && "Store to nonpointer type!?!?");
    break;
  default:              // Otherwise use the default behavior...
    Ty = NumOperands ? I.getOperand(0)->getType() : I.getType();
    break;
  }

  unsigned Type;
  int Slot = Table.getSlot(Ty);
  assert(Slot != -1 && "Type not available!!?!");
  Type = (unsigned)Slot;

  // Make sure that we take the type number into consideration.  We don't want
  // to overflow the field size for the instruction format we select.
  //
  if (Slot > MaxOpSlot) MaxOpSlot = Slot;

  // Handle the special case for cast...
  if (isa<CastInst>(I) || isa<VAArgInst>(I)) {
    // Cast has to encode the destination type as the second argument in the
    // packet, or else we won't know what type to cast to!
    Slots[1] = Table.getSlot(I.getType());
    assert(Slots[1] != -1 && "Cast return type unknown?");
    if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1];
    NumOperands++;
  } else if (const VANextInst *VANI = dyn_cast<VANextInst>(&I)) {
    Slots[1] = Table.getSlot(VANI->getArgType());
    assert(Slots[1] != -1 && "va_next return type unknown?");
    if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1];
    NumOperands++;
  } else if (const CallInst *CI = dyn_cast<CallInst>(&I)){// Handle VarArg calls
    const PointerType *Ty = cast<PointerType>(CI->getCalledValue()->getType());
    if (cast<FunctionType>(Ty->getElementType())->isVarArg()) {
      outputInstrVarArgsCall(CI, Opcode, Table, Type, Out);
      return;
    }
  } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {// ...  & Invokes
    const PointerType *Ty = cast<PointerType>(II->getCalledValue()->getType());
    if (cast<FunctionType>(Ty->getElementType())->isVarArg()) {
      outputInstrVarArgsCall(II, Opcode, Table, Type, Out);
      return;
    }
  }

  ++NumInstrs;

  // Decide which instruction encoding to use.  This is determined primarily by
  // the number of operands, and secondarily by whether or not the max operand
  // will fit into the instruction encoding.  More operands == fewer bits per
  // operand.
  //
  switch (NumOperands) {
  case 0:
  case 1:
    if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops
      outputInstructionFormat1(&I, Opcode, Table, Slots, Type, Out);
      return;
    }
    break;

  case 2:
    if (MaxOpSlot < (1 << 8)) {
      outputInstructionFormat2(&I, Opcode, Table, Slots, Type, Out);
      return;
    }
    break;

  case 3:
    if (MaxOpSlot < (1 << 6)) {
      outputInstructionFormat3(&I, Opcode, Table, Slots, Type, Out);
      return;
    }
    break;
  }

  // If we weren't handled before here, we either have a large number of
  // operands or a large operand index that we are referring to.
  outputInstructionFormat0(&I, Opcode, Table, Type, Out);
}