x86assembler.cpp 136 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

// [Export]
#define ASMJIT_EXPORTS

// [Guard]
11
12
13
14
15
16
17
18
#include "../asmjit_build.h"
#if defined(ASMJIT_BUILD_X86)

// [Dependencies]
#include "../base/cpuinfo.h"
#include "../base/logging.h"
#include "../base/misc_p.h"
#include "../base/utils.h"
19
#include "../x86/x86assembler.h"
20
#include "../x86/x86logging_p.h"
21
22

// [Api-Begin]
23
#include "../asmjit_apibegin.h"
24
25
26
27

namespace asmjit {

// ============================================================================
28
// [FastUInt8]
29
30
// ============================================================================

31
32
33
34
35
36
37
38
39
// Small unsigned integer type used for byte-sized local values (for example
// `relSize` and `imLen` in `_emit()`). It is `unsigned char` when either
// ASMJIT_ARCH_X86 or ASMJIT_ARCH_X64 is non-zero and `unsigned int` otherwise.
// NOTE(review): presumably 8-bit locals are cheap on X86 hosts while other
// hosts prefer a native word - confirm the rationale.
#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
typedef unsigned char FastUInt8;
#else
typedef unsigned int FastUInt8;
#endif

// ============================================================================
// [Constants]
// ============================================================================
40

41
42
43
44
//! \internal
//!
//! X86/X64 bytes used to encode important prefixes.
enum X86Byte {
  //! 1-byte REX prefix mask.
  kX86ByteRex = 0x40,

  //! 1-byte REX.W component.
  kX86ByteRexW = 0x08,

  //! 2-byte VEX prefix:
  //!   - `[0]` - `0xC5`.
  //!   - `[1]` - `RvvvvLpp`.
  kX86ByteVex2 = 0xC5,

  //! 3-byte VEX prefix.
  //!   - `[0]` - `0xC4`.
  //!   - `[1]` - `RXBmmmmm`.
  //!   - `[2]` - `WvvvvLpp`.
  kX86ByteVex3 = 0xC4,

  //! 3-byte XOP prefix.
  //!   - `[0]` - `0x8F`.
  //!   - `[1]` - `RXBmmmmm`.
  //!   - `[2]` - `WvvvvLpp`.
  kX86ByteXop3 = 0x8F,

  //! 4-byte EVEX prefix.
  //!   - `[0]` - `0x62`.
  //!   - `[1]` - Payload0 or `P[ 7: 0]` - `[R  X  B  R' 0  0  m  m]`.
  //!   - `[2]` - Payload1 or `P[15: 8]` - `[W  v  v  v  v  1  p  p]`.
  //!   - `[3]` - Payload2 or `P[23:16]` - `[z  L' L  b  V' a  a  a]`.
  //!
  //! Groups:
  //!   - `P[ 1: 0]` - OPCODE: EVEX.mmmmm, only lowest 2 bits [1:0] used.
  //!   - `P[ 3: 2]` - ______: Must be 0.
  //!   - `P[    4]` - REG-ID: EVEX.R' - 5th bit of 'RRRRR'.
  //!   - `P[    5]` - REG-ID: EVEX.B  - 4th bit of 'BBBBB'.
  //!   - `P[    6]` - REG-ID: EVEX.X  - 5th bit of 'BBBBB' or 4th bit of 'XXXX' (with SIB).
  //!   - `P[    7]` - REG-ID: EVEX.R  - 4th bit of 'RRRRR'.
  //!   - `P[ 9: 8]` - OPCODE: EVEX.pp.
  //!   - `P[   10]` - ______: Must be 1.
  //!   - `P[14:11]` - REG-ID: 4 bits of 'VVVV'.
  //!   - `P[   15]` - OPCODE: EVEX.W.
  //!   - `P[18:16]` - REG-ID: K register k0...k7 (Merging/Zeroing Vector Ops).
  //!   - `P[   19]` - REG-ID: 5th bit of 'VVVVV'.
  //!   - `P[   20]` - OPCODE: Broadcast/Rounding Control/SAE bit.
  //!   - `P[22:21]` - OPCODE: Vector Length (L' and  L) / Rounding Control.
  //!   - `P[   23]` - OPCODE: Zeroing/Merging.
  kX86ByteEvex = 0x62
};
92

93
94
95
96
// AsmJit specific (used to encode VVVVV field in XOP/VEX/EVEX).
//
// The 5-bit VVVVV value is stored `kVexVVVVVShift` bits above bit 0 so it can
// be packed into a single integer together with a register id (see
// `x86PackRegAndVvvvv()`).
enum VexVVVVV {
  kVexVVVVVShift = 7,
  kVexVVVVVMask = 0x1F << kVexVVVVVShift
};

//! \internal
//!
//! Instruction 2-byte/3-byte opcode prefix definition.
//!
//! One record of the `x86OpCodeMM` table; `EMIT_MM_OP` copies `data[0]` and
//! `data[1]` to the output and then advances the cursor by `len`.
struct X86OpCodeMM {
  //! Number of prefix bytes to emit (the cursor is advanced by this amount).
  uint8_t len;
  //! Prefix bytes; only the first `len` of them are meaningful.
  uint8_t data[3];
};

//! \internal
//!
//! Mandatory prefixes used to encode legacy [66, F3, F2] or [9B] byte.
//!
//! Indexed by the 3-bit PP field extracted by `EMIT_PP`; index 0 means that
//! no prefix byte is emitted.
static const uint8_t x86OpCodePP[8] = {
  0x00, // #0 - No prefix.
  0x66, // #1 - 66H.
  0xF3, // #2 - F3H.
  0xF2, // #3 - F2H.
  0x00, // #4 - Unused.
  0x00, // #5 - Unused.
  0x00, // #6 - Unused.
  0x9B  // #7 - 9BH.
};
111
112
113
114
115

//! \internal
//!
//! Instruction 2-byte/3-byte opcode prefix data.
//!
//! Indexed by the MM field of an opcode (see `EMIT_MM_OP`). Each record holds
//! the number of prefix bytes followed by the bytes themselves; index 0 means
//! that the opcode has no such prefix.
static const X86OpCodeMM x86OpCodeMM[] = {
  { 0, { 0x00, 0x00, 0 } }, // #00 (0b0000).
  { 1, { 0x0F, 0x00, 0 } }, // #01 (0b0001).
  { 2, { 0x0F, 0x38, 0 } }, // #02 (0b0010).
  { 2, { 0x0F, 0x3A, 0 } }, // #03 (0b0011).
  { 2, { 0x0F, 0x01, 0 } }, // #04 (0b0100).
  { 0, { 0x00, 0x00, 0 } }, // #05 (0b0101).
  { 0, { 0x00, 0x00, 0 } }, // #06 (0b0110).
  { 0, { 0x00, 0x00, 0 } }, // #07 (0b0111).
  { 0, { 0x00, 0x00, 0 } }, // #08 (0b1000).
  { 0, { 0x00, 0x00, 0 } }, // #09 (0b1001).
  { 0, { 0x00, 0x00, 0 } }, // #0A (0b1010).
  { 0, { 0x00, 0x00, 0 } }, // #0B (0b1011).
  { 0, { 0x00, 0x00, 0 } }, // #0C (0b1100).
  { 0, { 0x00, 0x00, 0 } }, // #0D (0b1101).
  { 0, { 0x00, 0x00, 0 } }, // #0E (0b1110).
  { 0, { 0x00, 0x00, 0 } }  // #0F (0b1111).
};

// Byte tables with 8 entries each; entry #0 and entry #7 are always zero.
// NOTE(review): the non-zero bytes match ES, CS, SS, DS, FS, GS (in this
// order) in the X86 ISA, so the tables are presumably indexed by a segment
// register id - confirm against the segment register id definitions.
//
//   - x86SegmentPrefix - segment override prefix byte.
//   - x86OpCodePushSeg - PUSH opcode byte of the segment register.
//   - x86OpCodePopSeg  - POP opcode byte of the segment register (entry #2 is
//                        zero as the ISA has no `POP CS`).
static const uint8_t x86SegmentPrefix[8] = { 0x00, 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65, 0x00 };
static const uint8_t x86OpCodePushSeg[8] = { 0x00, 0x06, 0x0E, 0x16, 0x1E, 0xA0, 0xA8, 0x00 };
static const uint8_t x86OpCodePopSeg[8]  = { 0x00, 0x07, 0x00, 0x17, 0x1F, 0xA1, 0xA9, 0x00 };

// ============================================================================
// [asmjit::X86MemInfo | X86VEXPrefix | X86LLByRegType | X86CDisp8Table]
// ============================================================================

//! \internal
//!
//! Memory operand's info bits.
//!
//! A lookup table that contains various information based on the BASE and INDEX
//! information of a memory operand. This is much better and safer than playing
//! with IFs in the code and can check for errors much faster and better.
enum X86MemInfo_Enum {
  kX86MemInfo_0         = 0x00,

  kX86MemInfo_BaseGp    = 0x01, //!< Has BASE reg, REX.B can be 1, compatible with REX.B byte.
  kX86MemInfo_Index     = 0x02, //!< Has INDEX reg, REX.X can be 1, compatible with REX.X byte.

  kX86MemInfo_BaseLabel = 0x10, //!< Base is Label.
  kX86MemInfo_BaseRip   = 0x20, //!< Base is RIP.

  kX86MemInfo_67H_X86   = 0x40, //!< Address-size override in 32-bit mode.
  kX86MemInfo_67H_X64   = 0x80, //!< Address-size override in 64-bit mode.
  kX86MemInfo_67H_Mask  = 0xC0  //!< Contains all address-size override bits.
};

// Compile-time generator of a single `x86MemInfo` entry.
//
// `X` packs two 5-bit register/operand type values: the BASE type in bits
// [4:0] and the INDEX type in bits [9:5]. `kValue` is a combination of
// `X86MemInfo_Enum` bits describing that BASE+INDEX pair.
template<uint32_t X>
struct X86MemInfo_T {
  enum {
    // BASE and INDEX types extracted from `X`.
    B = (X     ) & 0x1F,
    I = (X >> 5) & 0x1F,

    // Kind of BASE - a GP register (Gpw..Gpq), RIP, a Label, or none of these.
    kBase  = ((B >= X86Reg::kRegGpw  && B <= X86Reg::kRegGpq ) ? kX86MemInfo_BaseGp    :
              (B == X86Reg::kRegRip                          ) ? kX86MemInfo_BaseRip   :
              (B == Label::kLabelTag                         ) ? kX86MemInfo_BaseLabel : 0),

    // INDEX is present when it's either a GP register (Gpw..Gpq) or a vector
    // register (Xmm..Zmm).
    kIndex = ((I >= X86Reg::kRegGpw  && I <= X86Reg::kRegGpq ) ? kX86MemInfo_Index     :
              (I >= X86Reg::kRegXmm  && I <= X86Reg::kRegZmm ) ? kX86MemInfo_Index     : 0),

    // Address-size override requirement. Combinations that use Gpw registers
    // yield the 32-bit mode bit (67H_X86), combinations that use Gpd registers
    // yield the 64-bit mode bit (67H_X64). Any combination not listed here
    // (including mixed Gpw/Gpd) yields no override bit.
    k67H   = ((B == X86Reg::kRegGpw  && I == X86Reg::kRegNone) ? kX86MemInfo_67H_X86   :
              (B == X86Reg::kRegGpd  && I == X86Reg::kRegNone) ? kX86MemInfo_67H_X64   :
              (B == X86Reg::kRegNone && I == X86Reg::kRegGpw ) ? kX86MemInfo_67H_X86   :
              (B == X86Reg::kRegNone && I == X86Reg::kRegGpd ) ? kX86MemInfo_67H_X64   :
              (B == X86Reg::kRegGpw  && I == X86Reg::kRegGpw ) ? kX86MemInfo_67H_X86   :
              (B == X86Reg::kRegGpd  && I == X86Reg::kRegGpd ) ? kX86MemInfo_67H_X64   :
              (B == X86Reg::kRegGpw  && I == X86Reg::kRegXmm ) ? kX86MemInfo_67H_X86   :
              (B == X86Reg::kRegGpd  && I == X86Reg::kRegXmm ) ? kX86MemInfo_67H_X64   :
              (B == X86Reg::kRegGpw  && I == X86Reg::kRegYmm ) ? kX86MemInfo_67H_X86   :
              (B == X86Reg::kRegGpd  && I == X86Reg::kRegYmm ) ? kX86MemInfo_67H_X64   :
              (B == X86Reg::kRegGpw  && I == X86Reg::kRegZmm ) ? kX86MemInfo_67H_X86   :
              (B == X86Reg::kRegGpd  && I == X86Reg::kRegZmm ) ? kX86MemInfo_67H_X64   :
              (B == Label::kLabelTag && I == X86Reg::kRegGpw ) ? kX86MemInfo_67H_X86   :
              (B == Label::kLabelTag && I == X86Reg::kRegGpd ) ? kX86MemInfo_67H_X64   : 0),

    // Bits 0x04 and 0x08 are always set so that REX.R and REX.W are kept as is
    // when the value is used as a mask, only REX.B (0x01) and REX.X (0x02)
    // depend on the BASE and INDEX.
    kValue = kBase | kIndex | k67H | 0x04 | 0x08
  };
};

// The result stored in the LUT is a combination of
//   - 67H - Address override prefix - depends on BASE+INDEX register types and
//           the target architecture.
//   - REX - A possible combination of REX.[B|X|R|W] bits in REX prefix where
//           REX.B and REX.X are possibly masked out, but REX.R and REX.W are
//           kept as is.
//
// The index packs the BASE type in bits [4:0] and the INDEX type in bits [9:5]
// (see `X86MemInfo_T`).
// NOTE(review): `ASMJIT_TABLE_T_1024` presumably expands to `kValue` of the
// template instantiated with 0..1023 - confirm against its definition.
static const uint8_t x86MemInfo[] = { ASMJIT_TABLE_T_1024(X86MemInfo_T, kValue, 0) };

// XOR bits applied to an opcode before a 3-byte VEX or XOP prefix is emitted.
// The table is indexed by the 'mmmmm' value, which is all that is needed to
// pick the right entry. Only 3-byte VEX and XOP prefixes use this table; the
// 2-byte VEX prefix is handled differently. Sharing one table keeps the VEX3
// and XOP paths nearly identical, so no branch is required to choose between
// them when such an instruction is encoded.
//
// Layout of the value (overlined fields are stored inverted):
//            ____    ___
// [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP]
template<uint32_t X>
struct X86VEXPrefix_T {
  enum {
    // First prefix byte - XOP when bit 3 of 'mmmmm' is set, VEX3 otherwise.
    kPrefixByte = (X & 0x08) ? kX86ByteXop3 : kX86ByteVex3,
    // Bits that flip the 'vvvv' field.
    kFlipVvvv = 0xF << 19,
    // Bits that flip the 'RXB' field.
    kFlipRXB = 0x7 << 13,

    kValue = kPrefixByte | kFlipVvvv | kFlipRXB
  };
};
static const uint32_t x86VEXPrefix[] = { ASMJIT_TABLE_T_16(X86VEXPrefix_T, kValue, 0) };

// LL opcode field looked up by `register size / 16`. It propagates L.256 when
// the 32-byte bit of the size is set and L.512 when the 64-byte bit is set
// (the latter takes precedence), which corresponds to YMM and ZMM registers,
// respectively.
template<uint32_t X>
struct X86LLBySizeDiv16_T {
  enum {
    kBitOf64 = 64 >> 4,
    kBitOf32 = 32 >> 4,

    kValue = ((X & kBitOf64) != 0) ? X86Inst::kOpCode_LL_512 :
             ((X & kBitOf32) != 0) ? X86Inst::kOpCode_LL_256 : 0
  };
};
static const uint32_t x86LLBySizeDiv16[] = { ASMJIT_TABLE_T_16(X86LLBySizeDiv16_T, kValue, 0) };

// Table that contains LL opcode field addressed by a register type. It's
// used to propagate L.256 or L.512 when the register type is YMM or ZMM,
// respectively (see `x86OpCodeLByVMem()`, which indexes it by the type of
// a memory operand's INDEX register). Any other type yields zero.
template<uint32_t X>
struct X86LLByRegType_T {
  enum {
    kValue = X == X86Reg::kRegZmm ? X86Inst::kOpCode_LL_512 :
             X == X86Reg::kRegYmm ? X86Inst::kOpCode_LL_256 : 0
  };
};
static const uint32_t x86LLByRegType[] = { ASMJIT_TABLE_T_16(X86LLByRegType_T, kValue, 0) };

// Table that contains a scale (shift left) based on 'TTWLL' field and
// the instruction's tuple-type (TT) field. The scale is then applied to
// the BASE-N stored in each opcode to calculate the final compressed
// displacement used by all EVEX encoded instructions.
//
// Index layout ('TTWLL'): bits [1:0] are LL, bit [2] is W, and the remaining
// bits select the tuple-type.
template<uint32_t X>
struct X86CDisp8SHL_T {
  enum {
    // Tuple-type moved to its position within an opcode so it can be compared
    // with `X86Inst::kOpCode_CDTT_...` constants directly.
    TT = (((X) >> 3) << X86Inst::kOpCode_CDTT_Shift),
    LL = (((X) >> 0) & 0x3),
    W  = (((X) >> 2) & 0x1),

    // Shift amount for each tuple-type (selected by LL == 0, LL == 1, and any
    // other LL), stored at `kOpCode_CDSHL_Shift`:
    //   - None - always 0.
    //   - ByLL - 0, 1, 2.
    //   - T1W  - 0, 1, 2 incremented by W.
    //   - DUP  - 0, 2, 3.
    kValue = (TT == X86Inst::kOpCode_CDTT_None ? ((LL==0) ? 0 : (LL==1) ? 0   : 0  ) :
              TT == X86Inst::kOpCode_CDTT_ByLL ? ((LL==0) ? 0 : (LL==1) ? 1   : 2  ) :
              TT == X86Inst::kOpCode_CDTT_T1W  ? ((LL==0) ? W : (LL==1) ? 1+W : 2+W) :
              TT == X86Inst::kOpCode_CDTT_DUP  ? ((LL==0) ? 0 : (LL==1) ? 2   : 3  ) : 0 ) << X86Inst::kOpCode_CDSHL_Shift
  };
};
static const uint32_t x86CDisp8SHL[] = { ASMJIT_TABLE_T_32(X86CDisp8SHL_T, kValue, 0) };

// Table that contains MOD byte of a 16-bit [BASE + disp] address.
//   0xFF == Invalid.
//
// Indexed by the id of the 16-bit BASE register (8 entries).
static const uint8_t x86Mod16BaseTable[8] = {
  0xFF, // AX -> N/A.
  0xFF, // CX -> N/A.
  0xFF, // DX -> N/A.
  0x07, // BX -> 111.
  0xFF, // SP -> N/A.
  0x06, // BP -> 110.
  0x04, // SI -> 100.
  0x05  // DI -> 101.
};

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
// MOD byte of a 16-bit [BASE + INDEX + disp] address, looked up by
// `(BASE id << 3) | INDEX id`. BASE and INDEX are interchangeable, so both
// orders of each supported pair map to the same value.
//   0xFF == Invalid.
template<uint32_t X>
struct X86Mod16BaseIndexTable_T {
  enum {
    B = X >> 3,
    I = X & 0x7,

    // Supported register pairs (in any order).
    kIsBxSi = (B == X86Gp::kIdBx && I == X86Gp::kIdSi) || (B == X86Gp::kIdSi && I == X86Gp::kIdBx),
    kIsBxDi = (B == X86Gp::kIdBx && I == X86Gp::kIdDi) || (B == X86Gp::kIdDi && I == X86Gp::kIdBx),
    kIsBpSi = (B == X86Gp::kIdBp && I == X86Gp::kIdSi) || (B == X86Gp::kIdSi && I == X86Gp::kIdBp),
    kIsBpDi = (B == X86Gp::kIdBp && I == X86Gp::kIdDi) || (B == X86Gp::kIdDi && I == X86Gp::kIdBp),

    kValue = kIsBxSi ? 0x00 :
             kIsBxDi ? 0x01 :
             kIsBpSi ? 0x02 :
             kIsBpDi ? 0x03 : 0xFF
  };
};
static const uint8_t x86Mod16BaseIndexTable[] = { ASMJIT_TABLE_T_64(X86Mod16BaseIndexTable_T, kValue, 0) };
289
290

// ============================================================================
291
// [asmjit::X86Assembler - Helpers]
292
293
// ============================================================================

294
295
296
297
//! Get whether `instId` is either `X86Inst::kIdJmp` or `X86Inst::kIdCall`.
static ASMJIT_INLINE bool x86IsJmpOrCall(uint32_t instId) noexcept {
  switch (instId) {
    case X86Inst::kIdJmp:
    case X86Inst::kIdCall:
      return true;

    default:
      return false;
  }
}
298

299
300
//! Get whether `op` is a memory operand whose BASE register id equals `base`.
//!
//! The BASE id is only inspected when `op` is a memory operand.
static ASMJIT_INLINE bool x86IsImplicitMem(const Operand_& op, uint32_t base) noexcept {
  return op.isMem() && op.as<X86Mem>().getBaseId() == base;
}

303
304
//! Sign-extend the low 32 bits of `imm` to a 64-bit value.
//!
//! The high 32 bits of `imm` are discarded first, the remaining value is then
//! reinterpreted as `int32_t` and widened back to `int64_t`.
static ASMJIT_INLINE int64_t x86SignExtend32To64(int64_t imm) noexcept {
  return static_cast<int64_t>(static_cast<int32_t>(imm & 0xFFFFFFFF));
}

307
308
309
//! Get `O` field of `opCode`.
//!
//! The field is 3 bits wide and starts at `X86Inst::kOpCode_O_Shift`.
static ASMJIT_INLINE uint32_t x86ExtractO(uint32_t opCode) noexcept {
  return (opCode >> X86Inst::kOpCode_O_Shift) & 0x07;
}

312
313
314
315
316
//! Get REX bits stored in `opCode` combined with those stored in `options`.
static ASMJIT_INLINE uint32_t x86ExtractREX(uint32_t opCode, uint32_t options) noexcept {
  // kOpCode_REX was designed in a way that when shifted there will be no bits
  // set except REX.[B|X|R|W]. The returned value forms a real REX prefix byte.
  // This case is tested by `X86Inst.cpp`.
  return (opCode | options) >> X86Inst::kOpCode_REX_Shift;
}

319
320
321
//! Combine `regId` and `vvvvvId` into a single value (used by AVX and AVX-512).
//!
//! `vvvvvId` is stored `kVexVVVVVShift` bits above `regId`, the caller is
//! responsible for passing a `regId` that fits below that position.
static ASMJIT_INLINE uint32_t x86PackRegAndVvvvv(uint32_t regId, uint32_t vvvvvId) noexcept {
  return regId + (vvvvvId << kVexVVVVVShift);
}

324
325
//! Get the LL opcode field that matches the type of the INDEX register of the
//! memory operand `op` (looked up in `x86LLByRegType`).
//!
//! `op` is accessed as `X86Mem` without any check.
static ASMJIT_INLINE uint32_t x86OpCodeLByVMem(const Operand_& op) noexcept {
  return x86LLByRegType[op.as<X86Mem>().getIndexType()];
}

328
329
//! Get the LL opcode field that matches a register `size` given in bytes
//! (looked up in `x86LLBySizeDiv16` by `size / 16`).
//!
//! No bounds check is performed, `size / 16` must be a valid index into the
//! table.
static ASMJIT_INLINE uint32_t x86OpCodeLBySize(uint32_t size) noexcept {
  return x86LLBySizeDiv16[size / 16];
}

332
333
334
335
//! Get LL and MM fields of `opCode` combined with `X86Inst::kOptionVex3` bit
//! of `options`, shifted right so the MM field starts at bit 0.
static ASMJIT_INLINE uint32_t x86ExtractLLMM(uint32_t opCode, uint32_t options) noexcept {
  uint32_t x = opCode & (X86Inst::kOpCode_LL_Mask | X86Inst::kOpCode_MM_Mask);
  uint32_t y = options & X86Inst::kOptionVex3;
  return (x | y) >> X86Inst::kOpCode_MM_Shift;
}

338
339
340
341
342
343
344
345
346
347
348
349
350
351
//! Encode MOD byte.
//!
//! Layout of the result: `m` occupies bits [7:6], `o` bits [5:3], and `rm`
//! bits [2:0]. All inputs are asserted to fit into their fields.
static ASMJIT_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) noexcept {
  ASMJIT_ASSERT(m <= 3);
  ASMJIT_ASSERT(o <= 7);
  ASMJIT_ASSERT(rm <= 7);

  const uint32_t modBits = m << 6;
  const uint32_t opBits = o << 3;
  return modBits + opBits + rm;
}

//! Encode SIB byte.
//!
//! Layout of the result: `s` occupies bits [7:6], `i` bits [5:3], and `b`
//! bits [2:0]. All inputs are asserted to fit into their fields.
static ASMJIT_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) noexcept {
  ASMJIT_ASSERT(s <= 3);
  ASMJIT_ASSERT(i <= 7);
  ASMJIT_ASSERT(b <= 7);
  return (s << 6) + (i << 3) + b;
}

354
// ============================================================================
355
// [asmjit::X86Assembler - Construction / Destruction]
356
357
// ============================================================================

358
359
360
361
362
// Creates the assembler and, when `code` is not null, asks `code` to attach
// it. The result of `attach()` is not inspected here.
X86Assembler::X86Assembler(CodeHolder* code) noexcept
  : Assembler() {

  if (code != nullptr) {
    code->attach(this);
  }
}

// Nothing to release explicitly.
X86Assembler::~X86Assembler() noexcept {
}
363

364
365
366
// ============================================================================
// [asmjit::X86Assembler - Events]
// ============================================================================
367

368
369
370
371
// Called when the assembler is being attached to `code`.
//
// Fails with `kErrorInvalidArch` when the architecture of `code` is not of the
// X86 family and propagates any error returned by `Base::onAttach()`. On
// success it configures everything that depends on the architecture:
//   - the address-size override mask (32-bit vs 64-bit variant),
//   - the array of native GP registers and the native GP register,
//   - `_kOptionInvalidRex` global option (32-bit X86 only).
Error X86Assembler::onAttach(CodeHolder* code) noexcept {
  uint32_t archType = code->getArchType();
  if (!ArchInfo::isX86Family(archType))
    return DebugUtils::errored(kErrorInvalidArch);

  ASMJIT_PROPAGATE(Base::onAttach(code));

  if (archType == ArchInfo::kTypeX86) {
    // 32 bit architecture - X86.
    _setAddressOverrideMask(kX86MemInfo_67H_X86);
    _globalOptions |= X86Inst::_kOptionInvalidRex;
    _nativeGpArray = x86OpData.gpd;
  }
  else {
    // 64 bit architecture - X64 or X32.
    _setAddressOverrideMask(kX86MemInfo_67H_X64);
    _nativeGpArray = x86OpData.gpq;
  }

  _nativeGpReg = _nativeGpArray[0];
  return kErrorOk;
}

// Called when the assembler is being detached from `code`. There is nothing
// X86 specific to undo here, so the call is forwarded to `Base::onDetach()`
// and its result is returned.
Error X86Assembler::onDetach(CodeHolder* code) noexcept {
  return Base::onDetach(code);
}

// ============================================================================
// [asmjit::X86Assembler - Helpers]
// ============================================================================
398

399
400
401
402
// Writes the low 8 bits of `VAL` at `cursor` and advances `cursor` by one
// byte. Expects a local `uint8_t* cursor` in the scope of the caller.
#define EMIT_BYTE(VAL)                               \
  do {                                               \
    cursor[0] = static_cast<uint8_t>((VAL) & 0xFFU); \
    cursor += 1;                                     \
  } while (0)

405
406
407
408
409
// Writes the low 16 bits of `VAL` at `cursor` by using `Utils::writeU16uLE()`
// and advances `cursor` by two bytes. Expects a local `uint8_t* cursor` in
// the scope of the caller.
#define EMIT_16(VAL)                                 \
  do {                                               \
    Utils::writeU16uLE(cursor,                       \
      static_cast<uint32_t>((VAL) & 0xFFFFU));       \
    cursor += 2;                                     \
  } while (0)

412
413
414
415
416
// Writes the low 32 bits of `VAL` at `cursor` by using `Utils::writeU32uLE()`
// and advances `cursor` by four bytes. Expects a local `uint8_t* cursor` in
// the scope of the caller.
#define EMIT_32(VAL)                                 \
  do {                                               \
    Utils::writeU32uLE(cursor,                       \
      static_cast<uint32_t>((VAL) & 0xFFFFFFFFU));   \
    cursor += 4;                                     \
  } while (0)

419
420
421
// ORs `EXP` (converted to `uint32_t`) into the PP field of the local `opCode`.
// An `EXP` of 1 selects the 66H mandatory prefix (see `x86OpCodePP`).
#define ADD_66H_P(EXP)                                                   \
  do {                                                                   \
    opCode |= (static_cast<uint32_t>(EXP) << X86Inst::kOpCode_PP_Shift); \
  } while (0)

424
425
426
427
// Selects the 66H mandatory prefix in the local `opCode` when bit 1 of `SIZE`
// is set (which is the case when `SIZE` is 2). The bit is moved directly to
// the lowest bit of the PP field, so no branch is required.
#define ADD_66H_P_BY_SIZE(SIZE)                                          \
  do {                                                                   \
    opCode |= (static_cast<uint32_t>((SIZE) & 0x02))                     \
           << (X86Inst::kOpCode_PP_Shift - 1);                           \
  } while (0)

430
431
432
433
// Sets `X86Inst::kOpCode_W` in the local `opCode` when `EXP` is true.
#define ADD_REX_W(EXP)                                                   \
  do {                                                                   \
    if (EXP)                                                             \
      opCode |= X86Inst::kOpCode_W;                                      \
  } while (0)

436
437
438
439
// Sets `X86Inst::kOpCode_W` in the local `opCode` when `SIZE` equals 8.
#define ADD_REX_W_BY_SIZE(SIZE)                                          \
  do {                                                                   \
    if ((SIZE) == 8)                                                     \
      opCode |= X86Inst::kOpCode_W;                                      \
  } while (0)

442
443
444
445
// Applies both size-based adjustments to the local `opCode`: the 66H prefix
// (see `ADD_66H_P_BY_SIZE`) and the W bit (see `ADD_REX_W_BY_SIZE`). `SIZE`
// is expanded once by each of them.
#define ADD_PREFIX_BY_SIZE(SIZE)                                         \
  do {                                                                   \
    ADD_66H_P_BY_SIZE(SIZE);                                             \
    ADD_REX_W_BY_SIZE(SIZE);                                             \
  } while (0)

448
449
450
// ORs `EXP` (converted to `uint32_t`) into the local `opCode` at the position
// of the W bit (`X86Inst::kOpCode_W_Shift`). Branch-less, `EXP` is expected to
// be either 0 or 1.
#define ADD_VEX_W(EXP)                                                   \
  do {                                                                   \
    opCode |= static_cast<uint32_t>(EXP) << X86Inst::kOpCode_W_Shift;    \
  } while (0)

453
454
455
456
457
458
459
460
461
// Emits the mandatory prefix selected by the PP field of `OPCODE`.
//
// The PP field is masked by `kOpCode_PP_FPUMask` and used as an index into
// `x86OpCodePP`. The byte is always stored at `cursor`, but `cursor` only
// advances when the index is non-zero, so nothing is emitted when no prefix
// is selected.
#define EMIT_PP(OPCODE)                                                  \
  do {                                                                   \
    uint32_t ppIndex =                                                   \
      ((OPCODE                   ) >> X86Inst::kOpCode_PP_Shift) &       \
      (X86Inst::kOpCode_PP_FPUMask >> X86Inst::kOpCode_PP_Shift) ;       \
    uint8_t ppCode = x86OpCodePP[ppIndex];                               \
                                                                         \
    cursor[0] = ppCode;                                                  \
    cursor   += ppIndex != 0;                                            \
  } while (0)

464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
// Emits the opcode prefix bytes selected by the MM field of `OPCODE` (looked
// up in `x86OpCodeMM`) followed by the low 8 bits of `OPCODE`.
//
// `OPCODE` is parenthesized before it's masked so an argument that contains
// an operator of lower precedence than `&` (for example `a | b`) is masked
// as a whole.
#define EMIT_MM_OP(OPCODE)                                               \
  do {                                                                   \
    uint32_t op = (OPCODE) & (0x00FF | X86Inst::kOpCode_MM_Mask);        \
                                                                         \
    uint32_t mmIndex = op >> X86Inst::kOpCode_MM_Shift;                  \
    const X86OpCodeMM& mmCode = x86OpCodeMM[mmIndex];                    \
                                                                         \
    if (mmIndex) {                                                       \
      cursor[0] = mmCode.data[0];                                        \
      cursor[1] = mmCode.data[1];                                        \
      cursor   += mmCode.len;                                            \
    }                                                                    \
                                                                         \
    EMIT_BYTE(op);                                                       \
  } while (0)
479

480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
// Adjusts the local `options` and `REG_ID` for an 8-bit GP register operand.
//
// If the operand is AH|BH|CH|DH
//   - Disallow REX prefix.
//   - Patch its index from 0..3 to 4..7 as encoded by X86.
// If the operand is BPL|SPL|SIL|DIL|R8B-15B
//   - Force REX prefix.
#define FIXUP_GPB(REG_OP, REG_ID, ...)                                   \
  do {                                                                   \
    if (static_cast<const X86Gp&>(REG_OP).isGpbHi()) {                   \
      options |= X86Inst::_kOptionInvalidRex;                            \
      REG_ID += 4;                                                       \
    }                                                                    \
    else {                                                               \
      options |= (REG_ID >= 4) ? X86Inst::kOptionRex : 0;                \
    }                                                                    \
  } while (0)
495

496
497
498
499
// Build an operand signature out of 1 to 4 operand kinds, where each `OPn` is
// a suffix of an `Operand::kOp...` constant. Each operand occupies 3 bits and
// operand N is stored at bit `N * 3`, which matches how `_emit()` calculates
// its `isign3` signature from the first three operands.
#define ENC_OPS1(OP0)                     ((Operand::kOp##OP0))
#define ENC_OPS2(OP0, OP1)                ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3))
#define ENC_OPS3(OP0, OP1, OP2)           ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3) + ((Operand::kOp##OP2) << 6))
#define ENC_OPS4(OP0, OP1, OP2, OP3)      ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3) + ((Operand::kOp##OP2) << 6) + ((Operand::kOp##OP3) << 9))
500
501

// ============================================================================
502
// [asmjit::X86Assembler - Emit]
503
504
// ============================================================================

505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
Error X86Assembler::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) {
  Error err;

  const Operand_* rmRel;         // Memory operand or operand that holds Label|Imm.
  uint32_t rmInfo;               // Memory operand's info based on x86MemInfo.
  uint32_t rbReg;                // Memory base or modRM register.
  uint32_t rxReg;                // Memory index register.
  uint32_t opReg;                // ModR/M opcode or register id.
  uint32_t opCode;               // Instruction opcode.

  LabelEntry* label;             // Label entry.
  RelocEntry* re = nullptr;      // Relocation entry.
  int32_t relOffset;             // Relative offset
  FastUInt8 relSize = 0;         // Relative size.

  int64_t imVal = 0;             // Immediate value (must be 64-bit).
  FastUInt8 imLen = 0;           // Immediate length.

  const uint32_t kSHR_W_PP = X86Inst::kOpCode_PP_Shift - 16;
  const uint32_t kSHR_W_EW = X86Inst::kOpCode_EW_Shift - 23;

  uint8_t* cursor = _bufferPtr;
  uint32_t options = static_cast<uint32_t>(instId >= X86Inst::_kIdCount)       |
                     static_cast<uint32_t>((size_t)(_bufferEnd - cursor) < 16) |
                     getGlobalOptions() | getOptions();

  const X86Inst* instData = X86InstDB::instData + instId;
  const X86Inst::CommonData* commonData;

  // Handle failure and rare cases first.
  const uint32_t kErrorsAndSpecialCases =
    CodeEmitter::kOptionMaybeFailureCase | // Error and buffer check.
    CodeEmitter::kOptionStrictValidation | // Strict validation.
    X86Inst::kOptionRep                  | // REP/REPZ prefix.
    X86Inst::kOptionRepnz                | // REPNZ prefix.
    X86Inst::kOptionLock                 | // LOCK prefix.
    X86Inst::kOptionXAcquire             | // XACQUIRE prefix.
    X86Inst::kOptionXRelease             ; // XRELEASE prefix.

  // Signature of the first 3 operands.
  uint32_t isign3 = o0.getOp() + (o1.getOp() << 3) + (o2.getOp() << 6);

  if (ASMJIT_UNLIKELY(options & kErrorsAndSpecialCases)) {
    // Don't do anything if we are in error state.
    if (_lastError) return _lastError;

    if (options & CodeEmitter::kOptionMaybeFailureCase) {
      // Unknown instruction.
      if (ASMJIT_UNLIKELY(instId >= X86Inst::_kIdCount))
        goto InvalidArgument;

      // Grow request, happens rarely.
      if ((size_t)(_bufferEnd - cursor) < 16) {
        err = _code->growBuffer(&_section->_buffer, 16);
        if (ASMJIT_UNLIKELY(err)) goto Failed;

        cursor = _bufferPtr;
        options &= ~1;
      }
    }
565

566
567
568
569
    // Strict validation.
#if !defined(ASMJIT_DISABLE_VALIDATION)
    if (options & CodeEmitter::kOptionStrictValidation) {
      Operand_ opArray[6];
570

571
572
573
574
      opArray[0].copyFrom(o0);
      opArray[1].copyFrom(o1);
      opArray[2].copyFrom(o2);
      opArray[3].copyFrom(o3);
575

576
577
578
579
580
581
582
583
      if (options & kOptionOp4Op5Used) {
        opArray[4].copyFrom(_op4);
        opArray[5].copyFrom(_op5);
      }
      else {
        opArray[4].reset();
        opArray[5].reset();
      }
584

585
586
587
588
      err = Inst::validate(getArchType(), Inst::Detail(instId, options, _extraReg), opArray, 6);
      if (ASMJIT_UNLIKELY(err)) goto Failed;
    }
#endif // !ASMJIT_DISABLE_VALIDATION
589

590
    uint32_t iFlags = instData->getFlags();
591

592
593
594
    // LOCK, XACQUIRE, and XRELEASE prefixes.
    if (options & X86Inst::kOptionLock) {
      bool xAcqRel = (options & (X86Inst::kOptionXAcquire | X86Inst::kOptionXRelease)) != 0;
595

596
597
      if (ASMJIT_UNLIKELY(!(iFlags & (X86Inst::kFlagLock)) && !xAcqRel))
        goto InvalidLockPrefix;
598

599
600
601
      if (xAcqRel) {
        if (ASMJIT_UNLIKELY((options & X86Inst::kOptionXAcquire) && !(iFlags & X86Inst::kFlagXAcquire)))
          goto InvalidXAcquirePrefix;
602

603
604
        if (ASMJIT_UNLIKELY((options & X86Inst::kOptionXRelease) && !(iFlags & X86Inst::kFlagXRelease)))
          goto InvalidXReleasePrefix;
605

606
607
        EMIT_BYTE((options & X86Inst::kOptionXAcquire) ? 0xF2 : 0xF3);
      }
608

609
610
      EMIT_BYTE(0xF0);
    }
611

612
613
614
615
    // REP and REPNZ prefixes.
    if (options & (X86Inst::kOptionRep | X86Inst::kOptionRepnz)) {
      if (ASMJIT_UNLIKELY(!(iFlags & (X86Inst::kFlagRep | X86Inst::kFlagRepnz))))
        goto InvalidRepPrefix;
616

617
618
      if (_extraReg.isValid() && ASMJIT_UNLIKELY(_extraReg.getKind() != X86Reg::kKindGp || _extraReg.getId() != X86Gp::kIdCx))
        goto InvalidRepPrefix;
619

620
621
      EMIT_BYTE((options & X86Inst::kOptionRepnz) ? 0xF2 : 0xF3);
    }
622
623
  }

624
625
626
  // --------------------------------------------------------------------------
  // [Encoding Scope]
  // --------------------------------------------------------------------------
627

628
629
630
  opCode = instData->getMainOpCode();
  opReg = x86ExtractO(opCode);
  commonData = &instData->getCommonData();
631

632
633
634
  switch (instData->getEncodingType()) {
    case X86Inst::kEncodingNone:
      goto EmitDone;
635

636
637
638
    // ------------------------------------------------------------------------
    // [X86]
    // ------------------------------------------------------------------------
639

640
641
    case X86Inst::kEncodingX86Op:
      goto EmitX86Op;
642

643
644
645
    case X86Inst::kEncodingX86Op_O_I8:
      if (ASMJIT_UNLIKELY(isign3 != ENC_OPS1(Imm)))
        goto InvalidInstruction;
646

647
648
649
      imVal = o0.as<Imm>().getUInt8();
      imLen = 1;
      ASMJIT_FALLTHROUGH;
650

651
652
653
    case X86Inst::kEncodingX86Op_O:
      rbReg = 0;
      goto EmitX86R;
654

655
656
657
    case X86Inst::kEncodingX86Op_xAX:
      if (isign3 == 0)
        goto EmitX86Op;
658

659
660
661
      if (isign3 == ENC_OPS1(Reg) && o0.getId() == X86Gp::kIdAx)
        goto EmitX86Op;
      break;
662

663
664
665
    case X86Inst::kEncodingX86Op_xDX_xAX:
      if (isign3 == 0)
        goto EmitX86Op;
666

667
668
669
670
      if (isign3 == ENC_OPS2(Reg, Reg) && o0.getId() == X86Gp::kIdDx &&
                                          o1.getId() == X86Gp::kIdAx)
        goto EmitX86Op;
      break;
671

672
673
674
    case X86Inst::kEncodingX86Op_ZAX:
      if (isign3 == 0)
        goto EmitX86Op;
675

676
677
678
      rmRel = &o0;
      if (isign3 == ENC_OPS1(Mem) && x86IsImplicitMem(o0, X86Gp::kIdAx))
        goto EmitX86OpImplicitMem;
679

680
      break;
681

682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
    case X86Inst::kEncodingX86I_xAX:
      // Implicit form.
      if (isign3 == ENC_OPS1(Imm)) {
        imVal = o0.as<Imm>().getUInt8();
        imLen = 1;
        goto EmitX86Op;
      }

      // Explicit form.
      if (isign3 == ENC_OPS2(Reg, Imm) && o0.getId() == X86Gp::kIdAx) {
        imVal = o1.as<Imm>().getUInt8();
        imLen = 1;
        goto EmitX86Op;
      }
      break;
697

698
699
700
    case X86Inst::kEncodingX86M:
      rbReg = o0.getId();
      ADD_PREFIX_BY_SIZE(o0.getSize());
701

702
703
      if (isign3 == ENC_OPS1(Reg))
        goto EmitX86R;
704

705
706
707
708
      rmRel = &o0;
      if (isign3 == ENC_OPS1(Mem))
        goto EmitX86M;
      break;
709

710
711
712
713
714
715
716
717
718
719
720
721
722
    case X86Inst::kEncodingX86M_GPB_MulDiv:
CaseX86M_GPB_MulDiv:
      // Explicit form?
      if (isign3 > 0x7) {
        // [AX] <- [AX] div|mul r8.
        if (isign3 == ENC_OPS2(Reg, Reg)) {
          if (ASMJIT_UNLIKELY(!X86Reg::isGpw(o0, X86Gp::kIdAx) || !X86Reg::isGpb(o1)))
            goto InvalidInstruction;

          rbReg = o1.getId();
          FIXUP_GPB(o1, rbReg);
          goto EmitX86R;
        }
723

724
725
726
727
        // [AX] <- [AX] div|mul m8.
        if (isign3 == ENC_OPS2(Reg, Mem)) {
          if (ASMJIT_UNLIKELY(!X86Reg::isGpw(o0, X86Gp::kIdAx)))
            goto InvalidInstruction;
728

729
730
731
          rmRel = &o1;
          goto EmitX86M;
        }
732

733
734
735
736
737
        // [?DX:?AX] <- [?DX:?AX] div|mul r16|r32|r64
        if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
          if (ASMJIT_UNLIKELY(o0.getSize() != o1.getSize()))
            goto InvalidInstruction;
          rbReg = o2.getId();
738

739
740
741
742
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());
          goto EmitX86R;
        }
743

744
745
746
747
748
        // [?DX:?AX] <- [?DX:?AX] div|mul m16|m32|m64
        if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
          if (ASMJIT_UNLIKELY(o0.getSize() != o1.getSize()))
            goto InvalidInstruction;
          rmRel = &o2;
749

750
751
752
753
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());
          goto EmitX86M;
        }
754

755
756
        goto InvalidInstruction;
      }
757

758
      ASMJIT_FALLTHROUGH;
759

760
761
762
763
764
765
    case X86Inst::kEncodingX86M_GPB:
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.getId();
        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, rbReg);
          goto EmitX86R;
766
        }
767
768
769
770
771
772
        else {
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());
          goto EmitX86R;
        }
      }
773

774
775
776
777
      if (isign3 == ENC_OPS1(Mem)) {
        if (ASMJIT_UNLIKELY(o0.getSize() == 0))
          goto AmbiguousOperandSize;
        rmRel = &o0;
778

779
780
781
782
783
        opCode += o0.getSize() != 1;
        ADD_PREFIX_BY_SIZE(o0.getSize());
        goto EmitX86M;
      }
      break;
784

785
786
787
788
789
790
    case X86Inst::kEncodingX86M_Only:
      if (isign3 == ENC_OPS1(Mem)) {
        rmRel = &o0;
        goto EmitX86M;
      }
      break;
791

792
793
794
    case X86Inst::kEncodingX86Rm:
      ADD_PREFIX_BY_SIZE(o0.getSize());
      ASMJIT_FALLTHROUGH;
795

796
797
798
799
800
801
    case X86Inst::kEncodingX86Rm_NoRexW:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitX86R;
      }
802

803
804
805
806
807
808
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
      }
      break;
809

810
811
812
813
814
    case X86Inst::kEncodingX86Rm_Raw66H:
      // We normally emit either [66|F2|F3], this instruction requires 66+[F2|F3].
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
815

816
817
818
819
820
821
        if (o0.getSize() == 2)
          EMIT_BYTE(0x66);
        else
          ADD_REX_W_BY_SIZE(o0.getSize());
        goto EmitX86R;
      }
822

823
824
825
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
826

827
828
829
830
831
832
833
        if (o0.getSize() == 2)
          EMIT_BYTE(0x66);
        else
          ADD_REX_W_BY_SIZE(o0.getSize());
        goto EmitX86M;
      }
      break;
834

835
836
837
838
839
840
841
842
843
844
    case X86Inst::kEncodingX86Mr:
      ADD_PREFIX_BY_SIZE(o0.getSize());
      ASMJIT_FALLTHROUGH;

    case X86Inst::kEncodingX86Mr_NoSize:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        rbReg = o0.getId();
        opReg = o1.getId();
        goto EmitX86R;
      }
845

846
847
848
849
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        rmRel = &o0;
        opReg = o1.getId();
        goto EmitX86M;
850
      }
851
      break;
852

853
854
855
856
    case X86Inst::kEncodingX86Arith:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (o0.getSize() != o1.getSize())
          goto OperandSizeMismatch;
857

858
859
        opReg = o0.getId();
        rbReg = o1.getId();
860

861
862
863
864
        if (o0.getSize() == 1) {
          opCode += 2;
          FIXUP_GPB(o0, opReg);
          FIXUP_GPB(o1, rbReg);
865

866
867
          if (!(options & X86Inst::kOptionModMR))
            goto EmitX86R;
868

869
870
871
872
873
874
875
          opCode -= 2;
          Utils::swap(opReg, rbReg);
          goto EmitX86R;
        }
        else {
          opCode += 3;
          ADD_PREFIX_BY_SIZE(o0.getSize());
876

877
878
          if (!(options & X86Inst::kOptionModMR))
            goto EmitX86R;
879

880
881
882
883
          opCode -= 2;
          Utils::swap(opReg, rbReg);
          goto EmitX86R;
        }
884
885
      }

886
887
888
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
889

890
891
892
893
894
895
896
897
898
899
        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, opReg);
          opCode += 2;
          goto EmitX86M;
        }
        else {
          opCode += 3;
          ADD_PREFIX_BY_SIZE(o0.getSize());
          goto EmitX86M;
        }
900
901
      }

902
903
904
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
905

906
907
908
909
910
911
912
913
914
915
        if (o1.getSize() == 1) {
          FIXUP_GPB(o1, opReg);
          goto EmitX86M;
        }
        else {
          opCode++;
          ADD_PREFIX_BY_SIZE(o1.getSize());
          goto EmitX86M;
        }
      }
916

917
918
      // The remaining instructions use 0x80 opcode.
      opCode = 0x80;
919

920
921
      if (isign3 == ENC_OPS2(Reg, Imm)) {
        uint32_t size = o0.getSize();
922

923
924
        rbReg = o0.getId();
        imVal = static_cast<const Imm&>(o1).getInt64();
925

926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
        if (size == 1) {
          FIXUP_GPB(o0, rbReg);
          imLen = 1;
        }
        else {
          if (size == 2) {
            ADD_66H_P(1);
          }
          else if (size == 4) {
            // Sign extend so isInt8 returns the right result.
            imVal = x86SignExtend32To64(imVal);
          }
          else if (size == 8) {
            // In 64-bit mode it's not possible to use 64-bit immediate.
            if (Utils::isUInt32(imVal)) {
              // Zero-extend `and` by using a 32-bit GPD destination instead of a 64-bit GPQ.
              if (instId == X86Inst::kIdAnd)
                size = 4;
              else if (!Utils::isInt32(imVal))
                goto InvalidImmediate;
            }
            ADD_REX_W_BY_SIZE(size);
          }
949

950
951
952
953
          imLen = std::min<uint32_t>(size, 4);
          if (Utils::isInt8(imVal) && !(options & X86Inst::kOptionLongForm))
            imLen = 1;
        }
954

955
956
957
958
959
960
961
        // Alternate Form - AL, AX, EAX, RAX.
        if (rbReg == 0 && (size == 1 || imLen != 1) && !(options & X86Inst::kOptionLongForm)) {
          opCode &= X86Inst::kOpCode_PP_66 | X86Inst::kOpCode_W;
          opCode |= ((opReg << 3) | (0x04 + (size != 1)));
          imLen = std::min<uint32_t>(size, 4);
          goto EmitX86Op;
        }
962

963
964
965
        opCode += size != 1 ? (imLen != 1 ? 1 : 3) : 0;
        goto EmitX86R;
      }
966

967
968
      if (isign3 == ENC_OPS2(Mem, Imm)) {
        uint32_t memSize = o0.getSize();
969

970
971
        if (ASMJIT_UNLIKELY(memSize == 0))
          goto AmbiguousOperandSize;
972

973
974
        imVal = static_cast<const Imm&>(o1).getInt64();
        imLen = std::min<uint32_t>(memSize, 4);
975

976
977
978
        // Sign extend so isInt8 returns the right result.
        if (memSize == 4)
          imVal = x86SignExtend32To64(imVal);
979

980
981
        if (Utils::isInt8(imVal) && !(options & X86Inst::kOptionLongForm))
          imLen = 1;
982

983
984
        opCode += memSize != 1 ? (imLen != 1 ? 1 : 3) : 0;
        ADD_PREFIX_BY_SIZE(memSize);
985

986
987
        rmRel = &o0;
        goto EmitX86M;
988
      }
989
      break;
990

991
992
993
994
    case X86Inst::kEncodingX86Bswap:
      if (isign3 == ENC_OPS1(Reg)) {
        if (ASMJIT_UNLIKELY(o0.getSize() < 4))
          goto InvalidInstruction;
995

996
997
998
        opReg = o0.getId();
        ADD_REX_W_BY_SIZE(o0.getSize());
        goto EmitX86OpReg;
999
      }
1000
      break;
1001

1002
1003
1004
1005
1006
1007
    case X86Inst::kEncodingX86Bt:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        ADD_PREFIX_BY_SIZE(o1.getSize());
        opReg = o1.getId();
        rbReg = o0.getId();
        goto EmitX86R;
1008
1009
      }

1010
1011
1012
1013
1014
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        ADD_PREFIX_BY_SIZE(o1.getSize());
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
1015
1016
      }

1017
1018
1019
      // The remaining instructions use the secondary opcode/r.
      imVal = static_cast<const Imm&>(o1).getInt64();
      imLen = 1;
1020

1021
1022
1023
      opCode = commonData->getAltOpCode();
      opReg = x86ExtractO(opCode);
      ADD_PREFIX_BY_SIZE(o0.getSize());
1024

1025
1026
1027
1028
      if (isign3 == ENC_OPS2(Reg, Imm)) {
        rbReg = o0.getId();
        goto EmitX86R;
      }
1029

1030
1031
1032
      if (isign3 == ENC_OPS2(Mem, Imm)) {
        if (ASMJIT_UNLIKELY(o0.getSize() == 0))
          goto AmbiguousOperandSize;
1033

1034
1035
1036
1037
        rmRel = &o0;
        goto EmitX86M;
      }
      break;
1038

1039
1040
1041
1042
1043
    case X86Inst::kEncodingX86Call:
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.getId();
        goto EmitX86R;
      }
1044

1045
1046
1047
      rmRel = &o0;
      if (isign3 == ENC_OPS1(Mem))
        goto EmitX86M;
1048

1049
1050
1051
1052
1053
1054
      // Call with 32-bit displacement use 0xE8 opcode. Call with 8-bit
      // displacement is not encodable so the alternative opcode field
      // in X86DB must be zero.
      opCode = 0xE8;
      opReg = 0;
      goto EmitJmpCall;
1055

1056
1057
1058
1059
1060
1061
1062
    case X86Inst::kEncodingX86Cmpxchg: {
      // Convert explicit to implicit.
      if (isign3 & (0x7 << 6)) {
        if (!X86Reg::isGp(o2) || o2.getId() != X86Gp::kIdAx)
          goto InvalidInstruction;
        isign3 &= 0x3F;
      }
1063

1064
1065
1066
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (o0.getSize() != o1.getSize())
          goto OperandSizeMismatch;
1067

1068
1069
        rbReg = o0.getId();
        opReg = o1.getId();
1070

1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, rbReg);
          FIXUP_GPB(o1, opReg);
          goto EmitX86R;
        }
        else {
          ADD_PREFIX_BY_SIZE(o0.getSize());
          opCode++;
          goto EmitX86R;
        }
      }
1082

1083
1084
1085
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
1086

1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
        if (o1.getSize() == 1) {
          FIXUP_GPB(o0, opReg);
          goto EmitX86M;
        }
        else {
          ADD_PREFIX_BY_SIZE(o1.getSize());
          opCode++;
          goto EmitX86M;
        }
      }
      break;
    }
1099

1100
1101
1102
    case X86Inst::kEncodingX86Crc:
      opReg = o0.getId();
      ADD_REX_W_BY_SIZE(o0.getSize());
1103

1104
1105
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        rbReg = o1.getId();
1106

1107
1108
1109
1110
1111
1112
1113
        if (o1.getSize() == 1) {
          FIXUP_GPB(o1, rbReg);
          goto EmitX86R;
        }
        else {
          // This seems to be the only exception of encoding 66F2 PP prefix.
          if (o1.getSize() == 2) EMIT_BYTE(0x66);
1114

1115
1116
1117
          opCode++;
          goto EmitX86R;
        }
1118
1119
      }

1120
1121
1122
1123
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        rmRel = &o1;
        if (o1.getSize() == 0)
          goto AmbiguousOperandSize;
1124

1125
1126
        // This seems to be the only exception of encoding 66F2 PP prefix.
        if (o1.getSize() == 2) EMIT_BYTE(0x66);
1127

1128
1129
1130
1131
        opCode += o1.getSize() != 1;
        goto EmitX86M;
      }
      break;
1132

1133
1134
1135
1136
    case X86Inst::kEncodingX86Enter:
      if (isign3 == ENC_OPS2(Imm, Imm)) {
        uint32_t iw = static_cast<const Imm&>(o0).getUInt16();
        uint32_t ib = static_cast<const Imm&>(o1).getUInt8();
1137

1138
1139
1140
1141
1142
        imVal = iw | (ib << 16);
        imLen = 3;
        goto EmitX86Op;
      }
      break;
1143

1144
1145
1146
1147
1148
    case X86Inst::kEncodingX86Imul:
      // First process all forms distinct of `kEncodingX86M_OptB_MulDiv`.
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opCode = 0x6B;
        ADD_PREFIX_BY_SIZE(o0.getSize());
1149

1150
1151
        imVal = static_cast<const Imm&>(o2).getInt64();
        imLen = 1;
1152

1153
1154
1155
1156
        if (!Utils::isInt8(imVal) || (options & X86Inst::kOptionLongForm)) {
          opCode -= 2;
          imLen = o0.getSize() == 2 ? 2 : 4;
        }
1157

1158
1159
        opReg = o0.getId();
        rbReg = o1.getId();
1160

1161
1162
        goto EmitX86R;
      }
1163

1164
1165
1166
      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opCode = 0x6B;
        ADD_PREFIX_BY_SIZE(o0.getSize());
1167

1168
1169
        imVal = static_cast<const Imm&>(o2).getInt64();
        imLen = 1;
1170

1171
1172
1173
        // Sign extend so isInt8 returns the right result.
        if (o0.getSize() == 4)
          imVal = x86SignExtend32To64(imVal);
1174

1175
1176
1177
1178
        if (!Utils::isInt8(imVal) || (options & X86Inst::kOptionLongForm)) {
          opCode -= 2;
          imLen = o0.getSize() == 2 ? 2 : 4;
        }
1179

1180
1181
        opReg = o0.getId();
        rmRel = &o1;
1182

1183
        goto EmitX86M;
1184
1185
      }

1186
1187
1188
1189
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        // Must be explicit 'ax, r8' form.
        if (o1.getSize() == 1)
          goto CaseX86M_GPB_MulDiv;
1190

1191
1192
        if (o0.getSize() != o1.getSize())
          goto OperandSizeMismatch;
1193

1194
1195
        opReg = o0.getId();
        rbReg = o1.getId();
1196

1197
1198
1199
1200
        opCode = X86Inst::kOpCode_MM_0F | 0xAF;
        ADD_PREFIX_BY_SIZE(o0.getSize());
        goto EmitX86R;
      }
1201

1202
1203
1204
1205
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        // Must be explicit 'ax, m8' form.
        if (o1.getSize() == 1)
          goto CaseX86M_GPB_MulDiv;
1206

1207
1208
1209
1210
1211
1212
1213
        opReg = o0.getId();
        rmRel = &o1;

        opCode = X86Inst::kOpCode_MM_0F | 0xAF;
        ADD_PREFIX_BY_SIZE(o0.getSize());
        goto EmitX86M;
      }
1214

1215
1216
1217
1218
      // Shorthand to imul 'reg, reg, imm'.
      if (isign3 == ENC_OPS2(Reg, Imm)) {
        opCode = 0x6B;
        ADD_PREFIX_BY_SIZE(o0.getSize());
1219

1220
1221
        imVal = static_cast<const Imm&>(o1).getInt64();
        imLen = 1;
1222

1223
1224
1225
        // Sign extend so isInt8 returns the right result.
        if (o0.getSize() == 4)
          imVal = x86SignExtend32To64(imVal);
1226

1227
1228
1229
1230
        if (!Utils::isInt8(imVal) || (options & X86Inst::kOptionLongForm)) {
          opCode -= 2;
          imLen = o0.getSize() == 2 ? 2 : 4;
        }
1231

1232
1233
        opReg = rbReg = o0.getId();
        goto EmitX86R;
1234
1235
      }

1236
1237
      // Try implicit form.
      goto CaseX86M_GPB_MulDiv;
1238

1239
1240
1241
1242
1243
1244
1245
    case X86Inst::kEncodingX86In:
      if (isign3 == ENC_OPS2(Reg, Imm)) {
        if (ASMJIT_UNLIKELY(o0.getId() != X86Gp::kIdAx))
          goto InvalidInstruction;

        imVal = o1.as<Imm>().getUInt8();
        imLen = 1;
1246

1247
1248
1249
        opCode = commonData->getAltOpCode() + (o0.getSize() != 1);
        ADD_66H_P_BY_SIZE(o0.getSize());
        goto EmitX86Op;
1250
1251
      }

1252
1253
1254
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (ASMJIT_UNLIKELY(o0.getId() != X86Gp::kIdAx || o1.getId() != X86Gp::kIdDx))
          goto InvalidInstruction;
1255

1256
1257
1258
        opCode += o0.getSize() != 1;
        ADD_66H_P_BY_SIZE(o0.getSize());
        goto EmitX86Op;
1259
1260
1261
      }
      break;

1262
1263
1264
1265
    case X86Inst::kEncodingX86Ins:
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        if (ASMJIT_UNLIKELY(!x86IsImplicitMem(o0, X86Gp::kIdDi) || o1.getId() != X86Gp::kIdDx))
          goto InvalidInstruction;
1266

1267
1268
1269
        uint32_t size = o0.getSize();
        if (ASMJIT_UNLIKELY(size == 0))
          goto AmbiguousOperandSize;
1270

1271
1272
        rmRel = &o0;
        opCode += (size != 1);
1273

1274
1275
        ADD_66H_P_BY_SIZE(size);
        goto EmitX86OpImplicitMem;
1276
1277
1278
      }
      break;

1279
1280
1281
    case X86Inst::kEncodingX86IncDec:
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.getId();
1282

1283
1284
1285
1286
        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, rbReg);
          goto EmitX86R;
        }
1287

1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
        if (is32Bit()) {
          // INC r16|r32 is only encodable in 32-bit mode (collides with REX).
          opCode = commonData->getAltOpCode() + (rbReg & 0x07);
          ADD_66H_P_BY_SIZE(o0.getSize());
          goto EmitX86Op;
        }
        else {
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());
          goto EmitX86R;
        }
1299
1300
      }

1301
1302
1303
      if (isign3 == ENC_OPS1(Mem)) {
        rmRel = &o0;
        opCode += o0.getSize() != 1;
1304

1305
1306
        ADD_PREFIX_BY_SIZE(o0.getSize());
        goto EmitX86M;
1307
      }
1308
      break;
1309

1310
1311
1312
1313
1314
    case X86Inst::kEncodingX86Int:
      if (isign3 == ENC_OPS1(Imm)) {
        imVal = static_cast<const Imm&>(o0).getInt64();
        imLen = 1;
        goto EmitX86Op;
1315
      }
1316
      break;
1317

1318
1319
1320
1321
1322
1323
1324
    case X86Inst::kEncodingX86Jcc:
      if (_globalHints & CodeEmitter::kHintPredictedJumps) {
        if (options & X86Inst::kOptionTaken)
          EMIT_BYTE(0x3E);
        if (options & X86Inst::kOptionNotTaken)
          EMIT_BYTE(0x2E);
      }
1325

1326
1327
1328
      rmRel = &o0;
      opReg = 0;
      goto EmitJmpCall;
1329

1330
1331
1332
1333
1334
1335
    case X86Inst::kEncodingX86JecxzLoop:
      rmRel = &o0;
      // Explicit jecxz|loop [r|e]cx, dst
      if (o0.isReg()) {
        if (ASMJIT_UNLIKELY(!X86Reg::isGp(o0, X86Gp::kIdCx)))
          goto InvalidInstruction;
1336

1337
1338
        if ((is32Bit() && o0.getSize() == 2) || (is64Bit() && o0.getSize() == 4))
          EMIT_BYTE(0x67);
1339

1340
        rmRel = &o1;
1341
1342
      }

1343
1344
      opReg = 0;
      goto EmitJmpCall;
1345

1346
1347
1348
1349
1350
    case X86Inst::kEncodingX86Jmp:
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.getId();
        goto EmitX86R;
      }
1351

1352
1353
1354
      rmRel = &o0;
      if (isign3 == ENC_OPS1(Mem))
        goto EmitX86M;
1355

1356
1357
1358
1359
1360
      // Jump encoded with 32-bit displacement use 0xE9 opcode. Jump encoded
      // with 8-bit displacement's opcode is stored as an alternative opcode.
      opCode = 0xE9;
      opReg = 0;
      goto EmitJmpCall;
1361

1362
1363
1364
    case X86Inst::kEncodingX86JmpRel:
      rmRel = &o0;
      goto EmitJmpCall;
1365

1366
1367
1368
1369
1370
1371
    case X86Inst::kEncodingX86Lea:
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        ADD_PREFIX_BY_SIZE(o0.getSize());
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
1372
1373
1374
      }
      break;

1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
    case X86Inst::kEncodingX86Mov:
      // Reg <- Reg
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();

        // Asmjit uses segment registers indexed from 1 to 6, leaving zero as
        // "no segment register used". We have to fix this (decrement the index
        // of the register) when emitting MOV instructions which move to/from
        // a segment register. The segment register is always `opReg`, because
        // the MOV instruction uses either RM or MR encoding.

        // GP <- ??
        if (X86Reg::isGp(o0)) {
          // GP <- GP
          if (X86Reg::isGp(o1)) {
            uint32_t size0 = o0.getSize();
            uint32_t size1 = o1.getSize();

            if (size0 != size1) {
              // We allow 'mov r64, r32' as it's basically zero-extend.
              if (size0 == 8 && size1 == 4)
                size0 = 4; // Zero extend, don't promote to 64-bit.
              else
                goto InvalidInstruction;
            }

            if (size0 == 1) {
              FIXUP_GPB(o0, opReg);
              FIXUP_GPB(o1, rbReg);
              opCode = 0x8A;

              if (!(options & X86Inst::kOptionModMR))
                goto EmitX86R;

              opCode -= 2;
              Utils::swap(opReg, rbReg);
              goto EmitX86R;
            }
            else {
              opCode = 0x8B;
              ADD_PREFIX_BY_SIZE(size0);

              if (!(options & X86Inst::kOptionModMR))
                goto EmitX86R;

              opCode -= 2;
              Utils::swap(opReg, rbReg);
              goto EmitX86R;
            }
          }
1426

1427
1428
          opReg = rbReg;
          rbReg = o0.getId();
1429

1430
1431
1432
1433
1434
1435
1436
          // GP <- SEG
          if (X86Reg::isSeg(o1)) {
            opCode = 0x8C;
            opReg--;
            ADD_PREFIX_BY_SIZE(o0.getSize());
            goto EmitX86R;
          }
1437

1438
1439
1440
          // GP <- CR
          if (X86Reg::isCr(o1)) {
            opCode = 0x20 | X86Inst::kOpCode_MM_0F;
1441

1442
1443
1444
1445
1446
1447
1448
            // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
            if ((opReg & 0x8) && is32Bit()) {
              EMIT_BYTE(0xF0);
              opReg &= 0x7;
            }
            goto EmitX86R;
          }
1449

1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
          // GP <- DR
          if (X86Reg::isDr(o1)) {
            opCode = 0x21 | X86Inst::kOpCode_MM_0F;
            goto EmitX86R;
          }
        }
        else {
          // ?? <- GP
          if (!X86Reg::isGp(o1))
            goto InvalidInstruction;

          // SEG <- GP
          if (X86Reg::isSeg(o0)) {
            opCode = 0x8E;
            opReg--;
            ADD_PREFIX_BY_SIZE(o1.getSize());
            goto EmitX86R;
          }
1468

1469
1470
1471
          // CR <- GP
          if (X86Reg::isCr(o0)) {
            opCode = 0x22 | X86Inst::kOpCode_MM_0F;
1472

1473
1474
1475
1476
1477
1478
1479
            // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
            if ((opReg & 0x8) && is32Bit()) {
              EMIT_BYTE(0xF0);
              opReg &= 0x7;
            }
            goto EmitX86R;
          }
1480

1481
1482
1483
1484
1485
1486
          // DR <- GP
          if (X86Reg::isDr(o0)) {
            opCode = 0x23 | X86Inst::kOpCode_MM_0F;
            goto EmitX86R;
          }
        }
1487

1488
        goto InvalidInstruction;
1489
1490
      }

1491
1492
1493
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
1494

1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
        // SEG <- Mem
        if (X86Reg::isSeg(o0)) {
          opCode = 0x8E;
          opReg--;
          ADD_PREFIX_BY_SIZE(o1.getSize());
          goto EmitX86M;
        }
        // Reg <- Mem
        else {
          if (o0.getSize() == 1) {
            opCode = 0;
            FIXUP_GPB(o0, opReg);
          }
          else {
            opCode = 1;
            ADD_PREFIX_BY_SIZE(o0.getSize());
          }
1512

1513
1514
1515
1516
1517
1518
1519
1520
          // Handle a special form `mov al|ax|eax|rax, [ptr64]` that doesn't use MOD.
          if (o0.getId() == X86Gp::kIdAx && !rmRel->as<X86Mem>().hasBaseOrIndex()) {
            imVal = rmRel->as<X86Mem>().getOffset();
            if (!is64Bit() || (is64Bit() && ((options & X86Inst::kOptionLongForm) || !Utils::isInt32(imVal)))) {
              opCode += 0xA0;
              goto EmitX86OpMovAbs;
            }
          }
1521

1522
1523
1524
          opCode += 0x8A;
          goto EmitX86M;
        }
1525
1526
      }

1527
1528
1529
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
1530

1531
1532
1533
1534
1535
        // Mem <- SEG
        if (X86Reg::isSeg(o1)) {
          opCode = 0x8C;
          ADD_PREFIX_BY_SIZE(o0.getSize());
          goto EmitX86M;
1536
        }
1537
        // Mem <- Reg
1538
        else {
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
          if (o1.getSize() == 1) {
            opCode = 0;
            FIXUP_GPB(o1, opReg);
          }
          else {
            opCode = 1;
            ADD_PREFIX_BY_SIZE(o1.getSize());
          }

          // Handle a special form `mov [ptr64], al|ax|eax|rax` that doesn't use MOD.
          if (o1.getId() == X86Gp::kIdAx && !rmRel->as<X86Mem>().hasBaseOrIndex()) {
            imVal = rmRel->as<X86Mem>().getOffset();
            if (!is64Bit() || (is64Bit() && ((options & X86Inst::kOptionLongForm) || !Utils::isInt32(imVal)))) {
              opCode += 0xA2;
              goto EmitX86OpMovAbs;
            }
          }

          opCode += 0x88;
          goto EmitX86M;
1559
1560
1561
        }
      }

1562
1563
1564
      if (isign3 == ENC_OPS2(Reg, Imm)) {
        opReg = o0.getId();
        imLen = o0.getSize();
1565

1566
1567
        if (imLen == 1) {
          FIXUP_GPB(o0, opReg);
1568

1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
          imVal = static_cast<const Imm&>(o1).getUInt8();
          opCode = 0xB0;
          goto EmitX86OpReg;
        }
        else {
          // 64-bit immediate in 64-bit mode is allowed.
          imVal = static_cast<const Imm&>(o1).getInt64();

          // Optimize the instruction size by using a 32-bit immediate if possible.
          if (imLen == 8 && !(options & X86Inst::kOptionLongForm)) {
            if (Utils::isUInt32(imVal)) {
              // Zero-extend by using a 32-bit GPD destination instead of a 64-bit GPQ.
              imLen = 4;
            }
            else if (Utils::isInt32(imVal)) {
              // Sign-extend, uses 'C7 /0' opcode.
              rbReg = opReg;

              opCode = 0xC7 | X86Inst::kOpCode_W;
              opReg = 0;

              imLen = 4;
              goto EmitX86R;
            }
          }
1594

1595
1596
1597
1598
          opCode = 0xB8;
          ADD_PREFIX_BY_SIZE(imLen);
          goto EmitX86OpReg;
        }
1599
1600
      }

1601
1602
      if (isign3 == ENC_OPS2(Mem, Imm)) {
        uint32_t memSize = o0.getSize();
1603

1604
1605
        if (ASMJIT_UNLIKELY(memSize == 0))
          goto AmbiguousOperandSize;
1606

1607
1608
        imVal = static_cast<const Imm&>(o1).getInt64();
        imLen = std::min<uint32_t>(memSize, 4);
1609

1610
1611
1612
        opCode = 0xC6 + (memSize != 1);
        opReg = 0;
        ADD_PREFIX_BY_SIZE(memSize);
1613

1614
1615
        rmRel = &o0;
        goto EmitX86M;
1616
      }
1617
      break;
1618

1619
1620
1621
1622
1623
    case X86Inst::kEncodingX86MovsxMovzx:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        ADD_PREFIX_BY_SIZE(o0.getSize());
1624

1625
1626
1627
1628
1629
1630
1631
1632
        if (o1.getSize() == 1) {
          FIXUP_GPB(o1, rbReg);
          goto EmitX86R;
        }
        else {
          opCode++;
          goto EmitX86R;
        }
1633
1634
      }

1635
1636
1637
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opCode += o1.getSize() != 1;
        ADD_PREFIX_BY_SIZE(o0.getSize());
1638

1639
1640
1641
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
1642
      }
1643
      break;
1644

1645
1646
1647
1648
    case X86Inst::kEncodingX86Out:
      if (isign3 == ENC_OPS2(Imm, Reg)) {
        if (ASMJIT_UNLIKELY(o1.getId() != X86Gp::kIdAx))
          goto InvalidInstruction;
1649

1650
        imVal = o0.as<Imm>().getUInt8();
1651
1652
        imLen = 1;

1653
1654
1655
        opCode = commonData->getAltOpCode() + (o1.getSize() != 1);
        ADD_66H_P_BY_SIZE(o1.getSize());
        goto EmitX86Op;
1656
1657
      }

1658
1659
1660
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (ASMJIT_UNLIKELY(o0.getId() != X86Gp::kIdDx || o1.getId() != X86Gp::kIdAx))
          goto InvalidInstruction;
1661

1662
1663
1664
        opCode += o1.getSize() != 1;
        ADD_66H_P_BY_SIZE(o1.getSize());
        goto EmitX86Op;
1665
1666
1667
      }
      break;

1668
1669
1670
1671
    case X86Inst::kEncodingX86Outs:
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        if (ASMJIT_UNLIKELY(o0.getId() != X86Gp::kIdDx || !x86IsImplicitMem(o1, X86Gp::kIdSi)))
          goto InvalidInstruction;
1672

1673
1674
1675
        uint32_t size = o1.getSize();
        if (ASMJIT_UNLIKELY(size == 0))
          goto AmbiguousOperandSize;
1676

1677
1678
        rmRel = &o1;
        opCode += (size != 1);
1679

1680
1681
        ADD_66H_P_BY_SIZE(size);
        goto EmitX86OpImplicitMem;
1682
1683
1684
      }
      break;

1685
1686
1687
1688
1689
1690
1691
1692
1693
    case X86Inst::kEncodingX86Push:
      if (isign3 == ENC_OPS1(Reg)) {
        if (X86Reg::isSeg(o0)) {
          uint32_t segment = o0.getId();
          if (ASMJIT_UNLIKELY(segment >= X86Seg::kIdCount))
            goto InvalidSegment;

          if (segment >= X86Seg::kIdFs)
            EMIT_BYTE(0x0F);
1694

1695
1696
          EMIT_BYTE(x86OpCodePushSeg[segment]);
          goto EmitDone;
1697
1698
        }
        else {
1699
          goto CaseX86Pop_Gp;
1700
1701
1702
        }
      }

1703
1704
1705
      if (isign3 == ENC_OPS1(Imm)) {
        imVal = static_cast<const Imm&>(o0).getInt64();
        imLen = 4;
1706

1707
1708
        if (Utils::isInt8(imVal) && !(options & X86Inst::kOptionLongForm))
          imLen = 1;
1709

1710
1711
1712
1713
        opCode = imLen == 1 ? 0x6A : 0x68;
        goto EmitX86Op;
      }
      ASMJIT_FALLTHROUGH;
1714

1715
1716
1717
1718
1719
1720
    case X86Inst::kEncodingX86Pop:
      if (isign3 == ENC_OPS1(Reg)) {
        if (X86Reg::isSeg(o0)) {
          uint32_t segment = o0.getId();
          if (ASMJIT_UNLIKELY(segment == X86Seg::kIdCs || segment >= X86Seg::kIdCount))
            goto InvalidSegment;
1721

1722
          if (segment >= X86Seg::kIdFs)
1723
1724
            EMIT_BYTE(0x0F);

1725
1726
          EMIT_BYTE(x86OpCodePopSeg[segment]);
          goto EmitDone;
1727
1728
        }
        else {
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
CaseX86Pop_Gp:
          // We allow 2 byte, 4 byte, and 8 byte register sizes, although PUSH
          // and POP only allow 2 bytes or native size. On 64-bit we simply
          // PUSH/POP 64-bit register even if 32-bit register was given.
          if (ASMJIT_UNLIKELY(o0.getSize() < 2))
            goto InvalidInstruction;

          opCode = commonData->getAltOpCode();
          opReg = o0.getId();

          ADD_66H_P_BY_SIZE(o0.getSize());
          goto EmitX86OpReg;
1741
1742
1743
        }
      }

1744
1745
1746
      if (isign3 == ENC_OPS1(Mem)) {
        if (ASMJIT_UNLIKELY(o0.getSize() == 0))
          goto AmbiguousOperandSize;
1747

1748
1749
        if (ASMJIT_UNLIKELY(o0.getSize() != 2 && o0.getSize() != getGpSize()))
          goto InvalidInstruction;
1750

1751
1752
1753
1754
1755
        ADD_66H_P_BY_SIZE(o0.getSize());
        rmRel = &o0;
        goto EmitX86M;
      }
      break;
1756

1757
1758
1759
1760
1761
1762
    case X86Inst::kEncodingX86Ret:
      if (isign3 == 0) {
        // 'ret' without immediate, change C2 to C3.
        opCode++;
        goto EmitX86Op;
      }
1763

1764
1765
1766
1767
1768
1769
      if (isign3 == ENC_OPS1(Imm)) {
        imVal = static_cast<const Imm&>(o0).getInt64();
        if (imVal == 0 && !(options & X86Inst::kOptionLongForm)) {
          // 'ret' without immediate, change C2 to C3.
          opCode++;
          goto EmitX86Op;
1770
1771
        }
        else {
1772
1773
          imLen = 2;
          goto EmitX86Op;
1774
1775
1776
1777
        }
      }
      break;

1778
1779
1780
    case X86Inst::kEncodingX86Rot:
      if (o0.isReg()) {
        rbReg = o0.getId();
1781

1782
1783
1784
1785
1786
1787
1788
        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, rbReg);
        }
        else {
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());
        }
1789

1790
1791
1792
        if (isign3 == ENC_OPS2(Reg, Reg)) {
          if (ASMJIT_UNLIKELY(o1.getId() != X86Gp::kIdCx))
            goto InvalidInstruction;
1793

1794
1795
1796
          opCode += 2;
          goto EmitX86R;
        }
1797

1798
1799
1800
        if (isign3 == ENC_OPS2(Reg, Imm)) {
          imVal = static_cast<const Imm&>(o1).getInt64() & 0xFF;
          imLen = 0;
1801

1802
1803
          if (imVal == 1 && !(options & X86Inst::kOptionLongForm))
            goto EmitX86R;
1804

1805
1806
1807
          imLen = 1;
          opCode -= 0x10;
          goto EmitX86R;
1808
1809
        }
      }
1810
1811
1812
      else {
        opCode += o0.getSize() != 1;
        ADD_PREFIX_BY_SIZE(o0.getSize());
1813

1814
1815
1816
        if (isign3 == ENC_OPS2(Mem, Reg)) {
          if (ASMJIT_UNLIKELY(o1.getId() != X86Gp::kIdCx))
            goto InvalidInstruction;
1817

1818
1819
1820
1821
          opCode += 2;
          rmRel = &o0;
          goto EmitX86M;
        }
1822

1823
1824
1825
        if (isign3 == ENC_OPS2(Mem, Imm)) {
          if (ASMJIT_UNLIKELY(o0.getSize() == 0))
            goto AmbiguousOperandSize;
1826

1827
1828
1829
          imVal = static_cast<const Imm&>(o1).getInt64() & 0xFF;
          imLen = 0;
          rmRel = &o0;
1830

1831
1832
1833
1834
1835
1836
          if (imVal == 1 && !(options & X86Inst::kOptionLongForm))
            goto EmitX86M;

          imLen = 1;
          opCode -= 0x10;
          goto EmitX86M;
1837
1838
        }
      }
1839
      break;
1840

1841
1842
1843
1844
1845
1846
    case X86Inst::kEncodingX86Set:
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.getId();
        FIXUP_GPB(o0, rbReg);
        goto EmitX86R;
      }
1847

1848
1849
1850
      if (isign3 == ENC_OPS1(Mem)) {
        rmRel = &o0;
        goto EmitX86M;
1851
      }
1852
      break;
1853

1854
1855
1856
1857
1858
    case X86Inst::kEncodingX86ShldShrd:
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        ADD_PREFIX_BY_SIZE(o0.getSize());
        imVal = static_cast<const Imm&>(o2).getInt64();
        imLen = 1;
1859

1860
1861
1862
        opReg = o1.getId();
        rbReg = o0.getId();
        goto EmitX86R;
1863
1864
      }

1865
1866
1867
1868
      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
        ADD_PREFIX_BY_SIZE(o1.getSize());
        imVal = static_cast<const Imm&>(o2).getInt64();
        imLen = 1;
1869

1870
1871
1872
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
1873
1874
      }

1875
1876
      // The following instructions use opCode + 1.
      opCode++;
1877

1878
1879
1880
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        if (ASMJIT_UNLIKELY(o2.getId() != X86Gp::kIdCx))
          goto InvalidInstruction;
1881

1882
1883
1884
1885
1886
        ADD_PREFIX_BY_SIZE(o0.getSize());
        opReg = o1.getId();
        rbReg = o0.getId();
        goto EmitX86R;
      }
1887

1888
1889
1890
      if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
        if (ASMJIT_UNLIKELY(o2.getId() != X86Gp::kIdCx))
          goto InvalidInstruction;
1891

1892
1893
1894
1895
        ADD_PREFIX_BY_SIZE(o1.getSize());
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
1896
1897
1898
      }
      break;

1899
1900
1901
1902
1903
    case X86Inst::kEncodingX86StrRm:
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        rmRel = &o1;
        if (ASMJIT_UNLIKELY(rmRel->as<X86Mem>().getOffsetLo32() || !X86Reg::isGp(o0.as<X86Reg>(), X86Gp::kIdAx)))
          goto InvalidInstruction;
1904

1905
1906
1907
        uint32_t size = o0.getSize();
        if (o1.hasSize() && ASMJIT_UNLIKELY(o1.getSize() != size))
          goto OperandSizeMismatch;
1908

1909
1910
        ADD_PREFIX_BY_SIZE(size);
        opCode += static_cast<uint32_t>(size != 1);
1911

1912
        goto EmitX86OpImplicitMem;
1913
1914
1915
      }
      break;

1916
1917
1918
1919
1920
    case X86Inst::kEncodingX86StrMr:
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        rmRel = &o0;
        if (ASMJIT_UNLIKELY(rmRel->as<X86Mem>().getOffsetLo32() || !X86Reg::isGp(o1.as<X86Reg>(), X86Gp::kIdAx)))
          goto InvalidInstruction;
1921

1922
1923
1924
        uint32_t size = o1.getSize();
        if (o0.hasSize() && ASMJIT_UNLIKELY(o0.getSize() != size))
          goto OperandSizeMismatch;
1925

1926
1927
        ADD_PREFIX_BY_SIZE(size);
        opCode += static_cast<uint32_t>(size != 1);
1928

1929
        goto EmitX86OpImplicitMem;
1930
1931
1932
      }
      break;

1933
1934
1935
1936
1937
    case X86Inst::kEncodingX86StrMm:
      if (isign3 == ENC_OPS2(Mem, Mem)) {
        if (ASMJIT_UNLIKELY(o0.as<X86Mem>().getBaseIndexType() !=
                            o1.as<X86Mem>().getBaseIndexType()))
          goto InvalidInstruction;
1938

1939
1940
1941
        rmRel = &o1;
        if (ASMJIT_UNLIKELY(o0.as<X86Mem>().hasOffset()))
          goto InvalidInstruction;
1942

1943
1944
1945
        uint32_t size = o1.getSize();
        if (ASMJIT_UNLIKELY(size == 0))
          goto AmbiguousOperandSize;
1946

1947
1948
        if (ASMJIT_UNLIKELY(o0.getSize() != size))
          goto OperandSizeMismatch;
1949

1950
1951
        ADD_PREFIX_BY_SIZE(size);
        opCode += static_cast<uint32_t>(size != 1);
1952

1953
        goto EmitX86OpImplicitMem;
1954
1955
1956
      }
      break;

1957
1958
1959
1960
    case X86Inst::kEncodingX86Test:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (o0.getSize() != o1.getSize())
          goto OperandSizeMismatch;
1961

1962
1963
        rbReg = o0.getId();
        opReg = o1.getId();
1964

1965
1966
1967
1968
        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, rbReg);
          FIXUP_GPB(o1, opReg);
          goto EmitX86R;
1969
1970
        }
        else {
1971
1972
1973
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());
          goto EmitX86R;
1974
1975
1976
        }
      }

1977
1978
1979
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
1980

1981
1982
1983
1984
1985
1986
1987
1988
1989
        if (o1.getSize() == 1) {
          FIXUP_GPB(o1, opReg);
          goto EmitX86M;
        }
        else {
          opCode++;
          ADD_PREFIX_BY_SIZE(o1.getSize());
          goto EmitX86M;
        }
1990
1991
      }

1992
1993
1994
1995
1996
1997
      // The following instructions use the secondary opcode.
      opCode = commonData->getAltOpCode();
      opReg = x86ExtractO(opCode);

      if (isign3 == ENC_OPS2(Reg, Imm)) {
        rbReg = o0.getId();
1998

1999
2000
        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, rbReg);
2001

2002
2003
          imVal = static_cast<const Imm&>(o1).getUInt8();
          imLen = 1;
2004
2005
        }
        else {
2006
2007
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());
2008

2009
2010
2011
          imVal = static_cast<const Imm&>(o1).getInt64();
          imLen = std::min<uint32_t>(o0.getSize(), 4);
        }
2012

2013
2014
2015
2016
2017
        // Alternate Form - AL, AX, EAX, RAX.
        if (o0.getId() == 0 && !(options & X86Inst::kOptionLongForm)) {
          opCode &= X86Inst::kOpCode_PP_66 | X86Inst::kOpCode_W;
          opCode |= 0xA8 + (o0.getSize() != 1);
          goto EmitX86Op;
2018
        }
2019
2020

        goto EmitX86R;
2021
2022
      }

2023
2024
2025
2026
2027
2028
      if (isign3 == ENC_OPS2(Mem, Imm)) {
        if (ASMJIT_UNLIKELY(o0.getSize() == 0))
          goto AmbiguousOperandSize;

        imVal = static_cast<const Imm&>(o1).getInt64();
        imLen = std::min<uint32_t>(o0.getSize(), 4);
2029

2030
2031
2032
2033
2034
        opCode += (o0.getSize() != 1);
        ADD_PREFIX_BY_SIZE(o0.getSize());

        rmRel = &o0;
        goto EmitX86M;
2035
2036
2037
      }
      break;

2038
2039
2040
2041
    case X86Inst::kEncodingX86Xchg:
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
2042

2043
2044
2045
2046
2047
2048
2049
2050
2051
        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, opReg);
          goto EmitX86M;
        }
        else {
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());
          goto EmitX86M;
        }
2052
      }
2053
      ASMJIT_FALLTHROUGH;
2054

2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
    case X86Inst::kEncodingX86Xadd:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        rbReg = o0.getId();
        opReg = o1.getId();

        if (o0.getSize() != o1.getSize())
          goto OperandSizeMismatch;

        if (o0.getSize() == 1) {
          FIXUP_GPB(o0, rbReg);
          FIXUP_GPB(o1, opReg);
          goto EmitX86R;
2067
2068
        }
        else {
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
          opCode++;
          ADD_PREFIX_BY_SIZE(o0.getSize());

          // Special opcode for 'xchg ?ax, reg'.
          if (instId == X86Inst::kIdXchg && (opReg == 0 || rbReg == 0)) {
            opCode &= X86Inst::kOpCode_PP_66 | X86Inst::kOpCode_W;
            opCode |= 0x90;
            // One of `xchg a, b` or `xchg b, a` is AX/EAX/RAX.
            opReg += rbReg;
            goto EmitX86OpReg;
          }
          else {
            goto EmitX86R;
          }
2083
2084
2085
        }
      }

2086
2087
2088
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opCode += o1.getSize() != 1;
        ADD_PREFIX_BY_SIZE(o1.getSize());
2089

2090
2091
2092
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
2093
      }
2094
      break;
2095

2096
2097
2098
    case X86Inst::kEncodingX86Fence:
      rbReg = 0;
      goto EmitX86R;
2099

2100
2101
2102
2103
    case X86Inst::kEncodingX86Bndmov:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
2104

2105
2106
2107
        // ModRM encoding:
        if (!(options & X86Inst::kOptionModMR))
          goto EmitX86R;
2108

2109
2110
2111
2112
        // ModMR encoding:
        opCode = commonData->getAltOpCode();
        std::swap(opReg, rbReg);
        goto EmitX86R;
2113
2114
      }

2115
2116
2117
2118
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2119
2120
      }

2121
2122
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opCode = commonData->getAltOpCode();
2123

2124
2125
2126
        rmRel = &o0;
        opReg = o1.getId();
        goto EmitX86M;
2127
2128
2129
      }
      break;

2130
2131
2132
    // ------------------------------------------------------------------------
    // [FPU]
    // ------------------------------------------------------------------------
2133

2134
2135
    case X86Inst::kEncodingFpuOp:
      goto EmitFpuOp;
2136

2137
2138
2139
2140
    case X86Inst::kEncodingFpuArith:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
2141
2142
2143

        // We switch to the alternative opcode if the first operand is zero.
        if (opReg == 0) {
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
CaseFpuArith_Reg:
          opCode = ((0xD8   << X86Inst::kOpCode_FPU_2B_Shift)       ) +
                   ((opCode >> X86Inst::kOpCode_FPU_2B_Shift) & 0xFF) + rbReg;
          goto EmitFpuOp;
        }
        else if (rbReg == 0) {
          rbReg = opReg;
          opCode = ((0xDC   << X86Inst::kOpCode_FPU_2B_Shift)       ) +
                   ((opCode                                 ) & 0xFF) + rbReg;
          goto EmitFpuOp;
2154
2155
        }
        else {
2156
          goto InvalidInstruction;
2157
2158
2159
        }
      }

2160
2161
2162
2163
2164
2165
2166
2167
2168
      if (isign3 == ENC_OPS1(Mem)) {
CaseFpuArith_Mem:
        // 0xD8/0xDC, depends on the size of the memory operand; opReg is valid.
        opCode = (o0.getSize() == 4) ? 0xD8 : 0xDC;
        // Clear compressed displacement before going to EmitX86M.
        opCode &= ~static_cast<uint32_t>(X86Inst::kOpCode_CDSHL_Mask);

        rmRel = &o0;
        goto EmitX86M;
2169
2170
2171
      }
      break;

2172
2173
2174
2175
    case X86Inst::kEncodingFpuCom:
      if (isign3 == 0) {
        rbReg = 1;
        goto CaseFpuArith_Reg;
2176
2177
      }

2178
2179
2180
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.getId();
        goto CaseFpuArith_Reg;
2181
2182
      }

2183
2184
      if (isign3 == ENC_OPS1(Mem)) {
        goto CaseFpuArith_Mem;
2185
2186
2187
      }
      break;

2188
2189
2190
    case X86Inst::kEncodingFpuFldFst:
      if (isign3 == ENC_OPS1(Mem)) {
        rmRel = &o0;
2191

2192
2193
        if (o0.getSize() == 4 && commonData->hasFlag(X86Inst::kFlagFpuM32)) {
          goto EmitX86M;
2194
2195
        }

2196
        if (o0.getSize() == 8 && commonData->hasFlag(X86Inst::kFlagFpuM64)) {
2197
          opCode += 4;
2198
          goto EmitX86M;
2199
2200
        }

2201
2202
        if (o0.getSize() == 10 && commonData->hasFlag(X86Inst::kFlagFpuM80)) {
          opCode = commonData->getAltOpCode();
2203
          opReg  = x86ExtractO(opCode);
2204
          goto EmitX86M;
2205
2206
2207
        }
      }

2208
2209
2210
2211
      if (isign3 == ENC_OPS1(Reg)) {
        if (instId == X86Inst::kIdFld ) { opCode = (0xD9 << X86Inst::kOpCode_FPU_2B_Shift) + 0xC0 + o0.getId(); goto EmitFpuOp; }
        if (instId == X86Inst::kIdFst ) { opCode = (0xDD << X86Inst::kOpCode_FPU_2B_Shift) + 0xD0 + o0.getId(); goto EmitFpuOp; }
        if (instId == X86Inst::kIdFstp) { opCode = (0xDD << X86Inst::kOpCode_FPU_2B_Shift) + 0xD8 + o0.getId(); goto EmitFpuOp; }
2212
2213
2214
      }
      break;

2215
2216
2217
2218
    case X86Inst::kEncodingFpuM:
      if (isign3 == ENC_OPS1(Mem)) {
        // Clear compressed displacement before going to EmitX86M.
        opCode &= ~static_cast<uint32_t>(X86Inst::kOpCode_CDSHL_Mask);
2219

2220
2221
        rmRel = &o0;
        if (o0.getSize() == 2 && commonData->hasFlag(X86Inst::kFlagFpuM16)) {
2222
          opCode += 4;
2223
          goto EmitX86M;
2224
2225
        }

2226
2227
        if (o0.getSize() == 4 && commonData->hasFlag(X86Inst::kFlagFpuM32)) {
          goto EmitX86M;
2228
2229
        }

2230
2231
        if (o0.getSize() == 8 && commonData->hasFlag(X86Inst::kFlagFpuM64)) {
          opCode = commonData->getAltOpCode() & ~static_cast<uint32_t>(X86Inst::kOpCode_CDSHL_Mask);
2232
          opReg  = x86ExtractO(opCode);
2233
          goto EmitX86M;
2234
2235
2236
2237
        }
      }
      break;

2238
2239
    case X86Inst::kEncodingFpuRDef:
      if (isign3 == 0) {
2240
        opCode += 1;
2241
        goto EmitFpuOp;
2242
      }
2243
      ASMJIT_FALLTHROUGH;
2244

2245
2246
2247
2248
    case X86Inst::kEncodingFpuR:
      if (isign3 == ENC_OPS1(Reg)) {
        opCode += o0.getId();
        goto EmitFpuOp;
2249
2250
2251
      }
      break;

2252
2253
2254
2255
    case X86Inst::kEncodingFpuStsw:
      if (isign3 == ENC_OPS1(Reg)) {
        if (ASMJIT_UNLIKELY(o0.getId() != X86Gp::kIdAx))
          goto InvalidInstruction;
2256

2257
2258
        opCode = commonData->getAltOpCode();
        goto EmitFpuOp;
2259
2260
      }

2261
2262
2263
2264
2265
2266
      if (isign3 == ENC_OPS1(Mem)) {
        // Clear compressed displacement before going to EmitX86M.
        opCode &= ~static_cast<uint32_t>(X86Inst::kOpCode_CDSHL_Mask);

        rmRel = &o0;
        goto EmitX86M;
2267
2268
2269
2270
2271
2272
2273
      }
      break;

    // ------------------------------------------------------------------------
    // [Ext]
    // ------------------------------------------------------------------------

2274
2275
2276
    case X86Inst::kEncodingExtPextrw:
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        ADD_66H_P(X86Reg::isXmm(o1));
2277

2278
2279
        imVal = static_cast<const Imm&>(o2).getInt64();
        imLen = 1;
2280

2281
2282
2283
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitX86R;
2284
2285
      }

2286
2287
2288
2289
      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
        // Secondary opcode of 'pextrw' instruction (SSE4.1).
        opCode = commonData->getAltOpCode();
        ADD_66H_P(X86Reg::isXmm(o1));
2290

2291
2292
2293
2294
2295
2296
        imVal = static_cast<const Imm&>(o2).getInt64();
        imLen = 1;

        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
2297
2298
2299
      }
      break;

2300
2301
2302
    case X86Inst::kEncodingExtExtract:
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        ADD_66H_P(X86Reg::isXmm(o1));
2303

2304
        imVal = static_cast<const Imm&>(o2).getInt64();
2305
2306
        imLen = 1;

2307
2308
2309
        opReg = o1.getId();
        rbReg = o0.getId();
        goto EmitX86R;
2310
2311
      }

2312
2313
      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
        ADD_66H_P(X86Reg::isXmm(o1));
2314

2315
        imVal = static_cast<const Imm&>(o2).getInt64();
2316
2317
        imLen = 1;

2318
2319
2320
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
2321
2322
2323
      }
      break;

2324
2325
2326
2327
2328
    case X86Inst::kEncodingExtMov:
      // GP|MMX|XMM <- GP|MMX|XMM
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
2329

2330
2331
        if (!(options & X86Inst::kOptionModMR) || !commonData->hasAltOpCode())
          goto EmitX86R;
2332

2333
2334
2335
        opCode = commonData->getAltOpCode();
        Utils::swap(opReg, rbReg);
        goto EmitX86R;
2336
2337
      }

2338
2339
2340
2341
2342
      // GP|MMX|XMM <- Mem
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2343
2344
2345
      }

      // The following instruction uses opCode[1].
2346
      opCode = commonData->getAltOpCode();
2347

2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
      // Mem <- GP|MMX|XMM
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
      }
      break;

    case X86Inst::kEncodingExtMovnti:
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        ADD_REX_W(X86Reg::isGpq(o1));
2359

2360
2361
2362
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
2363
2364
2365
      }
      break;

2366
2367
2368
2369
    case X86Inst::kEncodingExtMovbe:
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        if (o0.getSize() == 1)
          goto InvalidInstruction;
2370

2371
2372
2373
2374
        ADD_PREFIX_BY_SIZE(o0.getSize());
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2375
2376
2377
      }

      // The following instruction uses the secondary opcode.
2378
      opCode = commonData->getAltOpCode();
2379

2380
2381
2382
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        if (o1.getSize() == 1)
          goto InvalidInstruction;
2383

2384
2385
2386
2387
        ADD_PREFIX_BY_SIZE(o1.getSize());
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitX86M;
2388
2389
2390
      }
      break;

2391
2392
2393
2394
    case X86Inst::kEncodingExtMovd:
CaseExtMovd:
      opReg = o0.getId();
      ADD_66H_P(X86Reg::isXmm(o0));
2395

2396
2397
2398
2399
      // MMX/XMM <- Gp
      if (isign3 == ENC_OPS2(Reg, Reg) && X86Reg::isGp(o1)) {
        rbReg = o1.getId();
        goto EmitX86R;
2400
2401
      }

2402
2403
2404
2405
      // MMX/XMM <- Mem
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        rmRel = &o1;
        goto EmitX86M;
2406
2407
2408
      }

      // The following instructions use the secondary opcode.
2409
2410
2411
2412
      opCode &= X86Inst::kOpCode_W;
      opCode |= commonData->getAltOpCode();
      opReg = o1.getId();
      ADD_66H_P(X86Reg::isXmm(o1));
2413

2414
2415
2416
2417
      // GP <- MMX/XMM
      if (isign3 == ENC_OPS2(Reg, Reg) && X86Reg::isGp(o0)) {
        rbReg = o0.getId();
        goto EmitX86R;
2418
2419
      }

2420
2421
2422
2423
      // Mem <- MMX/XMM
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        rmRel = &o0;
        goto EmitX86M;
2424
2425
2426
      }
      break;

2427
2428
2429
2430
2431
2432
2433
2434
    case X86Inst::kEncodingExtMovq:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();

        // MMX <- MMX
        if (X86Reg::isMm(o0) && X86Reg::isMm(o1)) {
          opCode = X86Inst::kOpCode_PP_00 | X86Inst::kOpCode_MM_0F | 0x6F;
2435

2436
2437
2438
2439
2440
2441
          if (!(options & X86Inst::kOptionModMR))
            goto EmitX86R;

          opCode += 0x10;
          Utils::swap(opReg, rbReg);
          goto EmitX86R;
2442
2443
        }

2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
        // XMM <- XMM
        if (X86Reg::isXmm(o0) && X86Reg::isXmm(o1)) {
          opCode = X86Inst::kOpCode_PP_F3 | X86Inst::kOpCode_MM_0F | 0x7E;

          if (!(options & X86Inst::kOptionModMR))
            goto EmitX86R;

          opCode = X86Inst::kOpCode_PP_66 | X86Inst::kOpCode_MM_0F | 0xD6;
          Utils::swap(opReg, rbReg);
          goto EmitX86R;
2454
2455
        }

2456
2457
2458
2459
        // MMX <- XMM (MOVDQ2Q)
        if (X86Reg::isMm(o0) && X86Reg::isXmm(o1)) {
          opCode = X86Inst::kOpCode_PP_F2 | X86Inst::kOpCode_MM_0F | 0xD6;
          goto EmitX86R;
2460
2461
        }

2462
2463
2464
2465
        // XMM <- MMX (MOVQ2DQ)
        if (X86Reg::isXmm(o0) && X86Reg::isMm(o1)) {
          opCode = X86Inst::kOpCode_PP_F3 | X86Inst::kOpCode_MM_0F | 0xD6;
          goto EmitX86R;
2466
2467
2468
        }
      }

2469
2470
2471
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
2472

2473
2474
2475
2476
        // MMX <- Mem
        if (X86Reg::isMm(o0)) {
          opCode = X86Inst::kOpCode_PP_00 | X86Inst::kOpCode_MM_0F | 0x6F;
          goto EmitX86M;
2477
2478
        }

2479
2480
2481
2482
        // XMM <- Mem
        if (X86Reg::isXmm(o0)) {
          opCode = X86Inst::kOpCode_PP_F3 | X86Inst::kOpCode_MM_0F | 0x7E;
          goto EmitX86M;
2483
2484
2485
        }
      }

2486
2487
2488
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
2489

2490
2491
2492
2493
        // Mem <- MMX
        if (X86Reg::isMm(o1)) {
          opCode = X86Inst::kOpCode_PP_00 | X86Inst::kOpCode_MM_0F | 0x7F;
          goto EmitX86M;
2494
2495
        }

2496
2497
2498
2499
        // Mem <- XMM
        if (X86Reg::isXmm(o1)) {
          opCode = X86Inst::kOpCode_PP_66 | X86Inst::kOpCode_MM_0F | 0xD6;
          goto EmitX86M;
2500
2501
2502
        }
      }

2503
2504
2505
      // MOVQ in other case is simply a MOVD instruction promoted to 64-bit.
      opCode |= X86Inst::kOpCode_W;
      goto CaseExtMovd;
2506

2507
2508
2509
    case X86Inst::kEncodingExtRm_XMM0:
      if (ASMJIT_UNLIKELY(!o2.isNone() && !X86Reg::isXmm(o2, 0)))
        goto InvalidInstruction;
2510

2511
2512
      isign3 &= 0x3F;
      goto CaseExtRm;
2513

2514
2515
2516
    case X86Inst::kEncodingExtRm_ZDI:
      if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, X86Gp::kIdDi)))
        goto InvalidInstruction;
2517

2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
      isign3 &= 0x3F;
      goto CaseExtRm;

    case X86Inst::kEncodingExtRm_Wx:
      ADD_REX_W(X86Reg::isGpq(o0) || o1.getSize() == 8);
      ASMJIT_FALLTHROUGH;

    case X86Inst::kEncodingExtRm:
CaseExtRm:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitX86R;
2531
2532
      }

2533
2534
2535
2536
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2537
2538
2539
      }
      break;

2540
2541
2542
    case X86Inst::kEncodingExtRm_P:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        ADD_66H_P(X86Reg::isXmm(o0) | X86Reg::isXmm(o1));
2543

2544
2545
2546
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitX86R;
2547
2548
      }

2549
2550
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        ADD_66H_P(X86Reg::isXmm(o0));
2551

2552
2553
2554
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2555
2556
2557
      }
      break;

2558
2559
2560
2561
2562
    case X86Inst::kEncodingExtRmRi:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitX86R;
2563
2564
      }

2565
2566
2567
2568
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2569
2570
2571
      }

      // The following instruction uses the secondary opcode.
2572
      opCode = commonData->getAltOpCode();
2573
      opReg  = x86ExtractO(opCode);
2574

2575
2576
      if (isign3 == ENC_OPS2(Reg, Imm)) {
        imVal = static_cast<const Imm&>(o1).getInt64();
2577
2578
        imLen = 1;

2579
2580
        rbReg = o0.getId();
        goto EmitX86R;
2581
2582
2583
      }
      break;

2584
2585
2586
    case X86Inst::kEncodingExtRmRi_P:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        ADD_66H_P(X86Reg::isXmm(o0) | X86Reg::isXmm(o1));
2587

2588
2589
2590
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitX86R;
2591
2592
      }

2593
2594
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        ADD_66H_P(X86Reg::isXmm(o0));
2595

2596
2597
2598
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2599
2600
2601
      }

      // The following instruction uses the secondary opcode.
2602
      opCode = commonData->getAltOpCode();
2603
      opReg  = x86ExtractO(opCode);
2604

2605
2606
      if (isign3 == ENC_OPS2(Reg, Imm)) {
        ADD_66H_P(X86Reg::isXmm(o0));
2607

2608
        imVal = static_cast<const Imm&>(o1).getInt64();
2609
2610
        imLen = 1;

2611
2612
        rbReg = o0.getId();
        goto EmitX86R;
2613
2614
2615
      }
      break;

2616
2617
    case X86Inst::kEncodingExtRmi:
      imVal = static_cast<const Imm&>(o2).getInt64();
2618
2619
      imLen = 1;

2620
2621
2622
2623
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitX86R;
2624
2625
      }

2626
2627
2628
2629
      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2630
2631
2632
      }
      break;

2633
2634
    case X86Inst::kEncodingExtRmi_P:
      imVal = static_cast<const Imm&>(o2).getInt64();
2635
2636
      imLen = 1;

2637
2638
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        ADD_66H_P(X86Reg::isXmm(o0) | X86Reg::isXmm(o1));
2639

2640
2641
2642
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitX86R;
2643
2644
      }

2645
2646
      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        ADD_66H_P(X86Reg::isXmm(o0));
2647

2648
2649
2650
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitX86M;
2651
2652
2653
      }
      break;

2654
    // ------------------------------------------------------------------------
2655
    // [Extrq / Insertq (SSE4A)]
2656
2657
    // ------------------------------------------------------------------------

2658
2659
2660
    case X86Inst::kEncodingExtExtrq:
      opReg = o0.getId();
      rbReg = o1.getId();
2661

2662
2663
      if (isign3 == ENC_OPS2(Reg, Reg))
        goto EmitX86R;
2664
2665

      // The following instruction uses the secondary opcode.
2666
      opCode = commonData->getAltOpCode();
2667

2668
2669
2670
      if (isign3 == ENC_OPS3(Reg, Imm, Imm)) {
        imVal = (static_cast<const Imm&>(o1).getUInt32()     ) +
                (static_cast<const Imm&>(o2).getUInt32() << 8) ;
2671
2672
        imLen = 2;

2673
2674
        rbReg = x86ExtractO(opCode);
        goto EmitX86R;
2675
2676
2677
      }
      break;

2678
2679
2680
2681
    case X86Inst::kEncodingExtInsertq: {
      const uint32_t isign4 = isign3 + (o3.getOp() << 9);
      opReg = o0.getId();
      rbReg = o1.getId();
2682

2683
2684
      if (isign4 == ENC_OPS2(Reg, Reg))
        goto EmitX86R;
2685
2686

      // The following instruction uses the secondary opcode.
2687
      opCode = commonData->getAltOpCode();
2688

2689
2690
2691
      if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
        imVal = (static_cast<const Imm&>(o2).getUInt32()     ) +
                (static_cast<const Imm&>(o3).getUInt32() << 8) ;
2692
        imLen = 2;
2693
        goto EmitX86R;
2694
2695
      }
      break;
2696
    }
2697

2698
    // ------------------------------------------------------------------------
2699
    // [3dNow]
2700
2701
    // ------------------------------------------------------------------------

2702
    case X86Inst::kEncodingExt3dNow:
2703
2704
2705
2706
2707
      // Every 3dNow instruction starts with 0x0F0F and the actual opcode is
      // stored as 8-bit immediate.
      imVal = opCode & 0xFF;
      imLen = 1;

2708
2709
      opCode = X86Inst::kOpCode_MM_0F | 0x0F;
      opReg = o0.getId();
2710

2711
2712
2713
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        rbReg = o1.getId();
        goto EmitX86R;
2714
2715
      }

2716
2717
2718
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        rmRel = &o1;
        goto EmitX86M;
2719
2720
2721
2722
      }
      break;

    // ------------------------------------------------------------------------
2723
    // [VEX/EVEX]
2724
2725
    // ------------------------------------------------------------------------

2726
2727
    case X86Inst::kEncodingVexOp:
      goto EmitVexEvexOp;
2728

2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
    case X86Inst::kEncodingVexKmov:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();

        // Form 'k, reg'.
        if (X86Reg::isGp(o1)) {
          opCode = commonData->getAltOpCode();
          goto EmitVexEvexR;
        }
2739

2740
2741
2742
2743
2744
2745
2746
2747
2748
        // Form 'reg, k'.
        if (X86Reg::isGp(o0)) {
          opCode = commonData->getAltOpCode() + 1;
          goto EmitVexEvexR;
        }

        // Form 'k, k'.
        if (!(options & X86Inst::kOptionModMR))
          goto EmitVexEvexR;
2749

2750
2751
2752
        opCode++;
        Utils::swap(opReg, rbReg);
        goto EmitVexEvexR;
2753
2754
      }

2755
2756
2757
2758
2759
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;

        goto EmitVexEvexM;
2760
2761
      }

2762
2763
2764
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
2765

2766
2767
2768
2769
        opCode++;
        goto EmitVexEvexM;
      }
      break;
2770

2771
2772
2773
2774
    case X86Inst::kEncodingVexM:
      if (isign3 == ENC_OPS1(Mem)) {
        rmRel = &o0;
        goto EmitVexEvexM;
2775
      }
2776
      break;
2777

2778
2779
2780
2781
2782
    case X86Inst::kEncodingVexM_VM:
      if (isign3 == ENC_OPS1(Mem)) {
        opCode |= x86OpCodeLByVMem(o0);
        rmRel = &o0;
        goto EmitVexEvexM;
2783
2784
2785
      }
      break;

2786
2787
    case X86Inst::kEncodingVexMr_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
2788

2789
2790
2791
2792
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o1.getId();
        rbReg = o0.getId();
        goto EmitVexEvexR;
2793
2794
      }

2795
2796
2797
2798
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitVexEvexM;
2799
2800
2801
      }
      break;

2802
2803
2804
    case X86Inst::kEncodingVexMr_VM:
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opCode |= std::max(x86OpCodeLByVMem(o0), x86OpCodeLBySize(o1.getSize()));
2805

2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitVexEvexM;
      }
      break;

    case X86Inst::kEncodingVexMri_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;

    case X86Inst::kEncodingVexMri:
      imVal = static_cast<const Imm&>(o2).getInt64();
2818
2819
      imLen = 1;

2820
2821
2822
2823
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = o1.getId();
        rbReg = o0.getId();
        goto EmitVexEvexR;
2824
2825
      }

2826
2827
2828
2829
      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitVexEvexM;
2830
2831
2832
      }
      break;

2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
    case X86Inst::kEncodingVexRm_ZDI:
      if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, X86Gp::kIdDi)))
        goto InvalidInstruction;

      isign3 &= 0x3F;
      goto CaseVexRm;

    case X86Inst::kEncodingVexRm_Wx:
      ADD_REX_W(X86Reg::isGpq(o0) | X86Reg::isGpq(o1));
      goto CaseVexRm;

    case X86Inst::kEncodingVexRm_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
2847

2848
2849
2850
2851
2852
2853
    case X86Inst::kEncodingVexRm:
CaseVexRm:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitVexEvexR;
2854
2855
      }

2856
2857
2858
2859
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
2860
2861
2862
      }
      break;

2863
2864
2865
2866
2867
2868
2869
2870
    case X86Inst::kEncodingVexRm_VM:
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opCode |= std::max(x86OpCodeLByVMem(o1), x86OpCodeLBySize(o0.getSize()));
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
      }
      break;
2871

2872
2873
2874
    case X86Inst::kEncodingVexRm_T1_4X: {
      if (!(options & kOptionOp4Op5Used))
        goto InvalidInstruction;
2875

2876
2877
2878
      if (X86Reg::isZmm(o0  ) && X86Reg::isZmm(o1) &&
          X86Reg::isZmm(o2  ) && X86Reg::isZmm(o3) &&
          X86Reg::isZmm(_op4) && _op5.isMem()) {
2879

2880
2881
2882
2883
2884
        // Registers [o1, o2, o3, _op4] must start aligned and must be consecutive.
        uint32_t i1 = o1.getId();
        uint32_t i2 = o2.getId();
        uint32_t i3 = o3.getId();
        uint32_t i4 = _op4.getId();
2885

2886
2887
2888
2889
2890
2891
        if (ASMJIT_UNLIKELY((i1 & 0x3) != 0 || i2 != i1 + 1 || i3 != i1 + 2 || i4 != i1 + 3))
          goto NotConsecutiveRegs;

        opReg = o0.getId();
        rmRel = &_op5;
        goto EmitVexEvexM;
2892
2893
      }
      break;
2894
    }
2895

2896
2897
2898
    case X86Inst::kEncodingVexRmi_Wx:
      ADD_REX_W(X86Reg::isGpq(o0) | X86Reg::isGpq(o1));
      goto CaseVexRmi;
2899

2900
2901
2902
    case X86Inst::kEncodingVexRmi_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
2903

2904
2905
2906
    case X86Inst::kEncodingVexRmi:
CaseVexRmi:
      imVal = static_cast<const Imm&>(o2).getInt64();
2907
2908
      imLen = 1;

2909
2910
2911
2912
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitVexEvexR;
2913
2914
      }

2915
2916
2917
2918
      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
2919
2920
2921
      }
      break;

2922
2923
2924
2925
2926
2927
2928
    case X86Inst::kEncodingVexRvm:
CaseVexRvm:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
CaseVexRvm_R:
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
        goto EmitVexEvexR;
2929
2930
      }

2931
2932
2933
2934
      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
        goto EmitVexEvexM;
2935
2936
2937
      }
      break;

2938
2939
2940
2941
2942
2943
2944
2945
    case X86Inst::kEncodingVexRvm_ZDX_Wx:
      if (ASMJIT_UNLIKELY(!o3.isNone() && !X86Reg::isGp(o3, X86Gp::kIdDx)))
        goto InvalidInstruction;
      ASMJIT_FALLTHROUGH;

    case X86Inst::kEncodingVexRvm_Wx:
      ADD_REX_W(X86Reg::isGpq(o0) | (o2.getSize() == 8));
      goto CaseVexRvm;
2946

2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
    case X86Inst::kEncodingVexRvm_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      goto CaseVexRvm;

    case X86Inst::kEncodingVexRvmr_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;

    case X86Inst::kEncodingVexRvmr: {
      const uint32_t isign4 = isign3 + (o3.getOp() << 9);
      imVal = o3.getId() << 4;
2958
2959
      imLen = 1;

2960
2961
2962
2963
      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
        goto EmitVexEvexR;
2964
2965
      }

2966
2967
2968
2969
      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
        goto EmitVexEvexM;
2970
2971
      }
      break;
2972
    }
2973

2974
2975
2976
    case X86Inst::kEncodingVexRvmi_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
2977

2978
2979
2980
2981
2982
2983
2984
2985
2986
    case X86Inst::kEncodingVexRvmi: {
      const uint32_t isign4 = isign3 + (o3.getOp() << 9);
      imVal = static_cast<const Imm&>(o3).getInt64();
      imLen = 1;

      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
        goto EmitVexEvexR;
2987
2988
      }

2989
2990
2991
2992
      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Imm)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
        goto EmitVexEvexM;
2993
      }
2994
2995
      break;
    }
2996

2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
    case X86Inst::kEncodingVexRmv_Wx:
      ADD_REX_W(X86Reg::isGpq(o0) | X86Reg::isGpq(o2));
      ASMJIT_FALLTHROUGH;

    case X86Inst::kEncodingVexRmv:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rbReg = o1.getId();
        goto EmitVexEvexR;
      }
3007

3008
3009
3010
3011
      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rmRel = &o1;
        goto EmitVexEvexM;
3012
3013
3014
      }
      break;

3015
3016
3017
3018
    case X86Inst::kEncodingVexRmvRm_VM:
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opCode  = commonData->getAltOpCode();
        opCode |= std::max(x86OpCodeLByVMem(o1), x86OpCodeLBySize(o0.getSize()));
3019

3020
3021
3022
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
3023
3024
      }

3025
3026
3027
3028
3029
3030
3031
3032
3033
      ASMJIT_FALLTHROUGH;

    case X86Inst::kEncodingVexRmv_VM:
      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
        opCode |= std::max(x86OpCodeLByVMem(o1), x86OpCodeLBySize(o0.getSize() | o2.getSize()));

        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rmRel = &o1;
        goto EmitVexEvexM;
3034
      }
3035
      break;
3036
3037


3038
3039
3040
    case X86Inst::kEncodingVexRmvi: {
      const uint32_t isign4 = isign3 + (o3.getOp() << 9);
      imVal = static_cast<const Imm&>(o3).getInt64();
3041
3042
      imLen = 1;

3043
3044
3045
3046
      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rbReg = o1.getId();
        goto EmitVexEvexR;
3047
3048
      }

3049
3050
3051
3052
      if (isign4 == ENC_OPS4(Reg, Mem, Reg, Imm)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rmRel = &o1;
        goto EmitVexEvexM;
3053
3054
      }
      break;
3055
    }
3056

3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
    case X86Inst::kEncodingVexMovdMovq:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (X86Reg::isGp(o0)) {
          opCode = commonData->getAltOpCode();
          ADD_REX_W_BY_SIZE(o0.getSize());
          opReg = o1.getId();
          rbReg = o0.getId();
          goto EmitVexEvexR;
        }

        if (X86Reg::isGp(o1)) {
          ADD_REX_W_BY_SIZE(o1.getSize());
          opReg = o0.getId();
          rbReg = o1.getId();
          goto EmitVexEvexR;
        }
3073

3074
3075
3076
3077
3078
3079
3080
3081
3082
        // If this is a 'W' version (movq) then also allow the vmovq 'xmm|xmm' form.
        if (opCode & X86Inst::kOpCode_EW) {
          opCode &= ~(X86Inst::kOpCode_PP_VEXMask | X86Inst::kOpCode_MM_Mask | 0xFF);
          opCode |=  (X86Inst::kOpCode_PP_F3      | X86Inst::kOpCode_MM_0F   | 0x7E);

          opReg = o0.getId();
          rbReg = o1.getId();
          goto EmitVexEvexR;
        }
3083
3084
      }

3085
3086
3087
3088
3089
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        if (opCode & X86Inst::kOpCode_EW) {
          opCode &= ~(X86Inst::kOpCode_PP_VEXMask | X86Inst::kOpCode_MM_Mask | 0xFF);
          opCode |=  (X86Inst::kOpCode_PP_F3      | X86Inst::kOpCode_MM_0F   | 0x7E);
        }
3090

3091
3092
3093
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
3094
3095
      }

3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
      // The following instruction uses the secondary opcode.
      opCode = commonData->getAltOpCode();

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        if (opCode & X86Inst::kOpCode_EW) {
          opCode &= ~(X86Inst::kOpCode_PP_VEXMask | X86Inst::kOpCode_MM_Mask | 0xFF);
          opCode |=  (X86Inst::kOpCode_PP_66      | X86Inst::kOpCode_MM_0F   | 0xD6);
        }

        opReg = o1.getId();
        rmRel = &o0;
        goto EmitVexEvexM;
3108
3109
3110
      }
      break;

3111
3112
3113
    case X86Inst::kEncodingVexRmMr_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
3114

3115
3116
3117
3118
3119
    case X86Inst::kEncodingVexRmMr:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitVexEvexR;
3120
3121
      }

3122
3123
3124
3125
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
3126
3127
3128
      }

      // The following instruction uses the secondary opcode.
3129
3130
      opCode &= X86Inst::kOpCode_LL_Mask;
      opCode |= commonData->getAltOpCode();
3131

3132
3133
3134
3135
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitVexEvexM;
3136
3137
3138
      }
      break;

3139
3140
3141
3142
    case X86Inst::kEncodingVexRvmRmv:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rbReg = o1.getId();
3143

3144
3145
        if (!(options & X86Inst::kOptionModMR))
          goto EmitVexEvexR;
3146

3147
3148
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
3149

3150
3151
3152
        ADD_VEX_W(true);
        goto EmitVexEvexR;
      }
3153

3154
3155
3156
3157
      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rmRel = &o1;
        goto EmitVexEvexM;
3158
3159
      }

3160
3161
3162
3163
3164
3165
      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;

        ADD_VEX_W(true);
        goto EmitVexEvexM;
3166
3167
3168
      }
      break;

3169
3170
3171
3172
3173
3174
3175
3176
3177
    case X86Inst::kEncodingVexRvmRmi_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;

    case X86Inst::kEncodingVexRvmRmi:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
        goto EmitVexEvexR;
3178
3179
      }

3180
3181
3182
3183
      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
        goto EmitVexEvexM;
3184
3185
      }

3186
3187
3188
      // The following instructions use the secondary opcode.
      opCode &= X86Inst::kOpCode_LL_Mask;
      opCode |= commonData->getAltOpCode();
3189

3190
      imVal = static_cast<const Imm&>(o2).getInt64();
3191
3192
      imLen = 1;

3193
3194
3195
3196
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitVexEvexR;
3197
3198
      }

3199
3200
3201
3202
      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
3203
3204
3205
      }
      break;

3206
3207
3208
3209
    case X86Inst::kEncodingVexRvmRmvRmi:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rbReg = o1.getId();
3210

3211
3212
        if (!(options & X86Inst::kOptionModMR))
          goto EmitVexEvexR;
3213

3214
3215
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
3216

3217
3218
        ADD_VEX_W(true);
        goto EmitVexEvexR;
3219
3220
      }

3221
3222
3223
      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o2.getId());
        rmRel = &o1;
3224

3225
3226
3227
3228
3229
3230
        goto EmitVexEvexM;
      }

      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
3231
3232

        ADD_VEX_W(true);
3233
        goto EmitVexEvexM;
3234
3235
      }

3236
3237
      // The following instructions use the secondary opcode.
      opCode = commonData->getAltOpCode();
3238

3239
3240
      imVal = static_cast<const Imm&>(o2).getInt64();
      imLen = 1;
3241

3242
3243
3244
3245
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = o0.getId();
        rbReg = o1.getId();
        goto EmitVexEvexR;
3246
3247
      }

3248
3249
3250
3251
      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
3252
      }
3253
      break;
3254

3255
3256
3257
3258
3259
    case X86Inst::kEncodingVexRvmMr:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
        goto EmitVexEvexR;
3260
3261
      }

3262
3263
3264
3265
      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
        goto EmitVexEvexM;
3266
3267
      }

3268
3269
      // The following instructions use the secondary opcode.
      opCode = commonData->getAltOpCode();
3270

3271
3272
3273
3274
3275
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o1.getId();
        rbReg = o0.getId();
        goto EmitVexEvexR;
      }
3276

3277
3278
3279
3280
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitVexEvexM;
3281
3282
3283
      }
      break;

3284
3285
3286
    case X86Inst::kEncodingVexRvmMvr_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
3287

3288
3289
3290
3291
3292
3293
    case X86Inst::kEncodingVexRvmMvr:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
        goto EmitVexEvexR;
      }
3294

3295
3296
3297
3298
      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
        goto EmitVexEvexM;
3299
3300
      }

3301
3302
3303
      // The following instruction uses the secondary opcode.
      opCode &= X86Inst::kOpCode_LL_Mask;
      opCode |= commonData->getAltOpCode();
3304

3305
3306
3307
3308
3309
3310
      if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o2.getId(), o1.getId());
        rmRel = &o0;
        goto EmitVexEvexM;
      }
      break;
3311

3312
3313
3314
    case X86Inst::kEncodingVexRvmVmi_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
3315

3316
3317
3318
3319
3320
3321
    case X86Inst::kEncodingVexRvmVmi:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
        goto EmitVexEvexR;
      }
3322

3323
3324
3325
3326
      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
        goto EmitVexEvexM;
3327
3328
      }

3329
3330
3331
3332
      // The following instruction uses the secondary opcode.
      opCode &= X86Inst::kOpCode_LL_Mask;
      opCode |= commonData->getAltOpCode();
      opReg = x86ExtractO(opCode);
3333

3334
3335
      imVal = static_cast<const Imm&>(o2).getInt64();
      imLen = 1;
3336

3337
3338
3339
3340
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.getId());
        rbReg = o1.getId();
        goto EmitVexEvexR;
3341
3342
      }

3343
3344
3345
3346
      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.getId());
        rmRel = &o1;
        goto EmitVexEvexM;
3347
3348
3349
      }
      break;

3350
3351
3352
    case X86Inst::kEncodingVexVm_Wx:
      ADD_REX_W(X86Reg::isGpq(o0) | X86Reg::isGpq(o1));
      ASMJIT_FALLTHROUGH;
3353

3354
3355
3356
3357
3358
    case X86Inst::kEncodingVexVm:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.getId());
        rbReg = o1.getId();
        goto EmitVexEvexR;
3359
3360
      }

3361
3362
3363
3364
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.getId());
        rmRel = &o1;
        goto EmitVexEvexM;
3365
3366
3367
      }
      break;

3368
3369
3370
3371
    case X86Inst::kEncodingVexEvexVmi_Lx:
      if (isign3 == ENC_OPS3(Reg, Mem, Imm))
        opCode |= X86Inst::kOpCode_MM_ForceEvex;
      ASMJIT_FALLTHROUGH;
3372

3373
3374
3375
    case X86Inst::kEncodingVexVmi_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
3376

3377
3378
3379
    case X86Inst::kEncodingVexVmi:
      imVal = static_cast<const Imm&>(o2).getInt64();
      imLen = 1;
3380

3381
3382
3383
3384
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.getId());
        rbReg = o1.getId();
        goto EmitVexEvexR;
3385
3386
      }

3387
3388
3389
3390
      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.getId());
        rmRel = &o1;
        goto EmitVexEvexM;
3391
3392
3393
      }
      break;

3394
3395
3396
    case X86Inst::kEncodingVexRvrmRvmr_Lx:
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
3397

3398
3399
    case X86Inst::kEncodingVexRvrmRvmr: {
      const uint32_t isign4 = isign3 + (o3.getOp() << 9);
3400

3401
3402
3403
      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
        imVal = o3.getId() << 4;
        imLen = 1;
3404

3405
3406
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
3407

3408
        goto EmitVexEvexR;
3409
3410
      }

3411
3412
3413
      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
        imVal = o2.getId() << 4;
        imLen = 1;
3414

3415
3416
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o3;
3417

3418
3419
        ADD_VEX_W(true);
        goto EmitVexEvexM;
3420
3421
      }

3422
3423
3424
3425
3426
3427
3428
3429
      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
        imVal = o3.getId() << 4;
        imLen = 1;

        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;

        goto EmitVexEvexM;
3430
3431
      }
      break;
3432
    }
3433

3434
3435
3436
    case X86Inst::kEncodingVexRvrmiRvmri_Lx: {
      if (!(options & CodeEmitter::kOptionOp4Op5Used) || !_op4.isImm())
        goto InvalidInstruction;
3437

3438
3439
      const uint32_t isign4 = isign3 + (o3.getOp() << 9);
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize() | o2.getSize() | o3.getSize());
3440

3441
      imVal = static_cast<const Imm&>(_op4).getUInt8() & 0x0F;
3442
3443
      imLen = 1;

3444
3445
3446
3447
3448
3449
      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
        imVal |= o3.getId() << 4;
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();

        goto EmitVexEvexR;
3450
3451
      }

3452
3453
3454
3455
3456
3457
3458
      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
        imVal |= o2.getId() << 4;
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o3;

        ADD_VEX_W(true);
        goto EmitVexEvexM;
3459
3460
      }

3461
3462
3463
3464
      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
        imVal |= o3.getId() << 4;
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
3465

3466
3467
3468
3469
        goto EmitVexEvexM;
      }
      break;
    }
3470

3471
3472
3473
3474
    case X86Inst::kEncodingVexMovssMovsd:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        goto CaseVexRvm_R;
      }
3475

3476
3477
3478
3479
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.getId();
        rmRel = &o1;
        goto EmitVexEvexM;
3480
3481
      }

3482
3483
3484
3485
3486
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opCode = commonData->getAltOpCode();
        opReg = o1.getId();
        rmRel = &o0;
        goto EmitVexEvexM;
3487
3488
3489
      }
      break;

3490
3491
3492
3493
3494
3495
3496
3497
    // ------------------------------------------------------------------------
    // [FMA4]
    // ------------------------------------------------------------------------

    case X86Inst::kEncodingFma4_Lx:
      // It's fine to just check the first operand, second is just for sanity.
      opCode |= x86OpCodeLBySize(o0.getSize() | o1.getSize());
      ASMJIT_FALLTHROUGH;
3498

3499
3500
3501
3502
3503
    case X86Inst::kEncodingFma4: {
      const uint32_t isign4 = isign3 + (o3.getOp() << 9);

      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
        imVal = o3.getId() << 4;
3504
3505
        imLen = 1;

3506
3507
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rbReg = o2.getId();
3508

3509
        goto EmitVexEvexR;
3510
3511
      }

3512
3513
      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
        imVal = o2.getId() << 4;
3514
3515
        imLen = 1;

3516
3517
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o3;
3518
3519

        ADD_VEX_W(true);
3520
        goto EmitVexEvexM;
3521
3522
      }

3523
3524
      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
        imVal = o3.getId() << 4;
3525
3526
        imLen = 1;

3527
3528
        opReg = x86PackRegAndVvvvv(o0.getId(), o1.getId());
        rmRel = &o2;
3529

3530
        goto EmitVexEvexM;
3531
3532
      }
      break;
3533
    }
3534
  }
3535
  goto InvalidInstruction;
3536
3537
3538
3539
3540

  // --------------------------------------------------------------------------
  // [Emit - X86]
  // --------------------------------------------------------------------------

3541
3542
EmitX86OpMovAbs:
  imLen = getGpSize();
3543

3544
3545
3546
  // Segment-override prefix.
  if (rmRel->as<X86Mem>().hasSegment())
    EMIT_BYTE(x86SegmentPrefix[rmRel->as<X86Mem>().getSegmentId()]);
3547

3548
3549
3550
EmitX86Op:
  // Emit mandatory instruction prefix.
  EMIT_PP(opCode);
3551

3552
3553
3554
3555
3556
3557
3558
  // Emit REX prefix (64-bit only).
  {
    uint32_t rex = x86ExtractREX(opCode, options);
    if (rex) {
      if (options & X86Inst::_kOptionInvalidRex)
        goto InvalidRexPrefix;
      EMIT_BYTE(rex | kX86ByteRex);
3559
    }
3560
3561
  }

3562
3563
  // Emit instruction opcodes.
  EMIT_MM_OP(opCode);
3564

3565
  if (imLen != 0)
3566
    goto EmitImm;
3567
  else
3568
    goto EmitDone;
3569

3570
3571
EmitX86OpReg:
  // Emit mandatory instruction prefix.
3572
3573
  EMIT_PP(opCode);

3574
3575
3576
3577
3578
3579
3580
3581
  // Emit REX prefix (64-bit only).
  {
    uint32_t rex = x86ExtractREX(opCode, options) |
                   (opReg >> 3); // Rex.B (0x01).
    if (rex) {
      EMIT_BYTE(rex | kX86ByteRex);
      if (options & X86Inst::_kOptionInvalidRex)
        goto InvalidRexPrefix;
3582
3583
      opReg &= 0x7;
    }
3584
3585
  }

3586
  // Emit instruction opcodes.
3587
  opCode += opReg;
3588
  EMIT_MM_OP(opCode);
3589
3590

  if (imLen != 0)
3591
    goto EmitImm;
3592
  else
3593
    goto EmitDone;
3594

3595
3596
3597
3598
3599
3600
3601
EmitX86OpImplicitMem:
  // NOTE: Don't change the emit order here, it's compatible with KeyStone/LLVM.
  rmInfo = x86MemInfo[rmRel->as<X86Mem>().getBaseIndexType()];
  if (ASMJIT_UNLIKELY(rmRel->as<X86Mem>().hasOffset() || (rmInfo & kX86MemInfo_Index)))
    goto InvalidInstruction;

  // Emit mandatory instruction prefix.
3602
3603
  EMIT_PP(opCode);

3604
3605
3606
3607
3608
3609
3610
3611
3612
  // Emit REX prefix (64-bit only).
  {
    uint32_t rex = x86ExtractREX(opCode, options);
    if (rex) {
      if (options & X86Inst::_kOptionInvalidRex)
        goto InvalidRexPrefix;
      EMIT_BYTE(rex | kX86ByteRex);
    }
  }
3613

3614
3615
3616
  // Segment-override prefix.
  if (rmRel->as<X86Mem>().hasSegment())
    EMIT_BYTE(x86SegmentPrefix[rmRel->as<X86Mem>().getSegmentId()]);
3617

3618
3619
3620
3621
3622
3623
  // Address-override prefix.
  if (rmInfo & _getAddressOverrideMask())
    EMIT_BYTE(0x67);

  // Emit instruction opcodes.
  EMIT_MM_OP(opCode);
3624

3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
  if (imLen != 0)
    goto EmitImm;
  else
    goto EmitDone;

EmitX86R:
  // Mandatory instruction prefix.
  EMIT_PP(opCode);

  // Rex prefix (64-bit only).
  {
    uint32_t rex = x86ExtractREX(opCode, options) |
                   ((opReg & 0x08) >> 1) | // REX.R (0x04).
                   ((rbReg       ) >> 3) ; // REX.B (0x01).
    if (rex) {
      if (options & X86Inst::_kOptionInvalidRex)
        goto InvalidRexPrefix;
      EMIT_BYTE(rex | kX86ByteRex);
      opReg &= 0x07;
      rbReg &= 0x07;
3645
3646
3647
3648
    }
  }

  // Instruction opcodes.
3649
  EMIT_MM_OP(opCode);
3650
  // ModR.
3651
  EMIT_BYTE(x86EncodeMod(3, opReg, rbReg));
3652
3653

  if (imLen != 0)
3654
    goto EmitImm;
3655
  else
3656
    goto EmitDone;
3657

3658
3659
3660
3661
EmitX86M:
  ASMJIT_ASSERT(rmRel != nullptr);
  ASMJIT_ASSERT(rmRel->getOp() == Operand::kOpMem);
  rmInfo = x86MemInfo[rmRel->as<X86Mem>().getBaseIndexType()];
3662

3663
3664
  // GP instructions never have a compressed displacement specified.
  ASMJIT_ASSERT((opCode & X86Inst::kOpCode_CDSHL_Mask) == 0);
3665

3666
3667
3668
  // Segment-override prefix.
  if (rmRel->as<X86Mem>().hasSegment())
    EMIT_BYTE(x86SegmentPrefix[rmRel->as<X86Mem>().getSegmentId()]);
3669

3670
3671
3672
  // Address-override prefix.
  if (rmInfo & _getAddressOverrideMask())
    EMIT_BYTE(0x67);
3673
3674
3675
3676

  // Mandatory instruction prefix.
  EMIT_PP(opCode);

3677
3678
  rbReg = rmRel->as<X86Mem>().getBaseId();
  rxReg = rmRel->as<X86Mem>().getIndexId();
3679

3680
3681
3682
  // REX prefix (64-bit only).
  {
    uint32_t rex;
3683

3684
3685
3686
    rex  = (rbReg >> 3) & 0x01; // REX.B (0x01).
    rex |= (rxReg >> 2) & 0x02; // REX.X (0x02).
    rex |= (opReg >> 1) & 0x04; // REX.R (0x04).
3687

3688
3689
    rex &= rmInfo;
    rex |= x86ExtractREX(opCode, options);
3690

3691
3692
3693
3694
3695
3696
    if (rex) {
      if (options & X86Inst::_kOptionInvalidRex)
        goto InvalidRexPrefix;
      EMIT_BYTE(rex | kX86ByteRex);
      opReg &= 0x07;
    }
3697
3698
3699
  }

  // Instruction opcodes.
3700
  EMIT_MM_OP(opCode);
3701
3702
3703
  // ... Fall through ...

  // --------------------------------------------------------------------------
3704
  // [Emit - MOD/SIB]
3705
3706
  // --------------------------------------------------------------------------

3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
EmitModSib:
  if (!(rmInfo & (kX86MemInfo_Index | kX86MemInfo_67H_X86))) {
    // ==========|> [BASE + DISP8|DISP32].
    if (rmInfo & kX86MemInfo_BaseGp) {
      rbReg &= 0x7;
      relOffset = rmRel->as<X86Mem>().getOffsetLo32();

      uint32_t mod = x86EncodeMod(0, opReg, rbReg);
      if (rbReg == X86Gp::kIdSp) {
        // [XSP|R12].
        if (relOffset == 0) {
          EMIT_BYTE(mod);
3719
3720
          EMIT_BYTE(x86EncodeSib(0, 4, 4));
        }
3721
        // [XSP|R12 + DISP8|DISP32].
3722
        else {
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
          uint32_t cdShift = (opCode & X86Inst::kOpCode_CDSHL_Mask) >> X86Inst::kOpCode_CDSHL_Shift;
          int32_t cdOffset = relOffset >> cdShift;

          if (Utils::isInt8(cdOffset) && relOffset == (cdOffset << cdShift)) {
            EMIT_BYTE(mod + 0x40); // <- MOD(1, opReg, rbReg).
            EMIT_BYTE(x86EncodeSib(0, 4, 4));
            EMIT_BYTE(cdOffset & 0xFF);
          }
          else {
            EMIT_BYTE(mod + 0x80); // <- MOD(2, opReg, rbReg).
            EMIT_BYTE(x86EncodeSib(0, 4, 4));
            EMIT_32(relOffset);
          }
3736
3737
        }
      }
3738
3739
3740
      else if (rbReg != X86Gp::kIdBp && relOffset == 0) {
        // [BASE].
        EMIT_BYTE(mod);
3741
3742
      }
      else {
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
        // [BASE + DISP8|DISP32].
        uint32_t cdShift = (opCode & X86Inst::kOpCode_CDSHL_Mask) >> X86Inst::kOpCode_CDSHL_Shift;
        int32_t cdOffset = relOffset >> cdShift;

        if (Utils::isInt8(cdOffset) && relOffset == (cdOffset << cdShift)) {
          EMIT_BYTE(mod + 0x40);
          EMIT_BYTE(cdOffset & 0xFF);
        }
        else {
          EMIT_BYTE(mod + 0x80);
          EMIT_32(relOffset);
        }
3755
3756
      }
    }
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
    // ==========|> [ABSOLUTE | DISP32].
    else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
      if (is32Bit()) {
        relOffset = rmRel->as<X86Mem>().getOffsetLo32();
        EMIT_BYTE(x86EncodeMod(0, opReg, 5));
        EMIT_32(relOffset);
      }
      else {
        uint64_t baseAddress = getCodeInfo().getBaseAddress();
        relOffset = rmRel->as<X86Mem>().getOffsetLo32();

        // Prefer absolute addressing mode if FS|GS segment override is present.
        bool absoluteValid = rmRel->as<X86Mem>().getOffsetHi32() == (relOffset >> 31);
        bool preferAbsolute = (rmRel->as<X86Mem>().getSegmentId() >= X86Seg::kIdFs) || rmRel->as<X86Mem>().isAbs();

        // If we know the base address and the memory operand points to an
        // absolute address it's possible to calculate REL32 that can be
        // used as [RIP+REL32] in 64-bit mode.
        if (baseAddress != Globals::kNoBaseAddress && !preferAbsolute) {
          const uint32_t kModRel32Size = 5;
          uint64_t rip64 = baseAddress +
            static_cast<uint64_t>((uintptr_t)(cursor - _bufferData)) + imLen + kModRel32Size;

          uint64_t rel64 = static_cast<uint64_t>(rmRel->as<X86Mem>().getOffset()) - rip64;
          if (Utils::isInt32(static_cast<int64_t>(rel64))) {
            EMIT_BYTE(x86EncodeMod(0, opReg, 5));
            EMIT_32(static_cast<uint32_t>(rel64 & 0xFFFFFFFFU));
            if (imLen != 0)
              goto EmitImm;
            else
              goto EmitDone;
          }
        }
3790

3791
3792
        if (ASMJIT_UNLIKELY(!absoluteValid))
          goto InvalidAddress64Bit;
3793
3794

        EMIT_BYTE(x86EncodeMod(0, opReg, 4));
3795
3796
        EMIT_BYTE(x86EncodeSib(0, 4, 5));
        EMIT_32(relOffset);
3797
3798
      }
    }
3799
    // ==========|> [LABEL|RIP + DISP32]
3800
    else {
3801
      EMIT_BYTE(x86EncodeMod(0, opReg, 5));
3802

3803
3804
3805
3806
      if (is32Bit()) {
EmitModSib_LabelRip_X86:
        if (ASMJIT_UNLIKELY(_code->_relocations.willGrow(&_code->_baseHeap) != kErrorOk))
          goto NoHeapMemory;
3807

3808
3809
3810
3811
3812
        relOffset = rmRel->as<X86Mem>().getOffsetLo32();
        if (rmInfo & kX86MemInfo_BaseLabel) {
          // [LABEL->ABS].
          label = _code->getLabelEntry(rmRel->as<X86Mem>().getBaseId());
          if (!label) goto InvalidLabel;
3813

3814
3815
          err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, 4);
          if (ASMJIT_UNLIKELY(err)) goto Failed;
3816

3817
3818
3819
          re->_sourceSectionId = _section->getId();
          re->_sourceOffset = static_cast<uint64_t>((uintptr_t)(cursor - _bufferData));
          re->_data = static_cast<int64_t>(relOffset);
3820

3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
          if (label->isBound()) {
            // Bound label.
            re->_data += static_cast<uint64_t>(label->getOffset());
            EMIT_32(0);
          }
          else {
            // Non-bound label.
            relOffset = -4 - imLen;
            relSize = 4;
            goto EmitRel;
          }
        }
        else {
          // [RIP->ABS].
          err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, 4);
          if (ASMJIT_UNLIKELY(err)) goto Failed;

          re->_sourceSectionId = _section->getId();
          re->_sourceOffset = static_cast<uint64_t>((uintptr_t)(cursor - _bufferData));
          re->_data = re->_sourceOffset + static_cast<uint64_t>(static_cast<int64_t>(relOffset));
          EMIT_32(0);
        }
3843
3844
      }
      else {
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
        relOffset = rmRel->as<X86Mem>().getOffsetLo32();
        if (rmInfo & kX86MemInfo_BaseLabel) {
          // [RIP].
          label = _code->getLabelEntry(rmRel->as<X86Mem>().getBaseId());
          if (!label) goto InvalidLabel;

          relOffset -= (4 + imLen);
          if (label->isBound()) {
            // Bound label.
            relOffset += label->getOffset() - static_cast<int32_t>((intptr_t)(cursor - _bufferData));
            EMIT_32(static_cast<int32_t>(relOffset));
          }
          else {
            // Non-bound label.
            relSize = 4;
            goto EmitRel;
          }
        }
        else {
          // [RIP].
          EMIT_32(static_cast<int32_t>(relOffset));
        }
3867
3868
3869
      }
    }
  }
3870
3871
3872
3873
3874
  else if (!(rmInfo & kX86MemInfo_67H_X86)) {
    // ESP|RSP can't be used as INDEX in pure SIB mode, however, VSIB mode
    // allows XMM4|YMM4|ZMM4 (that's why the check is before the label).
    if (ASMJIT_UNLIKELY(rxReg == X86Gp::kIdSp))
      goto InvalidAddressIndex;
3875

3876
3877
EmitModVSib:
    rxReg &= 0x7;
3878

3879
3880
3881
3882
    // ==========|> [BASE + INDEX + DISP8|DISP32].
    if (rmInfo & kX86MemInfo_BaseGp) {
      rbReg &= 0x7;
      relOffset = rmRel->as<X86Mem>().getOffsetLo32();
3883

3884
3885
3886
3887
3888
3889
3890
      uint32_t mod = x86EncodeMod(0, opReg, 4);
      uint32_t sib = x86EncodeSib(rmRel->as<X86Mem>().getShift(), rxReg, rbReg);

      if (relOffset == 0 && rbReg != X86Gp::kIdBp) {
        // [BASE + INDEX << SHIFT].
        EMIT_BYTE(mod);
        EMIT_BYTE(sib);
3891
3892
      }
      else {
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
        uint32_t cdShift = (opCode & X86Inst::kOpCode_CDSHL_Mask) >> X86Inst::kOpCode_CDSHL_Shift;
        int32_t cdOffset = relOffset >> cdShift;

        if (Utils::isInt8(cdOffset) && relOffset == (cdOffset << cdShift)) {
          // [BASE + INDEX << SHIFT + DISP8].
          EMIT_BYTE(mod + 0x40); // <- MOD(1, opReg, 4).
          EMIT_BYTE(sib);
          EMIT_BYTE(cdOffset);
        }
        else {
          // [BASE + INDEX << SHIFT + DISP32].
          EMIT_BYTE(mod + 0x80); // <- MOD(2, opReg, 4).
          EMIT_BYTE(sib);
          EMIT_32(relOffset);
        }
3908
3909
      }
    }
3910
3911
3912
    // ==========|> [INDEX + DISP32].
    else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
      // [INDEX << SHIFT + DISP32].
3913
      EMIT_BYTE(x86EncodeMod(0, opReg, 4));
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
      EMIT_BYTE(x86EncodeSib(rmRel->as<X86Mem>().getShift(), rxReg, 5));

      relOffset = rmRel->as<X86Mem>().getOffsetLo32();
      EMIT_32(relOffset);
    }
    // ==========|> [LABEL|RIP + INDEX + DISP32].
    else {
      if (is32Bit()) {
        EMIT_BYTE(x86EncodeMod(0, opReg, 4));
        EMIT_BYTE(x86EncodeSib(rmRel->as<X86Mem>().getShift(), rxReg, 5));
        goto EmitModSib_LabelRip_X86;
3925
3926
      }
      else {
3927
3928
        // NOTE: This also handles VSIB+RIP, which is not allowed in 64-bit mode.
        goto InvalidAddress;
3929
3930
3931
      }
    }
  }
3932
3933
3934
  else {
    // 16-bit address mode (32-bit mode with 67 override prefix).
    relOffset = (static_cast<int32_t>(rmRel->as<X86Mem>().getOffsetLo32()) << 16) >> 16;
3935

3936
3937
3938
3939
3940
    // NOTE: 16-bit addresses don't use SIB byte and their encoding differs. We
    // use a table-based approach to calculate the proper MOD byte as it's easier.
    // Also, not all BASE [+ INDEX] combinations are supported in 16-bit mode, so
    // this may fail.
    const uint32_t kBaseGpIdx = (kX86MemInfo_BaseGp | kX86MemInfo_Index);
3941

3942
3943
3944
    if (rmInfo & kBaseGpIdx) {
      // ==========|> [BASE + INDEX + DISP16].
      uint32_t mod;
3945

3946
3947
      rbReg &= 0x7;
      rxReg &= 0x7;
3948

3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
      if ((rmInfo & kBaseGpIdx) == kBaseGpIdx) {
        uint32_t shf = rmRel->as<X86Mem>().getShift();
        if (ASMJIT_UNLIKELY(shf != 0))
          goto InvalidAddress;
        mod = x86Mod16BaseIndexTable[(rbReg << 3) + rxReg];
      }
      else {
        if (rmInfo & kX86MemInfo_Index)
          rbReg = rxReg;
        mod = x86Mod16BaseTable[rbReg];
      }

      if (ASMJIT_UNLIKELY(mod == 0xFF))
        goto InvalidAddress;

      mod += opReg << 3;
      if (relOffset == 0 && mod != 0x06) {
        EMIT_BYTE(mod);
      }
      else if (Utils::isInt8(relOffset)) {
        EMIT_BYTE(mod + 0x40);
        EMIT_BYTE(relOffset);
      }
      else {
        EMIT_BYTE(mod + 0x80);
        EMIT_16(relOffset);
      }
    }
    else {
      // Not supported in 16-bit addresses.
      if (rmInfo & (kX86MemInfo_BaseRip | kX86MemInfo_BaseLabel))
        goto InvalidAddress;

      // ==========|> [DISP16].
      EMIT_BYTE(opReg | 0x06);
      EMIT_16(relOffset);
    }
3986
  }
3987
3988
3989
3990
3991

  if (imLen != 0)
    goto EmitImm;
  else
    goto EmitDone;
3992
3993

  // --------------------------------------------------------------------------
3994
  // [Emit - FPU]
3995
3996
  // --------------------------------------------------------------------------

3997
EmitFpuOp:
3998
3999
4000
  // Mandatory instruction prefix.
  EMIT_PP(opCode);

4001
4002
4003
4004
  // FPU instructions consist of two opcodes.
  EMIT_BYTE(opCode >> X86Inst::kOpCode_FPU_2B_Shift);
  EMIT_BYTE(opCode);
  goto EmitDone;
4005
4006

  // --------------------------------------------------------------------------
4007
  // [Emit - VEX / EVEX]
4008
4009
  // --------------------------------------------------------------------------

4010
EmitVexEvexOp:
4011
  {
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
    // These don't use immediate.
    ASMJIT_ASSERT(imLen == 0);

    // Only 'vzeroall' and 'vzeroupper' instructions use this encoding, they
    // don't define 'W' to be '1' so we can just check the 'mmmmm' field. Both
    // instructions can be encoded by using VEX2 prefix so VEX3 is basically
    // only used when forced from outside.
    ASMJIT_ASSERT((opCode & X86Inst::kOpCode_W) == 0);

    uint32_t x = ((opCode & X86Inst::kOpCode_MM_Mask   ) >> (X86Inst::kOpCode_MM_Shift     )) |
                 ((opCode & X86Inst::kOpCode_LL_Mask   ) >> (X86Inst::kOpCode_LL_Shift - 10)) |
                 ((opCode & X86Inst::kOpCode_PP_VEXMask) >> (X86Inst::kOpCode_PP_Shift -  8)) |
                 ((options & X86Inst::kOptionVex3      ) >> (X86Inst::kOpCode_MM_Shift     )) ;
    if (x & 0x04U) {
      x  = (x & (0x4 ^ 0xFFFF)) << 8;                    // [00000000|00000Lpp|0000m0mm|00000000].
      x ^= (kX86ByteVex3) |                              // [........|00000Lpp|0000m0mm|__VEX3__].
           (0x07U  << 13) |                              // [........|00000Lpp|1110m0mm|__VEX3__].
           (0x0FU  << 19) |                              // [........|01111Lpp|1110m0mm|__VEX3__].
           (opCode << 24) ;                              // [_OPCODE_|01111Lpp|1110m0mm|__VEX3__].

      EMIT_32(x);
      goto EmitDone;
4034
4035
    }
    else {
4036
      x = ((x >> 8) ^ x) ^ 0xF9;
4037
      EMIT_BYTE(kX86ByteVex2);
4038
4039
4040
      EMIT_BYTE(x);
      EMIT_BYTE(opCode);
      goto EmitDone;
4041
4042
4043
    }
  }

4044
EmitVexEvexR:
4045
  {
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
    // VEX instructions use only 0-1 BYTE immediate.
    ASMJIT_ASSERT(imLen <= 1);

    // Construct `x` - a complete EVEX|VEX prefix.
    uint32_t x = ((opReg << 4) & 0xF980U) |              // [........|........|Vvvvv..R|R.......].
                 ((rbReg << 2) & 0x0060U) |              // [........|........|........|.BB.....].
                 (x86ExtractLLMM(opCode, options)) |     // [........|.LL.....|Vvvvv..R|RBBmmmmm].
                 (_extraReg.getId() << 16);              // [........|.LL..aaa|Vvvvv..R|RBBmmmmm].
    opReg &= 0x7;

    // Mark invalid VEX (force EVEX) case:               // [@.......|.LL..aaa|Vvvvv..R|RBBmmmmm].
    x |= (~commonData->getFlags() & X86Inst::kFlagVex) << (31 - Utils::firstBitOfT<X86Inst::kFlagVex>());

    // Handle AVX512 options by a single branch.
    const uint32_t kAvx512Options = X86Inst::kOptionZMask   |
                                    X86Inst::kOption1ToX    |
                                    X86Inst::kOptionSAE     |
                                    X86Inst::kOptionER      ;
    if (options & kAvx512Options) {
      // Memory broadcast without a memory operand is invalid.
      if (ASMJIT_UNLIKELY(options & X86Inst::kOption1ToX))
        goto InvalidBroadcast;

      // TODO: {sae} and {er}
      x |= options & X86Inst::kOptionZMask;              // [@.......|zLL..aaa|Vvvvv..R|RBBmmmmm].
    }
4072

4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
    // Check if EVEX is required by checking bits in `x` :  [@.......|xx...xxx|x......x|.x.x....].
    if (x & 0x80C78150U) {
      uint32_t y = ((x << 4) & 0x00080000U) |            // [@.......|....V...|........|........].
                   ((x >> 4) & 0x00000010U) ;            // [@.......|....V...|........|...R....].
      x  = (x & 0x00FF78E3U) | y;                        // [........|zLL.Vaaa|0vvvv000|RBBR00mm].
      x  = (x << 8) |                                    // [zLL.Vaaa|0vvvv000|RBBR00mm|00000000].
           ((opCode >> kSHR_W_PP) & 0x00830000U) |       // [zLL.Vaaa|Wvvvv0pp|RBBR00mm|00000000].
           ((opCode >> kSHR_W_EW) & 0x00800000U) ;       // [zLL.Vaaa|Wvvvv0pp|RBBR00mm|00000000] (added EVEX.W).
                                                         //      _     ____    ____
      x ^= 0x087CF000U | kX86ByteEvex;                   // [zLL.Vaaa|Wvvvv1pp|RBBR00mm|01100010].

      EMIT_32(x);
      EMIT_BYTE(opCode);

      rbReg &= 0x7;
      EMIT_BYTE(x86EncodeMod(3, opReg, rbReg));

      if (imLen == 0) goto EmitDone;
      EMIT_BYTE(imVal & 0xFF);
      goto EmitDone;
    }

    // Not EVEX, prepare `x` for VEX2 or VEX3:          x = [........|00L00000|0vvvv000|R0B0mmmm].
    x |= ((opCode >> (kSHR_W_PP + 8)) & 0x8300U) |       // [00000000|00L00000|Wvvvv0pp|R0B0mmmm].
         ((x      >> 11             ) & 0x0400U) ;       // [00000000|00L00000|WvvvvLpp|R0B0mmmm].
4098

4099
4100
4101
    // Check if VEX3 is required / forced:                  [........|........|x.......|..x..x..].
    if (x & 0x0008024U) {
      uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opCode << 24);
4102

4103
4104
4105
4106
4107
      // Clear 'FORCE-VEX3' bit and all high bits.
      x  = (x & (0x4 ^ 0xFFFF)) << 8;                    // [00000000|WvvvvLpp|R0B0m0mm|00000000].
                                                         //            ____    _ _
      x ^= xorMsk;                                       // [_OPCODE_|WvvvvLpp|R1Bmmmmm|VEX3|XOP].
      EMIT_32(x);
4108

4109
4110
      rbReg &= 0x7;
      EMIT_BYTE(x86EncodeMod(3, opReg, rbReg));
4111

4112
4113
4114
      if (imLen == 0) goto EmitDone;
      EMIT_BYTE(imVal & 0xFF);
      goto EmitDone;
4115
4116
    }
    else {
4117
4118
      // 'mmmmm' must be '00001'.
      ASMJIT_ASSERT((x & 0x1F) == 0x01);
4119

4120
      x = ((x >> 8) ^ x) ^ 0xF9;
4121
      EMIT_BYTE(kX86ByteVex2);
4122
4123
4124
4125
4126
4127
4128
4129
4130
      EMIT_BYTE(x);
      EMIT_BYTE(opCode);

      rbReg &= 0x7;
      EMIT_BYTE(x86EncodeMod(3, opReg, rbReg));

      if (imLen == 0) goto EmitDone;
      EMIT_BYTE(imVal & 0xFF);
      goto EmitDone;
4131
4132
4133
    }
  }

4134
4135
4136
4137
EmitVexEvexM:
  ASMJIT_ASSERT(rmRel != nullptr);
  ASMJIT_ASSERT(rmRel->getOp() == Operand::kOpMem);
  rmInfo = x86MemInfo[rmRel->as<X86Mem>().getBaseIndexType()];
4138

4139
4140
4141
  // Segment-override prefix.
  if (rmRel->as<X86Mem>().hasSegment())
    EMIT_BYTE(x86SegmentPrefix[rmRel->as<X86Mem>().getSegmentId()]);
4142

4143
4144
4145
  // Address-override prefix.
  if (rmInfo & _getAddressOverrideMask())
    EMIT_BYTE(0x67);
4146

4147
4148
  rbReg = rmRel->as<X86Mem>().hasBaseReg()  ? rmRel->as<X86Mem>().getBaseId()  : uint32_t(0);
  rxReg = rmRel->as<X86Mem>().hasIndexReg() ? rmRel->as<X86Mem>().getIndexId() : uint32_t(0);
4149

4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
  {
    // VEX instructions use only 0-1 BYTE immediate.
    ASMJIT_ASSERT(imLen <= 1);

    // Construct `x` - a complete EVEX|VEX prefix.
    uint32_t x = ((opReg << 4 ) & 0x0000F980U) |         // [........|........|Vvvvv..R|R.......].
                 ((rxReg << 3 ) & 0x00000040U) |         // [........|........|........|.X......].
                 ((rxReg << 15) & 0x00080000U) |         // [........|....X...|........|........].
                 ((rbReg << 2 ) & 0x00000020U) |         // [........|........|........|..B.....].
                 (x86ExtractLLMM(opCode, options)) |     // [........|.LL.X...|Vvvvv..R|RXBmmmmm].
                 (_extraReg.getId() << 16)         ;     // [........|.LL.Xaaa|Vvvvv..R|RXBmmmmm].
    opReg &= 0x07U;

    // Mark invalid VEX (force EVEX) case:               // [@.......|.LL.Xaaa|Vvvvv..R|RXBmmmmm].
    x |= (~commonData->getFlags() & X86Inst::kFlagVex) << (31 - Utils::firstBitOfT<X86Inst::kFlagVex>());

    // Handle AVX512 options by a single branch.
    const uint32_t kAvx512Options = X86Inst::kOption1ToX    |
                                    X86Inst::kOptionZMask   |
                                    X86Inst::kOptionSAE     |
                                    X86Inst::kOptionER      ;
    if (options & kAvx512Options) {
      // {er} and {sae} are both invalid if memory operand is used.
      if (ASMJIT_UNLIKELY(options & (X86Inst::kOptionSAE | X86Inst::kOptionER)))
        goto InvalidEROrSAE;

      x |= options & (X86Inst::kOption1ToX |             // [@.......|.LLbXaaa|Vvvvv..R|RXBmmmmm].
                      X86Inst::kOptionZMask);            // [@.......|zLLbXaaa|Vvvvv..R|RXBmmmmm].
    }
4179

4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
    // Check if EVEX is required by checking bits in `x` :  [@.......|xx.xxxxx|x......x|...x....].
    if (x & 0x80DF8110U) {
      uint32_t y = ((x << 4) & 0x00080000U) |            // [@.......|....V...|........|........].
                   ((x >> 4) & 0x00000010U) ;            // [@.......|....V...|........|...R....].
      x  = (x & 0x00FF78E3U) | y;                        // [........|zLLbVaaa|0vvvv000|RXBR00mm].
      x  = (x << 8) |                                    // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
           ((opCode >> kSHR_W_PP) & 0x00830000U) |       // [zLLbVaaa|Wvvvv0pp|RBBR00mm|00000000].
           ((opCode >> kSHR_W_EW) & 0x00800000U) ;       // [zLLbVaaa|Wvvvv0pp|RBBR00mm|00000000] (added EVEX.W).
                                                         //      _     ____    ____
      x ^= 0x087CF000U | kX86ByteEvex;                   // [zLLbVaaa|Wvvvv1pp|RBBR00mm|01100010].

      EMIT_32(x);
      EMIT_BYTE(opCode);

      if (opCode & 0x10000000U) {
        // Broadcast, change the compressed displacement scale to either x4 (SHL 2) or x8 (SHL 3)
        // depending on instruction's W. If 'W' is 1 'SHL' must be 3, otherwise it must be 2.
        opCode &=~static_cast<uint32_t>(X86Inst::kOpCode_CDSHL_Mask);
        opCode |= ((x & 0x00800000U) ? 3 : 2) << X86Inst::kOpCode_CDSHL_Shift;
      }
      else {
        // Add the compressed displacement 'SHF' to the opcode based on 'TTWLL'.
        uint32_t TTWLL = ((opCode >> (X86Inst::kOpCode_CDTT_Shift - 3)) & 0x18) +
                         ((opCode >> (X86Inst::kOpCode_W_Shift    - 2)) & 0x04) +
                         ((x >> 29) & 0x3);
        opCode += x86CDisp8SHL[TTWLL];
      }
    }
    else {
      // Not EVEX, prepare `x` for VEX2 or VEX3:        x = [........|00L00000|0vvvv000|RXB0mmmm].
      x |= ((opCode >> (kSHR_W_PP + 8)) & 0x8300U) |     // [00000000|00L00000|Wvvvv0pp|RXB0mmmm].
           ((x      >> 11             ) & 0x0400U) ;     // [00000000|00L00000|WvvvvLpp|RXB0mmmm].
4212

4213
4214
      // Clear a possible CDisp specified by EVEX.
      opCode &= ~X86Inst::kOpCode_CDSHL_Mask;
4215

4216
4217
4218
      // Check if VEX3 is required / forced:                [........|........|x.......|.xx..x..].
      if (x & 0x0008064U) {
        uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opCode << 24);
4219

4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
        // Clear 'FORCE-VEX3' bit and all high bits.
        x  = (x & (0x4 ^ 0xFFFF)) << 8;                  // [00000000|WvvvvLpp|RXB0m0mm|00000000].
                                                         //            ____    ___
        x ^= xorMsk;                                     // [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP].
        EMIT_32(x);
      }
      else {
        // 'mmmmm' must be '00001'.
        ASMJIT_ASSERT((x & 0x1F) == 0x01);

        x = ((x >> 8) ^ x) ^ 0xF9;
        EMIT_BYTE(kX86ByteVex2);
        EMIT_BYTE(x);
        EMIT_BYTE(opCode);
      }
4235
4236
4237
    }
  }

4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
  // MOD|SIB address.
  if (!commonData->hasFlag(X86Inst::kFlagVsib))
    goto EmitModSib;

  // MOD|VSIB address without INDEX is invalid.
  if (rmInfo & kX86MemInfo_Index)
    goto EmitModVSib;
  goto InvalidInstruction;

  // --------------------------------------------------------------------------
  // [Emit - Jmp/Jcc/Call]
  // --------------------------------------------------------------------------

  // TODO: Should be adjusted after the support for multiple sections feature is added.
EmitJmpCall:
  {
    // Emit REX prefix if asked for (64-bit only).
    uint32_t rex = x86ExtractREX(opCode, options);
    if (rex) {
      if (options & X86Inst::_kOptionInvalidRex)
        goto InvalidRexPrefix;
      EMIT_BYTE(rex | kX86ByteRex);
    }
4261

4262
4263
4264
    uint64_t ip = static_cast<uint64_t>((intptr_t)(cursor - _bufferData));
    uint32_t rel32 = 0;
    uint32_t opCode8 = commonData->getAltOpCode();
4265

4266
4267
    uint32_t inst8Size  = 1 + 1; //          OPCODE + REL8 .
    uint32_t inst32Size = 1 + 4; // [PREFIX] OPCODE + REL32.
4268

4269
4270
4271
4272
4273
    // Jcc instructions with 32-bit displacement use 0x0F prefix,
    // other instructions don't. No other prefixes are used by X86.
    ASMJIT_ASSERT((opCode8 & X86Inst::kOpCode_MM_Mask) == 0);
    ASMJIT_ASSERT((opCode  & X86Inst::kOpCode_MM_Mask) == 0 ||
                  (opCode  & X86Inst::kOpCode_MM_Mask) == X86Inst::kOpCode_MM_0F);
4274

4275
4276
4277
    // Only one of these should be used at the same time.
    inst32Size += static_cast<uint32_t>(opReg != 0);
    inst32Size += static_cast<uint32_t>((opCode & X86Inst::kOpCode_MM_Mask) == X86Inst::kOpCode_MM_0F);
4278

4279
4280
4281
4282
4283
    if (rmRel->isLabel()) {
      label = _code->getLabelEntry(rmRel->as<Label>());
      if (!label) goto InvalidLabel;

      if (label->isBound()) {
4284
        // Bound label.
4285
4286
        rel32 = static_cast<uint32_t>((static_cast<uint64_t>(label->getOffset()) - ip - inst32Size) & 0xFFFFFFFFU);
        goto EmitJmpCallRel;
4287
4288
4289
      }
      else {
        // Non-bound label.
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
        if (opCode8 && (!opCode || (options & X86Inst::kOptionShortForm))) {
          EMIT_BYTE(opCode8);
          relOffset = -1;
          relSize = 1;
          goto EmitRel;
        }
        else {
          // Refuse also 'short' prefix, if specified.
          if (ASMJIT_UNLIKELY(!opCode || (options & X86Inst::kOptionShortForm) != 0))
            goto InvalidDisplacement;
4300

4301
4302
4303
          // Emit [PREFIX] OPCODE [/X] <DISP32>.
          if (opCode & X86Inst::kOpCode_MM_Mask)
            EMIT_BYTE(0x0F);
4304

4305
4306
4307
          EMIT_BYTE(opCode);
          if (opReg)
            EMIT_BYTE(x86EncodeMod(3, opReg, 0));
4308

4309
4310
4311
4312
4313
4314
          relOffset = -4;
          relSize = 4;
          goto EmitRel;
        }
      }
    }
4315

4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
    if (rmRel->isImm()) {
      uint64_t baseAddress = getCodeInfo().getBaseAddress();
      uint64_t jumpAddress = rmRel->as<Imm>().getUInt64();

      // If the base-address is known calculate a relative displacement and
      // check if it fits in 32 bits (which is always true in 32-bit mode).
      // Emit relative displacement as if it was a bound label if all checks pass.
      if (baseAddress != Globals::kNoBaseAddress) {
        uint64_t rel64 = jumpAddress - (ip + baseAddress) - inst32Size;
        if (getArchType() == ArchInfo::kTypeX86 || Utils::isInt32(static_cast<int64_t>(rel64))) {
          rel32 = static_cast<uint32_t>(rel64 & 0xFFFFFFFFU);
          goto EmitJmpCallRel;
        }
        else {
          // Relative displacement exceeds 32-bits - relocator can only
          // insert trampoline for jmp/call, but not for jcc/jecxz.
          if (ASMJIT_UNLIKELY(!x86IsJmpOrCall(instId)))
            goto InvalidDisplacement;
        }
      }
4336

4337
4338
      if (ASMJIT_UNLIKELY(_code->_relocations.willGrow(&_code->_baseHeap) != kErrorOk))
        goto NoHeapMemory;
4339

4340
4341
      err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel, 0);
      if (ASMJIT_UNLIKELY(err)) goto Failed;
4342

4343
4344
      re->_sourceSectionId = _section->getId();
      re->_data = static_cast<int64_t>(jumpAddress);
4345

4346
4347
4348
4349
4350
4351
4352
      if (ASMJIT_LIKELY(opCode)) {
        // 64-bit: Emit REX prefix so the instruction can be patched later.
        // REX prefix does nothing if not patched, but allows patching the
        // instruction to use MOD/M and to point to a memory location where
        // the final 64-bit address is stored.
        re->_size = 4;
        re->_sourceOffset = ip + inst32Size - 4;
4353

4354
4355
4356
4357
4358
        if (getArchType() != ArchInfo::kTypeX86 && x86IsJmpOrCall(instId)) {
          if (!rex) {
            re->_sourceOffset++;
            EMIT_BYTE(kX86ByteRex);
          }
4359

4360
4361
4362
          re->_type = RelocEntry::kTypeTrampoline;
          _code->_trampolinesSize += 8;
        }
4363

4364
4365
4366
        // Emit [PREFIX] OPCODE [/X] DISP32.
        if (opCode & X86Inst::kOpCode_MM_Mask)
          EMIT_BYTE(0x0F);
4367

4368
4369
4370
        EMIT_BYTE(opCode);
        if (opReg)
          EMIT_BYTE(x86EncodeMod(3, opReg, 0));
4371

4372
4373
4374
4375
4376
4377
4378
4379
4380
        EMIT_32(0);
      }
      else {
        re->_size = 1;
        re->_sourceOffset = ip + inst8Size - 1;

        // Emit OPCODE + DISP8.
        EMIT_BYTE(opCode8);
        EMIT_BYTE(0);
4381
      }
4382
      goto EmitDone;
4383
4384
    }

4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
    // Not Label|Imm -> Invalid.
    goto InvalidInstruction;

    // Emit jmp/call with relative displacement known at assembly-time. Decide
    // between 8-bit and 32-bit displacement encoding. Some instructions only
    // allow either 8-bit or 32-bit encoding, others allow both encodings.
EmitJmpCallRel:
    if (Utils::isInt8(static_cast<int32_t>(rel32 + inst32Size - inst8Size)) && opCode8 && !(options & X86Inst::kOptionLongForm)) {
      options |= X86Inst::kOptionShortForm;
      EMIT_BYTE(opCode8);
      EMIT_BYTE(rel32 + inst32Size - inst8Size);
      goto EmitDone;
    }
    else {
      if (ASMJIT_UNLIKELY(!opCode || (options & X86Inst::kOptionShortForm) != 0))
        goto InvalidDisplacement;

      options &= ~X86Inst::kOptionShortForm;
      if (opCode & X86Inst::kOpCode_MM_Mask)
        EMIT_BYTE(0x0F);
4405

4406
4407
4408
      EMIT_BYTE(opCode);
      if (opReg)
        EMIT_BYTE(x86EncodeMod(3, opReg, 0));
4409

4410
4411
4412
      EMIT_32(rel32);
      goto EmitDone;
    }
4413
4414
4415
  }

  // --------------------------------------------------------------------------
4416
  // [Emit - Relative]
4417
4418
  // --------------------------------------------------------------------------

4419
EmitRel:
4420
  {
4421
4422
    ASMJIT_ASSERT(!label->isBound());
    ASMJIT_ASSERT(relSize == 1 || relSize == 4);
4423
4424

    // Chain with label.
4425
4426
4427
4428
4429
4430
4431
4432
    size_t offset = (size_t)(cursor - _bufferData);
    LabelLink* link = _code->newLabelLink(label, _section->getId(), offset, relOffset);

    if (ASMJIT_UNLIKELY(!link))
      goto NoHeapMemory;

    if (re)
      link->relocId = re->getId();
4433
4434

    // Emit label size as dummy data.
4435
    if (relSize == 1)
4436
      EMIT_BYTE(0x01);
4437
4438
4439
    else // if (relSize == 4)
      EMIT_32(0x04040404);
  }
4440

4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
  if (imLen == 0)
    goto EmitDone;

  // --------------------------------------------------------------------------
  // [Emit - Immediate]
  // --------------------------------------------------------------------------

EmitImm:
  {
#if ASMJIT_ARCH_64BIT
    uint32_t i = imLen;
    uint64_t imm = static_cast<uint64_t>(imVal);
#else
    uint32_t i = imLen;
    uint32_t imm = static_cast<uint32_t>(imVal & 0xFFFFFFFFU);
#endif

    // Many instructions just use a single byte immediate, so make it fast.
    EMIT_BYTE(imm & 0xFFU); if (--i == 0) goto EmitDone;
    imm >>= 8;
    EMIT_BYTE(imm & 0xFFU); if (--i == 0) goto EmitDone;
    imm >>= 8;
    EMIT_BYTE(imm & 0xFFU); if (--i == 0) goto EmitDone;
    imm >>= 8;
    EMIT_BYTE(imm & 0xFFU); if (--i == 0) goto EmitDone;

    // Can be 1-4 or 8 bytes, this handles the remaining high DWORD of an 8-byte immediate.
    ASMJIT_ASSERT(i == 4);

#if ASMJIT_ARCH_64BIT
    imm >>= 8;
    EMIT_32(static_cast<uint32_t>(imm));
#else
    EMIT_32(static_cast<uint32_t>((static_cast<uint64_t>(imVal) >> 32) & 0xFFFFFFFFU));
#endif
4476
4477
4478
  }

  // --------------------------------------------------------------------------
  // [Done]
  // --------------------------------------------------------------------------

4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
EmitDone:
#if !defined(ASMJIT_DISABLE_LOGGING)
  // Logging is a performance hit anyway, so make it the unlikely case.
  if (ASMJIT_UNLIKELY(options & CodeEmitter::kOptionLoggingEnabled))
    _emitLog(instId, options, o0, o1, o2, o3, relSize, imLen, cursor);
#endif // !ASMJIT_DISABLE_LOGGING

  resetOptions();
  resetExtraReg();
  resetInlineComment();

  _bufferPtr = cursor;
  return kErrorOk;

  // --------------------------------------------------------------------------
  // [Error Cases]
  // --------------------------------------------------------------------------

#define ERROR_HANDLER(ERROR)                \
ERROR:                                      \
  err = DebugUtils::errored(kError##ERROR); \
  goto Failed;

ERROR_HANDLER(NoHeapMemory)
ERROR_HANDLER(InvalidArgument)
ERROR_HANDLER(InvalidLabel)
ERROR_HANDLER(InvalidInstruction)
ERROR_HANDLER(InvalidLockPrefix)
ERROR_HANDLER(InvalidXAcquirePrefix)
ERROR_HANDLER(InvalidXReleasePrefix)
ERROR_HANDLER(InvalidRepPrefix)
ERROR_HANDLER(InvalidRexPrefix)
ERROR_HANDLER(InvalidBroadcast)
ERROR_HANDLER(InvalidEROrSAE)
ERROR_HANDLER(InvalidAddress)
ERROR_HANDLER(InvalidAddressIndex)
ERROR_HANDLER(InvalidAddress64Bit)
ERROR_HANDLER(InvalidDisplacement)
ERROR_HANDLER(InvalidSegment)
ERROR_HANDLER(InvalidImmediate)
ERROR_HANDLER(OperandSizeMismatch)
ERROR_HANDLER(AmbiguousOperandSize)
ERROR_HANDLER(NotConsecutiveRegs)

Failed:
  return _emitFailed(err, instId, options, o0, o1, o2, o3);
}

// ============================================================================
// [asmjit::X86Assembler - Align]
// ============================================================================

// Pads the buffer of the current section with `Utils::alignDiff(getOffset(),
// alignment)` bytes.
//
// `mode` selects what the padding consists of:
//   - `kAlignCode` - `0x90` (NOP) bytes, or multi-byte NOP instructions of up
//     to 9 bytes each when the `kHintOptimizedAlign` hint is set.
//   - `kAlignData` - `0xCC` bytes.
//   - `kAlignZero` - `0x00` bytes.
//
// `alignment` must be a power of two not greater than `Globals::kMaxAlignment`;
// values of 0 and 1 are accepted and emit nothing.
//
// Returns `kErrorOk` on success (including the case where no padding is
// needed). Returns `kErrorInvalidArgument` (also stored through
// `setLastError()`) if `mode` is not below `kAlignCount` or `alignment` is
// invalid. An error reported by `growBuffer()` is stored through
// `setLastError()` and returned as well.
Error X86Assembler::align(uint32_t mode, uint32_t alignment) {
#if !defined(ASMJIT_DISABLE_LOGGING)
  // The directive is logged before the arguments are validated, so a rejected
  // request still shows up in the log.
  if (_globalOptions & kOptionLoggingEnabled)
    _code->_logger->logf("%s.align %u\n", _code->_logger->getIndentation(), alignment);
#endif // !ASMJIT_DISABLE_LOGGING

  if (mode >= kAlignCount)
    return setLastError(DebugUtils::errored(kErrorInvalidArgument));

  // Alignment of 0 or 1 never requires padding.
  if (alignment <= 1)
    return kErrorOk;

  if (!Utils::isPowerOf2(alignment) || alignment > Globals::kMaxAlignment)
    return setLastError(DebugUtils::errored(kErrorInvalidArgument));

  // `i` is the number of padding bytes that still have to be emitted.
  uint32_t i = static_cast<uint32_t>(Utils::alignDiff<size_t>(getOffset(), alignment));
  if (i == 0)
    return kErrorOk;

  if (getRemainingSpace() < i) {
    Error err = _code->growBuffer(&_section->_buffer, i);
    if (ASMJIT_UNLIKELY(err)) return setLastError(err);
  }

  // NOTE(review): `_bufferPtr` is read only after the possible `growBuffer()`
  // call, presumably because growing may relocate the buffer - confirm before
  // reordering these statements.
  uint8_t* cursor = _bufferPtr;
  uint8_t pattern = 0x00;

  switch (mode) {
    case kAlignCode: {
      if (_globalHints & kHintOptimizedAlign) {
        // Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 2B (NOP).
        enum { kMaxNopSize = 9 };

        // Row `n - 1` holds the encoding of a single `n`-byte NOP instruction.
        static const uint8_t nopData[kMaxNopSize][kMaxNopSize] = {
          { 0x90 },
          { 0x66, 0x90 },
          { 0x0F, 0x1F, 0x00 },
          { 0x0F, 0x1F, 0x40, 0x00 },
          { 0x0F, 0x1F, 0x44, 0x00, 0x00 },
          { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
          { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
          { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
          { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
        };

        // Emit the longest NOP that still fits until nothing is left. `i` is
        // non-zero on entry, so `n` is always at least 1 here.
        do {
          uint32_t n = std::min<uint32_t>(i, kMaxNopSize);
          const uint8_t* src = nopData[n - 1];

          i -= n;
          do {
            EMIT_BYTE(*src++);
          } while (--n);
        } while (i);
      }

      // Used by the fill loop below only when the optimized path was not taken
      // (otherwise `i` is already zero).
      pattern = 0x90;
      break;
    }

    case kAlignData:
      pattern = 0xCC;
      break;

    case kAlignZero:
      // Pattern already set to zero.
      break;
  }

  // Fill whatever is left with the single-byte pattern.
  while (i) {
    EMIT_BYTE(pattern);
    i--;
  }

  _bufferPtr = cursor;
  return kErrorOk;
}

} // asmjit namespace

// [Api-End]
#include "../asmjit_apiend.h"
4616
4617

// [Guard]
#endif // ASMJIT_BUILD_X86