x86cpuinfo.cpp 10.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

// [Export]
#define ASMJIT_EXPORTS

// [Guard]
#include "../build.h"
#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)

// [Dependencies - AsmJit]
#include "../base/intutil.h"
#include "../x86/x86cpuinfo.h"

// [Dependencies - C]
#include <string.h>

// 2009-02-05: Thanks to Mike Tajmajer for VC7.1 compiler support. It shouldn't
// affect x64 compilation, because x64 compiler starts with VS2005 (VC8.0).
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#include <intrin.h>
#endif // _MSC_VER >= 1400

// [Api-Begin]
#include "../apibegin.h"

namespace asmjit {

// ============================================================================
// [asmjit::X86CpuVendor]
// ============================================================================

// Pairs a vendor id with the 12 bytes of the CPUID vendor string that
// identify it.
struct X86CpuVendor {
  // Vendor id stored into the cpu-info `_vendorId` member when `text` matches.
  uint32_t id;

  // Exactly 12 vendor-string bytes; there is no room for a NUL terminator.
  char text[12];
};

// Table of known vendor strings. Each entry maps the 12 vendor-string bytes
// to a vendor id; the table is scanned linearly and the first match wins.
//
// NOTE(review): the first VIA entry uses NUL bytes as separators; some public
// references list the VIA vendor string with spaces ("VIA VIA VIA ") -
// confirm against real hardware before relying on this entry.
static const X86CpuVendor x86CpuVendorList[] = {
  // "GenuineIntel"
  {
    kCpuVendorIntel,
    { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' }
  },
  // "AuthenticAMD"
  {
    kCpuVendorAmd,
    { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' }
  },
  // "VIA\0VIA\0VIA\0"
  {
    kCpuVendorVia,
    { 'V', 'I', 'A', '\0', 'V', 'I', 'A', '\0', 'V', 'I', 'A', '\0' }
  },
  // "CentaurHauls" (also reported as VIA).
  {
    kCpuVendorVia,
    { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' }
  }
};

// Returns true when the first 12 bytes at `vendorString` are identical to
// `info.text`.
//
// `vendorString` must point to at least 12 readable bytes; neither buffer
// has to be NUL terminated.
static ASMJIT_INLINE bool x86CpuVendorEq(const X86CpuVendor& info, const char* vendorString) {
  // Compare the raw bytes. Reading the `char` buffers through `uint32_t*`
  // would violate strict aliasing and assume 4-byte alignment of
  // `vendorString`; compilers turn a fixed-size memcmp into the same few
  // word compares anyway.
  return ::memcmp(info.text, vendorString, sizeof(info.text)) == 0;
}

static ASMJIT_INLINE void x86SimplifyBrandString(char* s) {
  // Always clear the current character in the buffer. It ensures that there
  // is no garbage after the string NULL terminator.
  char* d = s;

  char prev = 0;
  char curr = s[0];
  s[0] = '\0';

  for (;;) {
    if (curr == 0)
      break;

    if (curr == ' ') {
      if (prev == '@' || s[1] == ' ' || s[1] == '@')
        goto _Skip;
    }

    d[0] = curr;
    d++;
    prev = curr;

_Skip:
    curr = *++s;
    s[0] = '\0';
  }

  d[0] = '\0';
}

// ============================================================================
// [asmjit::X86CpuUtil]
// ============================================================================

// This is messy, I know. CPUID is available as an intrinsic since VS2005, but
// we should support other compilers as well. The main problem is that MS
// compilers do not allow inline assembler in 64-bit mode, so we need both the
// intrinsic and the asm version.

// callCpuId() and detect() for x86 and x64 platforms begin here.
#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64)
// Executes the CPUID instruction with EAX set to `inEax` and ECX set to
// `inEcx` and stores the resulting EAX, EBX, ECX and EDX registers (in this
// order) into `outResult`.
//
// Three implementations are selected at compile time: the MSVC intrinsic,
// MSVC 32-bit inline assembler for old compilers, and GCC-style inline
// assembler. With any other compiler the body is empty and `outResult` is
// left untouched.
void X86CpuUtil::callCpuId(uint32_t inEax, uint32_t inEcx, X86CpuId* outResult) {

#if defined(_MSC_VER)
// 2009-02-05: Thanks to Mike Tajmajer for supporting VC7.1 compiler.
// ASMJIT_HOST_X64 is here only for readability, only VS2005 and newer can
// compile 64-bit code.
# if _MSC_VER >= 1400 || defined(ASMJIT_HOST_X64)
  // Done by intrinsics.
  // NOTE(review): `__cpuidex` is presumably not provided by every compiler
  // that satisfies `_MSC_VER >= 1400` - confirm the minimum supported version.
  __cpuidex(reinterpret_cast<int*>(outResult->i), inEax, inEcx);
# else // _MSC_VER < 1400
  uint32_t cpuid_eax = inEax;
  uint32_t cpuid_ecx = inEcx;
  uint32_t* cpuid_out = outResult->i;

  __asm {
    mov     eax, cpuid_eax
    mov     ecx, cpuid_ecx
    mov     edi, cpuid_out
    cpuid
    mov     dword ptr[edi +  0], eax
    mov     dword ptr[edi +  4], ebx
    mov     dword ptr[edi +  8], ecx
    mov     dword ptr[edi + 12], edx
  }
# endif // _MSC_VER < 1400

#elif defined(__GNUC__)
// Note, patched to preserve ebx/rbx register which is used by GCC. The
// register is parked in edi/rdi across CPUID and swapped back afterwards, so
// the EBX result is delivered through the "=D" output.
# if defined(ASMJIT_HOST_X86)
#  define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
  asm ("mov %%ebx, %%edi\n"  \
       "cpuid\n"             \
       "xchg %%edi, %%ebx\n" \
       : "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) : "a" (inEax), "c" (inEcx))
# else
#  define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
  asm ("mov %%rbx, %%rdi\n"  \
       "cpuid\n"             \
       "xchg %%rdi, %%rbx\n" \
       : "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) : "a" (inEax), "c" (inEcx))
# endif
  __myCpuId(inEax, inEcx, outResult->eax, outResult->ebx, outResult->ecx, outResult->edx);
#endif // COMPILER
}

// Queries the host processor through CPUID and fills `cpuInfo` with the
// vendor string/id, family/model/stepping, miscellaneous leaf-1 data, the
// feature flags and the (simplified) brand string.
//
// Leaves used: 0 (vendor, max basic leaf), 1 (signature and features),
// 7 (only when the max basic leaf is >= 7) and the extended leaves
// 0x80000001..0x80000004 (extended features and brand string), limited to
// what leaf 0x80000000 reports as available.
void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
  X86CpuId regs;

  uint32_t i;
  uint32_t maxId;

  // Clear everything except the '_size' member.
  // NOTE(review): only `sizeof(CpuInfo)` bytes are cleared - confirm that
  // every member added by X86CpuInfo is assigned unconditionally below.
  ::memset(reinterpret_cast<uint8_t*>(cpuInfo) + sizeof(uint32_t),
    0, sizeof(CpuInfo) - sizeof(uint32_t));

  // Fill safe defaults.
  cpuInfo->_hwThreadsCount = CpuInfo::detectHwThreadsCount();

  // --------------------------------------------------------------------------
  // [CPUID EAX=0x00000000]
  // --------------------------------------------------------------------------

  // Get vendor string/id.
  callCpuId(0, 0, &regs);

  maxId = regs.eax;

  // The vendor string is returned in EBX, EDX, ECX (in this order).
  ::memcpy(cpuInfo->_vendorString, &regs.ebx, 4);
  ::memcpy(cpuInfo->_vendorString + 4, &regs.edx, 4);
  ::memcpy(cpuInfo->_vendorString + 8, &regs.ecx, 4);

  // `_vendorId` keeps its cleared value if the vendor string is not known.
  for (i = 0; i < ASMJIT_ARRAY_SIZE(x86CpuVendorList); i++) {
    if (x86CpuVendorEq(x86CpuVendorList[i], cpuInfo->_vendorString)) {
      cpuInfo->_vendorId = x86CpuVendorList[i].id;
      break;
    }
  }

  // --------------------------------------------------------------------------
  // [CPUID EAX=0x00000001]
  // --------------------------------------------------------------------------

  // Get feature flags in ecx/edx and family/model in eax.
  callCpuId(1, 0, &regs);

  // Fill family and model fields.
  const uint32_t baseFamily = (regs.eax >> 8) & 0x0F;

  cpuInfo->_family   = baseFamily;
  cpuInfo->_model    = (regs.eax >> 4) & 0x0F;
  cpuInfo->_stepping = (regs.eax     ) & 0x0F;

  // Use extended family and model fields. The extended model applies to base
  // family 0x06 as well as 0x0F (otherwise the model of family-6 processors
  // is truncated to its low 4 bits); the extended family applies only to base
  // family 0x0F.
  if (baseFamily == 0x06 || baseFamily == 0x0F)
    cpuInfo->_model  += ((regs.eax >> 16) & 0x0F) << 4;
  if (baseFamily == 0x0F)
    cpuInfo->_family += ((regs.eax >> 20) & 0xFF);

  cpuInfo->_processorType        = ((regs.eax >> 12) & 0x03);
  cpuInfo->_brandIndex           = ((regs.ebx      ) & 0xFF);
  cpuInfo->_flushCacheLineSize   = ((regs.ebx >>  8) & 0xFF) * 8;
  cpuInfo->_maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);

  if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureSse3);
  if (regs.ecx & 0x00000002U) cpuInfo->addFeature(kX86CpuFeaturePclmulqdq);
  if (regs.ecx & 0x00000008U) cpuInfo->addFeature(kX86CpuFeatureMonitorMWait);
  if (regs.ecx & 0x00000200U) cpuInfo->addFeature(kX86CpuFeatureSsse3);
  if (regs.ecx & 0x00002000U) cpuInfo->addFeature(kX86CpuFeatureCmpXchg16B);
  if (regs.ecx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureSse41);
  if (regs.ecx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureSse42);
  if (regs.ecx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMovbe);
  if (regs.ecx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeaturePopcnt);
  if (regs.ecx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureAesni);
  if (regs.ecx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeatureRdrand);

  if (regs.edx & 0x00000010U) cpuInfo->addFeature(kX86CpuFeatureRdtsc);
  if (regs.edx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeatureCmpXchg8B);
  if (regs.edx & 0x00008000U) cpuInfo->addFeature(kX86CpuFeatureCmov);
  if (regs.edx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeatureMmx);
  if (regs.edx & 0x01000000U) cpuInfo->addFeature(kX86CpuFeatureFxsr);
  if (regs.edx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureSse).addFeature(kX86CpuFeatureMmxExt);
  if (regs.edx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureSse).addFeature(kX86CpuFeatureSse2);
  if (regs.edx & 0x10000000U) cpuInfo->addFeature(kX86CpuFeatureMultithreading);

  if (cpuInfo->_vendorId == kCpuVendorAmd && (regs.edx & 0x10000000U)) {
    // AMD sets Multithreading to ON if it has more cores.
    if (cpuInfo->_hwThreadsCount == 1)
      cpuInfo->_hwThreadsCount = 2;
  }

  // Detect AVX.
  // NOTE(review): only the CPU flag is checked here; whether the operating
  // system has enabled AVX state (OSXSAVE/XGETBV) is not verified.
  if (regs.ecx & 0x10000000U) {
    cpuInfo->addFeature(kX86CpuFeatureAvx);

    // FMA3 is CPUID.1:ECX bit 12, F16C is bit 29. XOP and FMA4 are not
    // reported by this leaf, see the 0x80000001 leaf below.
    if (regs.ecx & 0x00001000U) cpuInfo->addFeature(kX86CpuFeatureFma3);
    if (regs.ecx & 0x20000000U) cpuInfo->addFeature(kX86CpuFeatureF16C);
  }

  // Detect new features if the processor supports CPUID-07.
  if (maxId >= 7) {
    callCpuId(7, 0, &regs);

    if (regs.ebx & 0x00000001) cpuInfo->addFeature(kX86CpuFeatureFsGsBase);
    if (regs.ebx & 0x00000008) cpuInfo->addFeature(kX86CpuFeatureBmi);
    if (regs.ebx & 0x00000010) cpuInfo->addFeature(kX86CpuFeatureHle);
    if (regs.ebx & 0x00000100) cpuInfo->addFeature(kX86CpuFeatureBmi2);
    if (regs.ebx & 0x00000200) cpuInfo->addFeature(kX86CpuFeatureRepMovsbStosbExt);
    if (regs.ebx & 0x00000800) cpuInfo->addFeature(kX86CpuFeatureRtm);

    // AVX2 depends on AVX.
    if (cpuInfo->hasFeature(kX86CpuFeatureAvx)) {
      if (regs.ebx & 0x00000020) cpuInfo->addFeature(kX86CpuFeatureAvx2);
    }
  }

  // --------------------------------------------------------------------------
  // [CPUID EAX=0x80000000]
  // --------------------------------------------------------------------------

  // Calling cpuid with 0x80000000 as the in argument gets the number of valid
  // extended IDs.
  callCpuId(0x80000000, 0, &regs);

  // Leaves above 0x80000004 are not used; the loop below does not run at all
  // when the reported maximum is below 0x80000001.
  uint32_t maxExtId = IntUtil::iMin<uint32_t>(regs.eax, 0x80000004);
  uint32_t* brand = reinterpret_cast<uint32_t*>(cpuInfo->_brandString);

  for (i = 0x80000001; i <= maxExtId; i++) {
    callCpuId(i, 0, &regs);

    switch (i) {
      case 0x80000001:
        if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureLahfSahf);
        if (regs.ecx & 0x00000020U) cpuInfo->addFeature(kX86CpuFeatureLzcnt);
        if (regs.ecx & 0x00000040U) cpuInfo->addFeature(kX86CpuFeatureSse4A);
        if (regs.ecx & 0x00000080U) cpuInfo->addFeature(kX86CpuFeatureMsse);
        if (regs.ecx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeaturePrefetch);

        // FFXSR is EDX bit 25 of this leaf.
        if (regs.edx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureExecuteDisableBit);
        if (regs.edx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureFfxsr);
        if (regs.edx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMmxExt);
        if (regs.edx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureRdtscp);
        if (regs.edx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeature3dNowExt).addFeature(kX86CpuFeatureMmxExt);
        if (regs.edx & 0x80000000U) cpuInfo->addFeature(kX86CpuFeature3dNow);

        // XOP (ECX bit 11) and FMA4 (ECX bit 16) are reported by this
        // extended leaf; both depend on AVX, which was detected from leaf 1.
        if (cpuInfo->hasFeature(kX86CpuFeatureAvx)) {
          if (regs.ecx & 0x00000800U) cpuInfo->addFeature(kX86CpuFeatureXop);
          if (regs.ecx & 0x00010000U) cpuInfo->addFeature(kX86CpuFeatureFma4);
        }
        break;

      case 0x80000002:
      case 0x80000003:
      case 0x80000004:
        // Each of these three leaves contributes 16 bytes of the brand string.
        *brand++ = regs.eax;
        *brand++ = regs.ebx;
        *brand++ = regs.ecx;
        *brand++ = regs.edx;
        break;

      default:
        // Additional features can be detected in the future.
        break;
    }
  }

  // Simplify the brand string (remove unnecessary spaces to make printing nicer).
  x86SimplifyBrandString(cpuInfo->_brandString);
}
#endif

} // asmjit namespace

// [Api-End]
#include "../apiend.h"

// [Guard]
#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64