gpu_info_oneapi.c 7.66 KB
Newer Older
Wang,Zhe's avatar
Wang,Zhe committed
1
2
3
4
5
6
7
8
9
10
#ifndef __APPLE__

#include "gpu_info_oneapi.h"

#include <string.h>

// Record a failure that happened after the library was loaded: store a
// heap-allocated copy of msg in resp->err, release everything acquired so
// far, and clear the handle fields in resp.  oneapi_release() receives the
// handle by value, so the reset has to be done here on the caller's copy.
static void oneapi_init_fail(oneapi_init_resp_t *resp, const char *msg)
{
  resp->err = strdup(msg);
  oneapi_release(resp->oh);
  resp->oh.devices = NULL;
  resp->oh.num_devices = NULL;
  resp->oh.drivers = NULL;
  resp->oh.num_drivers = 0;
  resp->oh.handle = NULL;
}

// Load the Level-Zero management library at oneapi_lib_path, resolve the
// zes* entry points into resp->oh, call zesInit, and enumerate every driver
// and the devices each driver exposes.
//
// On success resp->err is NULL and resp->oh holds:
//   drivers[num_drivers]      - driver handles
//   num_devices[num_drivers]  - device count for each driver
//   devices[d][num_devices[d]] - device handles for driver d
// On failure resp->err is a strdup()'d message and anything acquired along
// the way has been released again.
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
{
  ze_result_t ret;
  resp->err = NULL;
  resp->oh.devices = NULL;
  resp->oh.num_devices = NULL;
  resp->oh.drivers = NULL;
  resp->oh.num_drivers = 0;
  const int buflen = 256;
  char buf[buflen + 1];
  int i;
  uint32_t d;
  // Symbol name -> slot in the handle where the resolved address is stored.
  struct lookup
  {
    char *s;
    void **p;
  } l[] = {
      {"zesInit", (void *)&resp->oh.zesInit},
      {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
      {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
      {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
      {"zesDeviceEnumMemoryModules",
       (void *)&resp->oh.zesDeviceEnumMemoryModules},
      {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
      {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
      {NULL, NULL},
  };

  resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
  if (!resp->oh.handle)
  {
    char *msg = LOAD_ERR();
    snprintf(buf, buflen,
             "Unable to load %s library to query for Intel GPUs: %s\n",
             oneapi_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
  }

  // TODO once we've squashed the remaining corner cases remove this log
  LOG(resp->oh.verbose,
      "wiring Level-Zero management library functions in %s\n",
      oneapi_lib_path);

  for (i = 0; l[i].s != NULL; i++)
  {
    // TODO once we've squashed the remaining corner cases remove this log
    LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);

    *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
    // Test the resolved address, not the (never NULL) slot pointer.
    if (!*l[i].p)
    {
      char *msg = LOAD_ERR();
      LOG(resp->oh.verbose, "dlerr: %s\n", msg);
      // Unload before clearing the handle, otherwise the library leaks.
      UNLOAD_LIBRARY(resp->oh.handle);
      resp->oh.handle = NULL;
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
      free(msg);
      resp->err = strdup(buf);
      return;
    }
  }

  ret = (*resp->oh.zesInit)(0);
  if (ret != ZE_RESULT_SUCCESS)
  {
    LOG(resp->oh.verbose, "zesInit err: %x\n", ret);
    snprintf(buf, buflen, "oneapi vram init failure: %x", ret);
    oneapi_init_fail(resp, buf);
    return;
  }

  // First call with a NULL array only reports how many drivers exist.
  ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, NULL);
  if (ret != ZE_RESULT_SUCCESS)
  {
    LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
    snprintf(buf, buflen, "unable to get driver count: %x", ret);
    oneapi_init_fail(resp, buf);
    return;
  }
  LOG(resp->oh.verbose, "oneapi driver count: %d\n", resp->oh.num_drivers);
  if (resp->oh.num_drivers == 0)
  {
    // Nothing to enumerate: keep the library loaded with empty tables.
    return;
  }

  // calloc so that every devices[d] starts out NULL; oneapi_release() walks
  // the whole table and must never see an uninitialized pointer.
  resp->oh.drivers = calloc(resp->oh.num_drivers, sizeof(zes_driver_handle_t));
  resp->oh.num_devices = calloc(resp->oh.num_drivers, sizeof(uint32_t));
  resp->oh.devices = calloc(resp->oh.num_drivers, sizeof(zes_device_handle_t *));
  if (resp->oh.drivers == NULL || resp->oh.num_devices == NULL ||
      resp->oh.devices == NULL)
  {
    oneapi_init_fail(resp, "unable to allocate memory for oneapi driver tables");
    return;
  }

  ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, &resp->oh.drivers[0]);
  if (ret != ZE_RESULT_SUCCESS)
  {
    LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
    snprintf(buf, buflen, "unable to get driver count: %x", ret);
    oneapi_init_fail(resp, buf);
    return;
  }

  for (d = 0; d < resp->oh.num_drivers; d++)
  {
    // Same two-step protocol per driver: query the count, then the handles.
    ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d], &resp->oh.num_devices[d], NULL);
    if (ret != ZE_RESULT_SUCCESS)
    {
      LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
      snprintf(buf, buflen, "unable to get device count: %x", ret);
      oneapi_init_fail(resp, buf);
      return;
    }
    if (resp->oh.num_devices[d] == 0)
    {
      // Driver exposes no devices; devices[d] stays NULL.
      continue;
    }
    resp->oh.devices[d] = calloc(resp->oh.num_devices[d], sizeof(zes_device_handle_t));
    if (resp->oh.devices[d] == NULL)
    {
      oneapi_init_fail(resp, "unable to allocate memory for oneapi device handles");
      return;
    }
    ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d], &resp->oh.num_devices[d], resp->oh.devices[d]);
    if (ret != ZE_RESULT_SUCCESS)
    {
      LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
      snprintf(buf, buflen, "unable to get device count: %x", ret);
      oneapi_init_fail(resp, buf);
      return;
    }
  }

  return;
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
131
// Query one device for its name and memory totals.
//
//   h      - handle populated by oneapi_init()
//   driver - index into h.drivers / h.devices
//   device - index into h.devices[driver]
//   resp   - receives gpu_name, gpu_id, and the sum of size/free over all
//            memory modules the device reports
//
// On failure resp->err is set to a strdup()'d message; resp->total and
// resp->free then hold whatever had been accumulated up to that point
// (zero if the failure happened before any module was read).
void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *resp)
{
  ze_result_t ret;
  resp->err = NULL;
  resp->total = 0;
  resp->free = 0;
  const int buflen = 256;
  char buf[buflen + 1];
  uint32_t m;

  if (h.handle == NULL)
  {
    resp->err = strdup("Level-Zero handle not initialized");
    return;
  }

  // Valid indexes are 0 .. count-1; reject negatives and missing tables
  // before indexing into num_devices / devices.
  if (h.num_devices == NULL || h.devices == NULL ||
      driver < 0 || device < 0 ||
      (uint32_t)driver >= h.num_drivers ||
      (uint32_t)device >= h.num_devices[driver])
  {
    resp->err = strdup("driver of device index out of bounds");
    return;
  }

  zes_device_ext_properties_t ext_props;
  ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
  ext_props.pNext = NULL;

  // Chain the extended properties struct behind the base one.
  zes_device_properties_t props;
  props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
  props.pNext = &ext_props;

  ret = (*h.zesDeviceGetProperties)(h.devices[driver][device], &props);
  if (ret != ZE_RESULT_SUCCESS)
  {
    snprintf(buf, buflen, "unable to get device properties: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  // The model name comes from the driver; never use it as a format string.
  snprintf(&resp->gpu_name[0], GPU_NAME_LEN, "%s", props.modelName);

  // TODO this needs to map to ONEAPI_DEVICE_SELECTOR syntax
  // (this is probably wrong...)
  // TODO - the driver isn't included - what if there are multiple drivers?
  snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", device);

  if (h.verbose)
  {
    // When in verbose mode, report more information about
    // the card we discover.
    LOG(h.verbose, "[%d:%d] oneAPI device name: %s\n", driver, device,
        props.modelName);
    LOG(h.verbose, "[%d:%d] oneAPI brand: %s\n", driver, device,
        props.brandName);
    LOG(h.verbose, "[%d:%d] oneAPI vendor: %s\n", driver, device,
        props.vendorName);
    LOG(h.verbose, "[%d:%d] oneAPI S/N: %s\n", driver, device,
        props.serialNumber);
    LOG(h.verbose, "[%d:%d] oneAPI board number: %s\n", driver, device,
        props.boardNumber);
  }

  // TODO
  // Compute Capability equivalent in resp->major, resp->minor, resp->patch

  // First call with a NULL array only reports the number of modules.
  uint32_t memCount = 0;
  ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount, NULL);
  if (ret != ZE_RESULT_SUCCESS)
  {
    snprintf(buf, buflen,
              "unable to enumerate Level-Zero memory modules: %x", ret);
    resp->err = strdup(buf);
    return;
  }

  LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);

  if (memCount == 0)
  {
    // No modules reported: total and free stay at zero.
    return;
  }

  zes_mem_handle_t *mems = calloc(memCount, sizeof(zes_mem_handle_t));
  if (mems == NULL)
  {
    resp->err = strdup("unable to allocate memory for Level-Zero memory modules");
    return;
  }

  ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount, mems);
  if (ret != ZE_RESULT_SUCCESS)
  {
    snprintf(buf, buflen,
              "unable to enumerate Level-Zero memory modules: %x", ret);
    resp->err = strdup(buf);
    free(mems);
    return;
  }

  for (m = 0; m < memCount; m++)
  {
    zes_mem_state_t state;
    state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
    state.pNext = NULL;
    ret = (*h.zesMemoryGetState)(mems[m], &state);
    if (ret != ZE_RESULT_SUCCESS)
    {
      snprintf(buf, buflen, "unable to get memory state: %x", ret);
      resp->err = strdup(buf);
      free(mems);
      return;
    }

    resp->total += state.size;
    resp->free += state.free;
  }

  free(mems);
}
Wang,Zhe's avatar
Wang,Zhe committed
232

Daniel Hiltgen's avatar
Daniel Hiltgen committed
233
234
235
236
237
238
239
// Free the driver/device tables referenced by h and unload the library.
//
// The handle is passed by value, so the caller's copy is NOT modified: after
// this returns, its pointers and library handle are dangling and must not be
// used (or released) again.
void oneapi_release(oneapi_handle_t h)
{
  uint32_t d;
  LOG(h.verbose, "releasing oneapi library\n");
  if (h.devices != NULL)
  {
    // One device-handle array per driver; free(NULL) is a no-op.
    for (d = 0; d < h.num_drivers; d++)
    {
      free(h.devices[d]);
    }
    free(h.devices);
  }
  free(h.num_devices);
  free(h.drivers);
  // Only unload a library that was actually loaded.
  if (h.handle != NULL)
  {
    UNLOAD_LIBRARY(h.handle);
  }
}
Wang,Zhe's avatar
Wang,Zhe committed
263

Daniel Hiltgen's avatar
Daniel Hiltgen committed
264
265
266
267
268
269
270
271
272
273
274
// Return the number of devices recorded for the given driver index, or 0
// when the handle is not initialized or the index is out of range.
int oneapi_get_device_count(oneapi_handle_t h, int driver)
{
  if (h.handle == NULL || h.num_devices == NULL)
  {
    return 0;
  }
  // Valid indexes are 0 .. num_drivers-1.
  if (driver < 0 || (uint32_t)driver >= h.num_drivers)
  {
    return 0;
  }
  return (int)h.num_devices[driver];
}

#endif // __APPLE__