gpu_info_oneapi.c 7.72 KB
Newer Older
Wang,Zhe's avatar
Wang,Zhe committed
1
2
3
4
5
6
#ifndef __APPLE__

#include "gpu_info_oneapi.h"

#include <string.h>

Daniel Hiltgen's avatar
Daniel Hiltgen committed
7
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) {
Wang,Zhe's avatar
Wang,Zhe committed
8
9
  ze_result_t ret;
  resp->err = NULL;
Daniel Hiltgen's avatar
Daniel Hiltgen committed
10
11
12
13
  resp->oh.devices = NULL;
  resp->oh.num_devices = NULL;
  resp->oh.drivers = NULL;
  resp->oh.num_drivers = 0;
Wang,Zhe's avatar
Wang,Zhe committed
14
15
  const int buflen = 256;
  char buf[buflen + 1];
Daniel Hiltgen's avatar
Daniel Hiltgen committed
16
  int i, d, count;
Daniel Hiltgen's avatar
Daniel Hiltgen committed
17
  struct lookup {
Wang,Zhe's avatar
Wang,Zhe committed
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
    char *s;
    void **p;
  } l[] = {
      {"zesInit", (void *)&resp->oh.zesInit},
      {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
      {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
      {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
      {"zesDeviceEnumMemoryModules",
       (void *)&resp->oh.zesDeviceEnumMemoryModules},
      {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
      {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
      {NULL, NULL},
  };

  resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
33
  if (!resp->oh.handle) {
Wang,Zhe's avatar
Wang,Zhe committed
34
35
36
37
38
39
40
41
42
43
44
45
46
47
    char *msg = LOAD_ERR();
    snprintf(buf, buflen,
             "Unable to load %s library to query for Intel GPUs: %s\n",
             oneapi_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
  }

  // TODO once we've squashed the remaining corner cases remove this log
  LOG(resp->oh.verbose,
      "wiring Level-Zero management library functions in %s\n",
      oneapi_lib_path);

Daniel Hiltgen's avatar
Daniel Hiltgen committed
48
  for (i = 0; l[i].s != NULL; i++) {
Wang,Zhe's avatar
Wang,Zhe committed
49
50
51
52
    // TODO once we've squashed the remaining corner cases remove this log
    LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);

    *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
53
    if (!l[i].p) {
Wang,Zhe's avatar
Wang,Zhe committed
54
55
56
57
58
59
60
61
62
63
64
65
      resp->oh.handle = NULL;
      char *msg = LOAD_ERR();
      LOG(resp->oh.verbose, "dlerr: %s\n", msg);
      UNLOAD_LIBRARY(resp->oh.handle);
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
      free(msg);
      resp->err = strdup(buf);
      return;
    }
  }

  ret = (*resp->oh.zesInit)(0);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
66
  if (ret != ZE_RESULT_SUCCESS) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
67
68
    LOG(resp->oh.verbose, "zesInit err: %x\n", ret);
    snprintf(buf, buflen, "oneapi vram init failure: %x", ret);
Wang,Zhe's avatar
Wang,Zhe committed
69
    resp->err = strdup(buf);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
70
71
    oneapi_release(resp->oh);
    return;
Wang,Zhe's avatar
Wang,Zhe committed
72
73
  }

Daniel Hiltgen's avatar
Daniel Hiltgen committed
74
75
  count = 0;
  ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, NULL);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
76
  if (ret != ZE_RESULT_SUCCESS) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
77
78
79
80
81
82
83
84
85
86
    LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
    snprintf(buf, buflen, "unable to get driver count: %x", ret);
    resp->err = strdup(buf);
    oneapi_release(resp->oh);
    return;
  }
  LOG(resp->oh.verbose, "oneapi driver count: %d\n", resp->oh.num_drivers);
  resp->oh.drivers = malloc(resp->oh.num_drivers * sizeof(zes_driver_handle_t));
  resp->oh.num_devices = malloc(resp->oh.num_drivers * sizeof(uint32_t));
  memset(&resp->oh.num_devices[0], 0, resp->oh.num_drivers * sizeof(uint32_t));
Daniel Hiltgen's avatar
Daniel Hiltgen committed
87
88
  resp->oh.devices =
      malloc(resp->oh.num_drivers * sizeof(zes_device_handle_t *));
Daniel Hiltgen's avatar
Daniel Hiltgen committed
89
  ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, &resp->oh.drivers[0]);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
90
  if (ret != ZE_RESULT_SUCCESS) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
91
92
93
94
95
96
97
98
    LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
    snprintf(buf, buflen, "unable to get driver count: %x", ret);
    resp->err = strdup(buf);
    oneapi_release(resp->oh);
    return;
  }

  for (d = 0; d < resp->oh.num_drivers; d++) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
99
100
101
    ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d],
                                   &resp->oh.num_devices[d], NULL);
    if (ret != ZE_RESULT_SUCCESS) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
102
103
104
105
106
107
      LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
      snprintf(buf, buflen, "unable to get device count: %x", ret);
      resp->err = strdup(buf);
      oneapi_release(resp->oh);
      return;
    }
Daniel Hiltgen's avatar
Daniel Hiltgen committed
108
109
110
111
112
    resp->oh.devices[d] =
        malloc(resp->oh.num_devices[d] * sizeof(zes_device_handle_t));
    ret = (*resp->oh.zesDeviceGet)(
        resp->oh.drivers[d], &resp->oh.num_devices[d], resp->oh.devices[d]);
    if (ret != ZE_RESULT_SUCCESS) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
113
114
115
116
117
118
119
120
      LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
      snprintf(buf, buflen, "unable to get device count: %x", ret);
      resp->err = strdup(buf);
      oneapi_release(resp->oh);
      return;
    }
    count += resp->oh.num_devices[d];
  }
Wang,Zhe's avatar
Wang,Zhe committed
121
122
123
124

  return;
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
125
126
/*
 * Query name, id and memory totals for one device of an initialized handle.
 *
 * h      - handle populated by oneapi_init (passed by value)
 * driver - index into h.drivers / h.devices
 * device - index into h.devices[driver]
 * resp   - output; on success gpu_name, gpu_id, total and free are filled
 *          in and err is NULL. On failure err is a heap-allocated message
 *          (from strdup).
 *
 * total and free are the sums of zes_mem_state_t.size and .free over every
 * memory module the device enumerates. If reading a module's state fails
 * part-way through, err is set and total/free hold the partial sums
 * accumulated so far.
 */
void oneapi_check_vram(oneapi_handle_t h, int driver, int device,
                       mem_info_t *resp) {
  ze_result_t ret;
  enum { buflen = 256 };
  char buf[buflen + 1];

  resp->err = NULL;

  if (h.handle == NULL) {
    resp->err = strdup("Level-Zero handle not initialized");
    return;
  }

  // Valid indices are [0, count); reject negatives, index == count, and
  // tables that were never allocated.
  if (h.num_devices == NULL || h.devices == NULL || driver < 0 ||
      (uint32_t)driver >= h.num_drivers || device < 0 ||
      (uint32_t)device >= h.num_devices[driver] ||
      h.devices[driver] == NULL) {
    resp->err = strdup("driver of device index out of bounds");
    return;
  }

  resp->total = 0;
  resp->free = 0;

  zes_device_ext_properties_t ext_props;
  ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
  ext_props.pNext = NULL;

  // Chain the extended properties onto the base properties query.
  zes_device_properties_t props;
  props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
  props.pNext = &ext_props;

  ret = (*h.zesDeviceGetProperties)(h.devices[driver][device], &props);
  if (ret != ZE_RESULT_SUCCESS) {
    snprintf(buf, buflen, "unable to get device properties: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  // The model name comes from the driver; never use it as a format string.
  snprintf(&resp->gpu_name[0], GPU_NAME_LEN, "%s", props.modelName);

  // TODO this needs to map to ONEAPI_DEVICE_SELECTOR syntax
  // (this is probably wrong...)
  // TODO - the driver isn't included - what if there are multiple drivers?
  snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", device);

  if (h.verbose) {
    // When in verbose mode, report more information about
    // the card we discover.
    LOG(h.verbose, "[%d:%d] oneAPI device name: %s\n", driver, device,
        props.modelName);
    LOG(h.verbose, "[%d:%d] oneAPI brand: %s\n", driver, device,
        props.brandName);
    LOG(h.verbose, "[%d:%d] oneAPI vendor: %s\n", driver, device,
        props.vendorName);
    LOG(h.verbose, "[%d:%d] oneAPI S/N: %s\n", driver, device,
        props.serialNumber);
    LOG(h.verbose, "[%d:%d] oneAPI board number: %s\n", driver, device,
        props.boardNumber);
  }

  // TODO
  // Compute Capability equivalent in resp->major, resp->minor, resp->patch

  // First call with a NULL array only queries the module count.
  uint32_t memCount = 0;
  ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount,
                                        NULL);
  if (ret != ZE_RESULT_SUCCESS) {
    snprintf(buf, buflen, "unable to enumerate Level-Zero memory modules: %x",
             ret);
    resp->err = strdup(buf);
    return;
  }

  LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);

  if (memCount == 0) {
    // No modules: total and free stay at zero.
    return;
  }

  zes_mem_handle_t *mems = calloc(memCount, sizeof(zes_mem_handle_t));
  if (mems == NULL) {
    resp->err = strdup("unable to allocate memory for Level-Zero memory modules");
    return;
  }

  ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount,
                                        mems);
  if (ret != ZE_RESULT_SUCCESS) {
    snprintf(buf, buflen, "unable to enumerate Level-Zero memory modules: %x",
             ret);
    resp->err = strdup(buf);
    free(mems);
    return;
  }

  for (uint32_t m = 0; m < memCount; m++) {
    zes_mem_state_t state;
    state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
    state.pNext = NULL;
    ret = (*h.zesMemoryGetState)(mems[m], &state);
    if (ret != ZE_RESULT_SUCCESS) {
      snprintf(buf, buflen, "unable to get memory state: %x", ret);
      resp->err = strdup(buf);
      free(mems);
      return;
    }

    resp->total += state.size;
    resp->free += state.free;
  }

  free(mems);
}
Wang,Zhe's avatar
Wang,Zhe committed
221

Daniel Hiltgen's avatar
Daniel Hiltgen committed
222
/*
 * Free the driver/device tables owned by `h` and unload the library.
 *
 * The handle is passed by value, so this function cannot reset the caller's
 * copy: after it returns, the caller's devices, num_devices, drivers and
 * handle fields are dangling and must be cleared by the caller before any
 * reuse.
 *
 * Each h.devices[d] for d < h.num_drivers is passed to free(), so the
 * devices table must contain either valid allocations or NULL entries.
 */
void oneapi_release(oneapi_handle_t h) {
  LOG(h.verbose, "releasing oneapi library\n");

  if (h.devices != NULL) {
    for (uint32_t d = 0; d < h.num_drivers; d++) {
      free(h.devices[d]);  // free(NULL) is a no-op
    }
    free(h.devices);
  }
  free(h.num_devices);
  free(h.drivers);

  // Only unload a library that was actually loaded.
  if (h.handle != NULL) {
    UNLOAD_LIBRARY(h.handle);
  }
}
Wang,Zhe's avatar
Wang,Zhe committed
246

Daniel Hiltgen's avatar
Daniel Hiltgen committed
247
248
/*
 * Return the number of devices recorded for driver index `driver`.
 *
 * Returns 0 when the handle is not initialized, when the per-driver count
 * table is missing, or when `driver` is outside [0, h.num_drivers).
 */
int oneapi_get_device_count(oneapi_handle_t h, int driver) {
  if (h.handle == NULL || h.num_devices == NULL) {
    return 0;
  }
  // Valid indices are [0, num_drivers); index == num_drivers is out of range.
  if (driver < 0 || (uint32_t)driver >= h.num_drivers) {
    return 0;
  }
  return (int)h.num_devices[driver];
}

#endif // __APPLE__