mkfd_ioctl.h 8.89 KB
Newer Older
liming6's avatar
liming6 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
/*
 *Copyright 2016-2025 by HCSW Group.
 * All right reserved. See COPYRIGHT for detailed Information.
 */

#ifndef INCLUDE_ROCM_SMI_MKFD_IOCTL_H_
#define INCLUDE_ROCM_SMI_MKFD_IOCTL_H_

#include <linux/types.h>
#include <linux/ioctl.h>

#ifndef __KERNEL__
/* User-space builds do not get these names from <linux/types.h>. */
#include <stdbool.h>   /* bool  (struct mkfd_ioctl_regs_op_args) */
#include <sys/types.h> /* pid_t (struct proc_qinfo) */
#endif

/*
 * Run mode carried in struct mkfd_ioctl_set_process_mode_args.
 *
 * Keep in sync: kfd_ioctl.h holds a duplicate of this enum
 * (kfd_process_run_mode) and of the set-process-mode argument struct
 * (kfd_ioctl_set_process_mode_args). The UMD calls the interface through
 * both the kfd and the mkfd paths, and its management process and business
 * process each ship their own header with no common one. Any change made on
 * one side must therefore be mirrored on the other, otherwise the two
 * definitions drift apart.
 *
 * The enumerators keep their KFD_ prefix. Values are written out explicitly
 * because they cross the user/driver boundary.
 */
enum mkfd_process_run_mode {
    KFD_PROCESS_NONE_MODE  = 0,
    KFD_PROCESS_GROUP_MODE = 1,
    KFD_PROCESS_VGPU_MODE  = 2
};

/**
 * struct mkfd_ioctl_set_process_mode_args - argument of
 * MKFD_IOC_SET_PROCESS_RUN_MODE.
 * @mode: Process running mode: process group mode or virtual gpu mode.
 * @indentifier: In process group mode, the value that uniquely identifies
 *           one process group; in virtual gpu mode, a mask of vgpu ids
 *           (0 ~ 3). The misspelling is the real member name.
 * @gpu_id: GPU id; used in vgpu mode only.
 *
 * NOTE(review): there is no explicit padding between @mode and @indentifier
 * or after @gpu_id, so the layout depends on compiler alignment padding;
 * confirm 32-bit/64-bit compatibility before relying on it.
 */
struct mkfd_ioctl_set_process_mode_args {
    enum mkfd_process_run_mode mode;
    __u64 indentifier;
    __u32 gpu_id; /*vgpu mode used*/
};

/**
 * struct mkfd_ioctl_create_group_args - argument of MKFD_IOC_CREATE_GROUP.
 * @indentifier: Process group identifier (misspelling is the member name).
 * @vram_limit: VRAM usage limit for the group.
 * @group_id: Group id. NOTE(review): presumably written back by the driver;
 *            the direction is not stated in this header, confirm.
 */
struct mkfd_ioctl_create_group_args {
    __u64 indentifier;
    __u64 vram_limit;
    __u64 group_id;
};

/**
 * struct mkfd_ioctl_destroy_group_args - argument of MKFD_IOC_DESTROY_GROUP.
 * @indentifier: Identifier of the process group to destroy (misspelling is
 *               the member name).
 */
struct mkfd_ioctl_destroy_group_args {
    __u64 indentifier;
};

/**
 * struct mkfd_ioctl_query_group_info_args - argument of
 * MKFD_IOC_QUERY_GROUP_INFO.
 * @indentifier: Process group identifier (misspelling is the member name).
 * @vram_limit: VRAM usage limit of the group.
 * @vram_usage: VRAM usage of the group.
 * @gpu_id: GPU id.
 * @result: Signed status value. NOTE(review): its encoding and which members
 *          are inputs vs. outputs are not stated in this header, confirm
 *          against the driver.
 */
struct mkfd_ioctl_query_group_info_args {
    __u64 indentifier;
    __u64 vram_limit;
    __u64 vram_usage;
    __u32 gpu_id;
    __s32 result;
};

/**
 * struct mkfd_ioctl_query_group_info_by_id_args - argument of
 * MKFD_IOC_QUERY_GROUP_INFO_BY_ID.
 * @group_id: Group id. NOTE(review): presumably the lookup key, going by the
 *            ioctl name; confirm.
 * @indentifier: Process group identifier (misspelling is the member name).
 * @vram_limit: VRAM usage limit of the group.
 * @vram_usage: VRAM usage of the group.
 * @gpu_id: GPU id.
 * @result: Signed status value; encoding is not stated in this header.
 */
struct mkfd_ioctl_query_group_info_by_id_args {
    __u64 group_id;
    __u64 indentifier;
    __u64 vram_limit;
    __u64 vram_usage;
    __u32 gpu_id;
    __s32 result;
};

/**
 * struct mkfd_ioctl_update_group_args - argument of MKFD_IOC_UPDATE_GROUP.
 * @indentifier: Process group identifier (misspelling is the member name).
 * @gpu_id: GPU id.
 * @vram_limit: VRAM usage limit.
 *
 * NOTE(review): a __u32 is followed directly by a __u64 with no explicit pad
 * member, so the offset of @vram_limit depends on the ABI's __u64 alignment;
 * confirm 32-bit/64-bit compatibility.
 */
struct mkfd_ioctl_update_group_args {
    __u64 indentifier;
    __u32 gpu_id;
    __u64 vram_limit;
};

/**
 * struct mkfd_ioctl_create_vgpu_args - argument of MKFD_IOC_CREATE_VGPU.
 * @gpu_id: GPU the new vgpu belongs to.
 * @vgpu_id: vgpu id, 0-3.
 * @group_id: Group id passed to MKFD.
 * @indentifier: Identifier passed to MKFD; may be 0 (misspelling is the
 *               member name).
 * @vram_limit: VRAM usage limit.
 * @num_cu_mask: Must be a multiple of 32. NOTE(review): presumably the number
 *               of CU mask bits referenced by @cu_mask_ptr; confirm.
 * @cu_mask_ptr: NOTE(review): presumably a user-space address of the CU mask
 *               carried as a 64-bit integer; confirm.
 *
 * NOTE(review): no explicit pad member between @num_cu_mask and @cu_mask_ptr;
 * the layout relies on compiler alignment padding.
 */
struct mkfd_ioctl_create_vgpu_args {
    __u32 gpu_id; /*to MKFD*/
    __u32 vgpu_id;  /*to MKFD*/
    __u64 group_id; /*to MKFD*/
    __u64 indentifier; /*to MKFD maybe 0*/
    __u64 vram_limit; /*to MKFD*/
    __u32 num_cu_mask; /*must be a multiple of 32*/
    __u64 cu_mask_ptr;
};

/**
 * struct mkfd_ioctl_destroy_vgpu_args - argument of MKFD_IOC_DESTROY_VGPU.
 * @gpu_id: GPU the vgpu to destroy belongs to.
 * @vgpu_id: Id of the vgpu to destroy.
 */
struct mkfd_ioctl_destroy_vgpu_args {
    __u32 gpu_id; /*to MKFD*/
    __u32 vgpu_id;  /*to MKFD*/
};

/*
 * Selects which vgpu property an update request changes.
 * Values are spelled out because they are exchanged with the driver.
 */
enum mkfd_update_vgpu_type {
    /* Only the VRAM limit is updated. */
    MKFD_UPDATE_VGPU_VRAM_LIMIT = 0,
    /* Only the CU mask is updated. */
    MKFD_UPDATE_VGPU_CU_MASK = 1,
};

/**
 * struct mkfd_ioctl_update_vgpu_args - argument of MKFD_IOC_UPDATE_VGPU.
 * @type: What to update. NOTE(review): presumably one of
 *        enum mkfd_update_vgpu_type; the member is a plain __u32, confirm.
 * @gpu_id: GPU id passed to MKFD.
 * @vgpu_id: vgpu id passed to MKFD.
 * @vram_limit: VRAM usage limit.
 * @num_cu_mask: Must be a multiple of 32.
 * @cu_mask_ptr: CU mask reference passed to MKFD. NOTE(review): presumably a
 *               user-space address carried as a 64-bit integer; confirm.
 *
 * NOTE(review): __u32 members are followed by __u64 members without explicit
 * pad members (@vgpu_id/@vram_limit, @num_cu_mask/@cu_mask_ptr); the layout
 * relies on compiler alignment padding.
 */
struct mkfd_ioctl_update_vgpu_args {
    __u32 type;
    __u32 gpu_id; /*to MKFD*/
    __u32 vgpu_id;  /*to MKFD*/
    __u64 vram_limit;
    __u32 num_cu_mask; /*must be a multiple of 32*/
    __u64 cu_mask_ptr; /*to MKFD*/
};

/*
 * Length of the se_mask[] arrays in the query structures of this header.
 * NOTE(review): "SE" presumably stands for shader engine; confirm.
 */
#define MKFD_MAX_NUM_SE 8
/**
 * struct mkfd_ioctl_query_vgpu_info_args - argument of
 * MKFD_IOC_QUERY_VGPU_INFO.
 * @gpu_id: GPU id passed to MKFD.
 * @vgpu_id: vgpu id passed to MKFD.
 * @vram_limit: VRAM usage limit of the vgpu.
 * @vram_usage: VRAM usage of the vgpu.
 * @group_id: Group id.
 * @indentifier: Identifier (misspelling is the member name).
 * @num_cu_mask: Passed to MKFD.
 * @cu_mask_ptr: Marked "from mkfd" by the original author. NOTE(review):
 *               presumably the address of a caller buffer that the driver
 *               fills with the CU mask; confirm.
 * @se_mask: One __u32 per entry, MKFD_MAX_NUM_SE entries.
 *
 * NOTE(review): no explicit pad member between @num_cu_mask and @cu_mask_ptr.
 */
struct mkfd_ioctl_query_vgpu_info_args {
    __u32 gpu_id; /*to MKFD*/
    __u32 vgpu_id;  /*to MKFD*/
    __u64 vram_limit;
    __u64 vram_usage;
    __u64 group_id;
    __u64 indentifier;
    __u32 num_cu_mask; /*to mkfd*/
    __u64 cu_mask_ptr; /*from mkfd*/
    __u32 se_mask[MKFD_MAX_NUM_SE];
 };

/**
 * struct mkfd_ioctl_query_vgpu_se_info_args - argument of
 * MKFD_IOC_QUERY_VGPU_SE_INFO.
 * @gpu_id: GPU id.
 * @vgpu_id: vgpu id.
 * @num_cu_mask: CU mask count. NOTE(review): unit (bits vs. words) is not
 *               stated here; sibling structs require a multiple of 32.
 * @cu_mask_ptr: CU mask reference, carried as a 64-bit integer.
 * @se_mask: One __u32 per entry, MKFD_MAX_NUM_SE entries.
 *
 * NOTE(review): three __u32 members precede a __u64 with no explicit pad
 * member; the layout relies on compiler alignment padding.
 */
struct mkfd_ioctl_query_vgpu_se_info_args {
    __u32 gpu_id;
    __u32 vgpu_id;
    __u32 num_cu_mask;
    __u64 cu_mask_ptr;
    __u32 se_mask[MKFD_MAX_NUM_SE];
};

/*
 * Argument of MKFD_IOC_GET_CU_STATUS.
 *
 * gpu_id      - GPU id.
 * pad         - explicit padding so that buffer starts at offset 8.
 * buffer      - buffer reference carried as a 64-bit integer.
 * buffer_size - size that accompanies buffer.
 */
struct mkfd_ioctl_get_cu_status_args {
    __u32 gpu_id;
    __u32 pad;
    __u64 buffer;
    __u64 buffer_size;
};

/*
 * Argument of the wave-info ioctl (command number 0x11).
 *
 * gpu_id      - to mkfd.
 * wave_count  - from kfd.
 * buffer      - buffer reference carried as a 64-bit integer.
 * buffer_size - size that accompanies buffer.
 * gpr         - need dump sgpr & vgpr.
 * pad         - explicit padding; keeps the struct size a multiple of 8.
 */
struct mkfd_ioctl_get_wave_info_args {
    __u32 gpu_id;
    __u32 wave_count;
    __u64 buffer;
    __u64 buffer_size;
    __u32 gpr;
    __u32 pad;
};

/*
 * Register snapshot of a single wave: sixteen 32-bit state words followed by
 * 64 * 256 VGPR words and 1024 SGPR words.
 * NOTE(review): presumably the element type of the buffer passed through
 * struct mkfd_ioctl_get_wave_info_args; this header does not say so, confirm
 * against the driver. Member names appear to mirror hardware wave-state
 * register names; their exact meaning is not documented here.
 */
typedef struct {
    __u32 mode;
    __u32 status;
    __u32 trapsts;
    __u32 hw_id;
    __u32 gpr_alloc;
    __u32 lds_alloc;
    __u32 ib_sts;
    __u32 pc_lo;
    __u32 pc_hi;
    __u32 inst_dw0;
    __u32 inst_dw1;
    __u32 ib_dbg0;
    __u32 ib_dbg1;
    __u32 m0;
    __u32 exec_lo;
    __u32 exec_hi;
    __u32 vgprs[64 * 256];
    __u32 sgprs[1024];
} mkfd_wave_info_t;

/**
 * struct mkfd_ioctl_get_cu_busy_args - argument of MKFD_IOC_GET_CU_BUSY_INFO.
 * @gpu_id: GPU id.
 * @se_mask: One __u32 per entry, MKFD_MAX_NUM_SE entries. NOTE(review):
 *           presumably a per-SE bitmask of busy CUs filled by the driver;
 *           confirm.
 */
struct mkfd_ioctl_get_cu_busy_args {
    __u32 gpu_id;
    __u32 se_mask[MKFD_MAX_NUM_SE];
};

/**
 * struct mkfd_ioctl_get_available_mem_size_args - argument of
 * MKFD_IOC_GET_AVAILABLE_MEM_SIZE.
 * @available: Available memory size. NOTE(review): presumably written by the
 *             driver; unit is not stated in this header.
 * @gpu_id: GPU id.
 * @pad: Explicit padding; keeps the struct size a multiple of 8.
 */
struct mkfd_ioctl_get_available_mem_size_args {
    __u64 available;
    __u32 gpu_id;
    __u32 pad;
};

/*
 * Argument of MKFD_IOC_REGISTER_HYUDS_SPACE.
 *
 * reserve - to KFD.
 */
struct mkfd_ioctl_register_hyuds_space_args {
    __u32 reserve;
};

/*
 * Argument of MKFD_IOC_CREATE_HYUDS_CONN.
 *
 * fd - from KFD.
 */
struct mkfd_ioctl_create_hyuds_conn_args {
    int fd;
};

/*
 * Argument of MKFD_IOC_GET_VMID_PASID_MAPPING.
 *
 * gpu_id - to KFD.
 * vmid   - to KFD.
 * pad    - explicit padding so that pasid is 2-byte aligned.
 * pasid  - from KFD.
 */
struct mkfd_ioctl_atc_vmid_pasid_mapping_info_args {
    __u32 gpu_id;
    __u8  vmid;
    __u8  pad;
    __u16 pasid;
};

/*
 * Argument of MKFD_IOC_REGS_OP.
 *
 * The four flags are C `bool`; user-space C consumers need <stdbool.h>,
 * which this header pulls in for non-kernel builds.
 *
 * gpu_id        - to KFD.
 * read          - NOTE(review): presumably true for a register read and
 *                 false for a write; confirm against the driver.
 * pm_pg_lock    - flag; meaning not stated in this header.
 * use_bank      - NOTE(review): presumably enables the se_bank / sh_bank /
 *                 instance_bank selection; confirm.
 * use_ring      - NOTE(review): presumably enables the me / pipe / queue /
 *                 vmid selection; confirm.
 * se_bank, sh_bank, instance_bank - bank selectors.
 * me, pipe, queue, vmid           - ring selectors.
 * reg           - register to operate on.
 * value         - register value.
 */
struct mkfd_ioctl_regs_op_args {
    __u32 gpu_id;
    bool read;
    bool pm_pg_lock;
    bool use_bank;
    bool use_ring;
    __u32 se_bank;
    __u32 sh_bank;
    __u32 instance_bank;
    __u32 me;
    __u32 pipe;
    __u32 queue;
    __u32 vmid;
    __u32 reg;
    __u32 value;
};

/*
 * Forward declaration. Without it, `struct file` would first appear inside
 * the parameter list below; in C such a tag has prototype scope only, so no
 * caller could ever pass a compatible pointer.
 */
struct file;

/* Declared here; defined outside this header. Takes no arguments, returns int. */
int mkfd_create_hyuds_conn(void);

/*
 * Declared here; defined outside this header.
 * @filep: opaque file object pointer.
 * @data:  untyped argument pointer. NOTE(review): presumably a
 *         struct mkfd_ioctl_set_process_mode_args; confirm.
 */
int mkfd_ioctl_set_process_run_mode(struct file *filep, void *data);

/*
 * Socket name used by the HYUDS connection.
 * NOTE(review): the value suggests an abstract-namespace Unix domain socket;
 * confirm against the code that binds/connects it.
 */
#define HYUDS_SOCKET "hcu_uds_abstract_socket"

/*
 * ioctl encoding helpers. Every MKFD command uses the type character 'M';
 * the wrappers forward to _IO/_IOR/_IOW/_IOWR from <linux/ioctl.h> with that
 * type filled in, so callers pass only the command number (and the argument
 * type for the data-carrying variants).
 */
#define MKFD_IOCTL_BASE 'M'
#define MKFD_IO(nr)             _IO(MKFD_IOCTL_BASE, nr)
#define MKFD_IOR(nr, type)      _IOR(MKFD_IOCTL_BASE, nr, type)
#define MKFD_IOW(nr, type)      _IOW(MKFD_IOCTL_BASE, nr, type)
#define MKFD_IOWR(nr, type)     _IOWR(MKFD_IOCTL_BASE, nr, type)

/*
 * MKFD ioctl commands 0x1 - 0x10. Each one is a read/write (_IOWR) command
 * whose argument is the struct named in its definition.
 *
 * The numbers are hexadecimal: 0x9 is followed by 0x10, so 0xa - 0xf are not
 * assigned in this header. The values are ABI and must not be renumbered.
 */
#define MKFD_IOC_SET_PROCESS_RUN_MODE	\
            MKFD_IOWR(0x1, struct mkfd_ioctl_set_process_mode_args)

#define MKFD_IOC_CREATE_VGPU \
            MKFD_IOWR(0x2, struct mkfd_ioctl_create_vgpu_args)

#define MKFD_IOC_DESTROY_VGPU \
            MKFD_IOWR(0x3, struct mkfd_ioctl_destroy_vgpu_args)

#define MKFD_IOC_UPDATE_VGPU \
            MKFD_IOWR(0x4, struct mkfd_ioctl_update_vgpu_args)

#define MKFD_IOC_CREATE_GROUP \
            MKFD_IOWR(0x5, struct mkfd_ioctl_create_group_args)

#define MKFD_IOC_DESTROY_GROUP \
            MKFD_IOWR(0x6, struct mkfd_ioctl_destroy_group_args)

#define MKFD_IOC_QUERY_GROUP_INFO \
            MKFD_IOWR(0x7, struct mkfd_ioctl_query_group_info_args)

#define MKFD_IOC_QUERY_VGPU_INFO \
            MKFD_IOWR(0x8, struct mkfd_ioctl_query_vgpu_info_args) 

#define MKFD_IOC_QUERY_GROUP_INFO_BY_ID \
            MKFD_IOWR(0x9, struct mkfd_ioctl_query_group_info_by_id_args)

/* Hexadecimal 0x10 (decimal 16), not decimal 10. */
#define MKFD_IOC_UPDATE_GROUP \
            MKFD_IOWR(0x10, struct mkfd_ioctl_update_group_args)

/*
 * Wave-info ioctl (command 0x11); its argument is
 * struct mkfd_ioctl_get_wave_info_args.
 */
#define MKFD_IOC_GET_WAVE_INFO \
            MKFD_IOWR(0x11, struct mkfd_ioctl_get_wave_info_args)

/*
 * Original, misspelled name ("WAGE" for "WAVE"). Kept as an alias so existing
 * callers keep building; new code should use MKFD_IOC_GET_WAVE_INFO.
 */
#define MKFD_IOC_GET_WAGE_INFO MKFD_IOC_GET_WAVE_INFO

/*
 * MKFD ioctl commands 0x12 - 0x1a. Each one is a read/write (_IOWR) command
 * whose argument is the struct named in its definition. The values are ABI
 * and must not be renumbered.
 */
#define MKFD_IOC_GET_CU_BUSY_INFO \
            MKFD_IOWR(0x12, struct mkfd_ioctl_get_cu_busy_args)

#define MKFD_IOC_GET_AVAILABLE_MEM_SIZE \
            MKFD_IOWR(0x13, struct mkfd_ioctl_get_available_mem_size_args)

#define MKFD_IOC_REGISTER_HYUDS_SPACE \
            MKFD_IOWR(0x14, struct mkfd_ioctl_register_hyuds_space_args)

#define MKFD_IOC_CREATE_HYUDS_CONN			\
            MKFD_IOWR(0x15, struct mkfd_ioctl_create_hyuds_conn_args)
        
#define MKFD_IOC_GET_VMID_PASID_MAPPING			\
            MKFD_IOWR(0x16, struct mkfd_ioctl_atc_vmid_pasid_mapping_info_args)

/*
 * The argument struct is declared further down in this header; that works
 * because the macro is only expanded where it is used.
 */
#define MKFD_IOC_SECURITY_ATTESTATION   \
            MKFD_IOWR(0x17, struct mkfd_ioctl_security_attestation_args)

#define MKFD_IOC_REGS_OP   \
            MKFD_IOWR(0x18, struct mkfd_ioctl_regs_op_args)

#define MKFD_IOC_QUERY_VGPU_SE_INFO \
            MKFD_IOWR(0x19, struct mkfd_ioctl_query_vgpu_se_info_args)

#define MKFD_IOC_GET_CU_STATUS \
            MKFD_IOWR(0x1a, struct mkfd_ioctl_get_cu_status_args)

/*
 * Argument of MKFD_IOC_SECURITY_ATTESTATION.
 *
 * gpu_id        - GPU id.
 * version       - message version number (default 1).
 * request_data  - request structure address.
 * request_size  - request structure size.
 * response_data - response structure address.
 * response_size - response structure size.
 * fw_err        - firmware error address.
 */
struct mkfd_ioctl_security_attestation_args {
    __u32 gpu_id;
    __u32 version;
    __u64 request_data;
    __u64 request_size;
    __u64 response_data;
    __u64 response_size;
    __u64 fw_err;
};

/*
 * Bounds of the MKFD command-number range.
 * NOTE(review): MKFD_COMMAND_END (0x22) is larger than the highest command
 * number defined in this header (0x1a); whether the end is inclusive and
 * whether the gap is intentional headroom is not visible here, confirm with
 * the driver's range check.
 */
#define MKFD_COMMAND_START      0x01
#define MKFD_COMMAND_END        0x22

/* Filesystem path of the hycu socket. */
#define SOCKET_PATH		"/var/run/hycu_sk"

/* Result codes: 0 on success, 1 on failure. */
#define RESULT_FAIL  1
#define RESULT_OK    0

/*
 * Original, misspelled name of RESULT_FAIL. Kept as an alias so existing
 * callers keep building; new code should use RESULT_FAIL.
 */
#define RESUT_FAIL   RESULT_FAIL

/*
 * Event identifiers carried in struct hycu_message.event_id.
 * Values are spelled out so that inserting a new event cannot silently
 * renumber the existing ones.
 */
enum hycu_event {
    HYCU_EVENT_PCIE_LINK_DOWN        = 0,
    HYCU_EVENT_RESET_BEGIN           = 1,
    HYCU_EVENT_RESET_FINISH          = 2,
    HYCU_EVENT_RASINFO               = 3,
    HYCU_EVENT_UNMAP_TIMEOUT         = 4,
    HYCU_EVENT_PROCESS_QUEUE_HANG    = 5,
    HYCU_EVENT_PROCESS_QUEUE_RECOVER = 6,
    HYCU_EVENT_OVER_TEMPERATURE      = 7,
    HYCU_EVENT_BACO_RESET_PREP       = 8,
    HYCU_EVENT_BACO_RESET_POST       = 9,
    HYCU_EVENT_BACO_RESET_CODE       = 10,
};

/*
 * Module identifiers used in struct ras_info.module_id.
 * Individual modules carry explicit values; HYCU_RAS_MODULE_ALL is left
 * implicit so that it keeps following the last individual module.
 */
enum hycu_module {
    HYCU_RAS_MODULE__UMC       = 0,
    HYCU_RAS_MODULE__SDMA      = 1,
    HYCU_RAS_MODULE__GFX       = 2,
    HYCU_RAS_MODULE__MMHUB     = 3,
    HYCU_RAS_MODULE__ATHUB     = 4,
    HYCU_RAS_MODULE__PCIE_BIF  = 5,
    HYCU_RAS_MODULE__HDP       = 6,
    HYCU_RAS_MODULE__XGMI_WAFL = 7,
    HYCU_RAS_MODULE__DF        = 8,
    HYCU_RAS_MODULE__SMN       = 9,
    HYCU_RAS_MODULE__SEM       = 10,
    HYCU_RAS_MODULE__MP0       = 11,
    HYCU_RAS_MODULE__MP1       = 12,
    HYCU_RAS_MODULE__FUSE      = 13,
    HYCU_RAS_MCA_MODULE__MP0   = 14,
    HYCU_RAS_MCA_MODULE__MP1   = 15,
    HYCU_RAS_MCA_MODULE__MPIO  = 16,
    HYCU_RAS_MCA_MODULE__IOHC  = 17,
    HYCU_RAS_MODULE_ALL,
};

/*
 * RAS counters for one module; carried in the data union of
 * struct hycu_message.
 * NOTE(review): ce_count / ue_count are presumably the correctable and
 * uncorrectable error counts; confirm. They are `unsigned long`, whose width
 * differs between 32-bit and 64-bit builds, so both ends of any exchange must
 * be built for the same ABI.
 */
struct ras_info {
	enum hycu_module module_id; /* original note: SDMA/NBIO/GC/ALL */
	/* query ras counter */
	unsigned long ce_count;
	unsigned long ue_count;
};

/*
 * Process/queue pair; carried in the data union of struct hycu_message.
 *
 * pid_t comes from <sys/types.h>, which this header includes for user-space
 * builds.
 *
 * pid      - process id.
 * queue_id - queue id. NOTE(review): presumably a queue owned by @pid;
 *            confirm.
 */
struct proc_qinfo {
    pid_t pid;
    unsigned int queue_id;
};

/*
 * Event message: which GPU, which event, and an event-specific payload.
 *
 * gpu_id uses __u32 like the rest of this header; it comes from
 * <linux/types.h>, which is already included, so no <stdint.h> is needed.
 *
 * gpu_id   - GPU id.
 * event_id - one of enum hycu_event.
 * data     - payload union. NOTE(review): which member is valid for which
 *            event is not stated in this header (presumably ras for
 *            HYCU_EVENT_RASINFO and pqinfo for the process-queue events);
 *            confirm against the sender.
 */
struct hycu_message {
    __u32 gpu_id;
    enum hycu_event event_id;
    union {
        struct ras_info ras;
        struct proc_qinfo pqinfo;
        int result;
    } data;
};

#endif // INCLUDE_ROCM_SMI_MKFD_IOCTL_H_