/*******************************************************************************
 * Copyright 2016-2023 by SW Group, Chengdu Haiguang IC Design Co., Ltd.
 * All right reserved. See COPYRIGHT for detailed Information.
 *
 * @file        hymgr.h
 * @brief       Header file for hymgr.
 *
 * @author      WangYan<wangwy@hygon.cn>
 * @date        2023/07/28
 * @history     1.
 ******************************************************************************/

#ifndef __INC_HYMGR_H_
#define __INC_HYMGR_H_

#include <stdint.h>

// Magic value for the Command header: the characters 'h','y','m','c' packed
// into one int with 'h' in the most significant byte (0x68796d63 in ASCII).
// Any earlier definition of CMDMAGIC is discarded first; #undef is a no-op
// when the name is not currently defined, so no #ifdef guard is required.
#undef CMDMAGIC
#define CMDMAGIC (('h' << 24) | ('y' << 16) | ('m' << 8) | ('c'))

// Number of CU-mask fields, 64 bits per field.
// Any earlier definition is discarded first; #undef is a no-op when the name
// is not currently defined.
#undef MIG_CU_MASK_FIELDS
#define MIG_CU_MASK_FIELDS 2

// Length of the se_mask array in struct vgpu_query_args.
// Any earlier definition is discarded first; #undef is a no-op when the name
// is not currently defined.
#undef MKFD_MAX_NUM_SE
#define MKFD_MAX_NUM_SE 8

// Kind of process a command originates from.
// The numeric values match the ones listed for Command.from_type.
typedef enum {
    FROM_TYPE_COMPUTE = 0,  // compute process
    FROM_TYPE_CLI     = 1,  // command line interface process
    FROM_TYPE_SERVER  = 2,  // manager process
    FROM_TYPE_SMI     = 3,  // smi process
} FromType;

// Command identifiers.
// Every enumerator carries its numeric value explicitly so the numbering is
// visible at a glance; the values are the same ones implicit numbering
// produced before (0..20, in declaration order).
typedef enum {
    CTYPE_ERROR_COMMAND        = 0,   // command failed or server error
    CTYPE_INVALID_COMMAND      = 1,   // command invalid from client
    CTYPE_GET_TOPO_INFO        = 2,   // get topology info
    CTYPE_GET_GDDR_INFO        = 3,   // get gddr info
    CTYPE_SET_ECC_MODE         = 4,   // set KM/KME ecc mode
    CTYPE_GET_ECC_MODE         = 5,   // get KM/KME ecc mode
    CTYPE_SET_GDDR_TRAINING    = 6,   // set KM/KME enable/disable gddr training
    CTYPE_GET_GDDR_TRAINING    = 7,   // get KM/KME gddr training enable/disable
    CTYPE_LOAD_DRIVER          = 8,   // manual load driver
    CTYPE_UNLOAD_DRIVER        = 9,   // manual unload driver
    CTYPE_SET_AUTO_LOAD_DRIVER = 10,  // set auto load driver
    CTYPE_GET_AUTO_LOAD_DRIVER = 11,  // get auto load driver
    CTYPE_SET_DRIVER_PARAMS    = 12,  // set driver parameters when load driver
    CTYPE_GET_DRIVER_PARAMS    = 13,  // get driver parameters
    CTYPE_GET_EXCEPTION_INFO   = 14,  // get software/hardware exception info
    CTYPE_CREATE_VGPU          = 15,  // create vgpu for mig/virtual
    CTYPE_DESTROY_VGPU         = 16,  // destroy vgpu for mig/virtual
    CTYPE_QUERY_VGPU           = 17,  // query vgpu for mig/virtual
    CTYPE_SET_ECC_STATUS       = 18,  // set KM/KME enable/disable ecc mode
    CTYPE_GET_ECC_STATUS       = 19,  // get KM/KME ecc mode enable/disable
    CTYPE_CHECK_VERSION        = 20,  // check client commit id
} CommandType;

// GDDR training state.
typedef enum {
    GDDR_TRAINING_NOT_RUN = 0,  // not run
    GDDR_TRAINING_RUNNING = 1,  // running
    GDDR_TRAINING_DONE    = 2,  // done
    GDDR_TRAINING_FAILED  = 3,  // failed
} GDDRStatus;

// Status codes; GetHymgrErrorString() maps each one to a message.
// Every enumerator carries its numeric value explicitly; the values are the
// same ones implicit numbering produced before (0..20, in declaration order).
typedef enum {
    SRV_STATUS_SUCCESS             = 0,
    SRV_STATUS_TOPO_NOT_INIT       = 1,   // topology is not initialized
    SRV_STATUS_LOAD_CFG_ERROR      = 2,
    SRV_STATUS_SAVE_CFG_ERROR      = 3,
    SRV_STATUS_LOAD_DEV_ERROR      = 4,   // load device info failed
    SRV_STATUS_LOAD_DRIVER_ERROR   = 5,
    SRV_STATUS_UNLOAD_DRIVER_ERROR = 6,
    SRV_STATUS_MALLOC_ERROR        = 7,   // malloc failed
    SRV_STATUS_NO_DEVICE_FOUND     = 8,   // there is no device found
    SRV_STATUS_DEVICE_USED         = 9,   // device is used when set ecc
    SRV_STATUS_DEVICE_INCOMPATIBLE = 10,  // devices incompatible
    SRV_STATUS_ECC_INCOMPATIBLE    = 11,  // ecc mode incompatible
    SRV_STATUS_ECC_MODE_INVALID    = 12,  // ecc mode is invalid
    SRV_STATUS_ECC_NOT_SUPPORT     = 13,  // the device does not support ecc
    SRV_STATUS_ECC_GET_ERROR       = 14,
    SRV_STATUS_ECC_SET_ERROR       = 15,
    SRV_STATUS_CREATE_VGPU_ERROR   = 16,
    SRV_STATUS_DESTROY_VGPU_ERROR  = 17,
    SRV_STATUS_QUERY_VGPU_ERROR    = 18,
    SRV_STATUS_UNSUPPORT_OPERATION = 19,
    SRV_STATUS_VERSION_MISMATCH    = 20,
} SrvStatus;

// Command header.
// cmd_data is declared as a zero-length trailing array (a GNU C extension):
// it contributes nothing to sizeof(Command) and only names the first byte
// located right after the fixed fields.
// NOTE(review): presumably data_size bytes of command data follow the header
// in the same buffer -- confirm against the code that sends/receives commands.
typedef struct {
    int         header;                 // magic, 'h','y','m','c' (see CMDMAGIC)
    int         from_pid;               // command from pid
    short       from_type;              // 0:compute, 1:cli, 2:server, 3:smi (see FromType)
    short       cmd_type;               // command type (presumably a CommandType value)
    int         data_size;              // command data size
    char        cmd_data[0];            // command data
} Command;

// Arguments for creating a vgpu (presumably the payload of CTYPE_CREATE_VGPU
// -- confirm against the caller).
// The trailing notes record, per field, the original author's remark about
// its relation to MKFD ("to MKFD" / "checkout with MKFD").
// NOTE(review): field order and widths presumably have to match the MKFD side;
// confirm before changing the layout.
struct vgpu_create_args {
    uint32_t    gpu_id;                          // to MKFD
    uint32_t    vgpu_id;                         // to MKFD
    uint64_t    group_id;                        // to MKFD
    uint64_t    indentifier;                     // to MKFD, may be 0 (field name spelling kept as-is)
    uint64_t    vram_limit;                      // checkout with MKFD
    uint32_t    num_cu_mask;                     // must be a multiple of 32
    uint32_t    cu_mask[MIG_CU_MASK_FIELDS * 2]; // checkout with MKFD; two 32-bit words per 64-bit field
};

// Arguments for destroying a vgpu (presumably the payload of
// CTYPE_DESTROY_VGPU -- confirm against the caller).
// NOTE(review): field order and widths presumably have to match the MKFD side;
// confirm before changing the layout.
struct vgpu_destroy_args {
    uint32_t    gpu_id;                          // to MKFD
    uint32_t    vgpu_id;                         // to MKFD
    uint32_t    num_cu_mask;                     // must be a multiple of 32
    uint32_t    cu_mask[MIG_CU_MASK_FIELDS * 2]; // checkout with MKFD; two 32-bit words per 64-bit field
};

// Arguments and results for querying a vgpu (presumably the payload of
// CTYPE_QUERY_VGPU -- confirm against the caller).
// Fields marked "to MKFD" are inputs; fields marked "from MKFD" are filled in
// with the query result, per the original author's per-field notes.
// NOTE(review): field order and widths presumably have to match the MKFD side;
// confirm before changing the layout.
struct vgpu_query_args {
    uint32_t    gpu_id;                          // to MKFD
    uint32_t    vgpu_id;                         // to MKFD
    uint64_t    vram_limit;                      // from MKFD
    uint64_t    vram_usage;                      // from MKFD
    uint64_t    group_id;                        // from MKFD
    uint64_t    indentifier;                     // from MKFD (field name spelling kept as-is)
    uint32_t    num_cu_mask;                     // to MKFD
    uint32_t    cu_mask[MIG_CU_MASK_FIELDS * 2]; // from MKFD; two 32-bit words per 64-bit field
    uint32_t    se_mask[MKFD_MAX_NUM_SE];        // from MKFD
};

// Map a SrvStatus code to a fixed English message.
//
// @param type  status code to describe
// @return      pointer to a string literal, never NULL; a value that has no
//              dedicated message yields "A generic error has occurred".
//
// The separate prototype carries __attribute__((unused)) so that a translation
// unit which includes this header without calling the function does not get
// an unused-function warning.
static const char* GetHymgrErrorString(SrvStatus type) __attribute__((unused));
static const char* GetHymgrErrorString(SrvStatus type) {
    // Start from the fallback text; a matching case below overrides it.
    const char* msg = "A generic error has occurred";

    switch (type) {
    case SRV_STATUS_SUCCESS:
        msg = "Success";
        break;
    case SRV_STATUS_TOPO_NOT_INIT:
        msg = "Topology is not initialized";
        break;
    case SRV_STATUS_LOAD_CFG_ERROR:
        msg = "Load config failed";
        break;
    case SRV_STATUS_SAVE_CFG_ERROR:
        msg = "Save config failed";
        break;
    case SRV_STATUS_LOAD_DEV_ERROR:
        msg = "Load devices failed";
        break;
    case SRV_STATUS_LOAD_DRIVER_ERROR:
        msg = "Load driver failed";
        break;
    case SRV_STATUS_UNLOAD_DRIVER_ERROR:
        msg = "Unload driver failed";
        break;
    case SRV_STATUS_MALLOC_ERROR:
        msg = "Malloc failed";
        break;
    case SRV_STATUS_NO_DEVICE_FOUND:
        msg = "No devices is found";
        break;
    case SRV_STATUS_DEVICE_USED:
        msg = "Devices is used";
        break;
    case SRV_STATUS_DEVICE_INCOMPATIBLE:
        msg = "Devices incompatible";
        break;
    case SRV_STATUS_ECC_INCOMPATIBLE:
        msg = "Ecc mode incompatible";
        break;
    case SRV_STATUS_ECC_MODE_INVALID:
        msg = "Ecc mode is invalid";
        break;
    case SRV_STATUS_ECC_NOT_SUPPORT:
        msg = "Device is not support ecc set";
        break;
    case SRV_STATUS_ECC_GET_ERROR:
        msg = "Get ecc failed";
        break;
    case SRV_STATUS_ECC_SET_ERROR:
        msg = "Set ecc failed";
        break;
    case SRV_STATUS_CREATE_VGPU_ERROR:
        msg = "Create vgpu failed";
        break;
    case SRV_STATUS_DESTROY_VGPU_ERROR:
        msg = "Destroy vgpu failed";
        break;
    case SRV_STATUS_QUERY_VGPU_ERROR:
        msg = "Query vgpu failed";
        break;
    case SRV_STATUS_UNSUPPORT_OPERATION:
        msg = "Unsupported operation";
        break;
    case SRV_STATUS_VERSION_MISMATCH:
        msg = "Version mismatch";
        break;
    default:
        // Unknown code: keep the fallback text.
        break;
    }

    return msg;
}

#endif // __INC_HYMGR_H_

