#ifndef SCCL_TOPO_H_
#define SCCL_TOPO_H_

#include <string.h>
#include "base.h"
#include "archinfo.h"
#include "xml.h"
#include "net.h"

namespace sccl {
namespace hardware {
namespace topology {
namespace topo {

// Compile-time size limits of the topology tables.
// All four are plain macros so that each name behaves the same way wherever it is
// used: array bounds, preprocessor conditionals, and code outside this namespace.

// Number of node types; bound of the per-type arrays in this header.
#define SCCL_TOPO_NODE_TYPES 6
// Capacity of one scclTopoNodeSet.
#define SCCL_TOPO_MAX_NODES 256
// Capacity of scclTopoNode::links.
#define SCCL_TOPO_MAX_LINKS 32
// Capacity of scclTopoLinkList::list.
#define SCCL_TOPO_MAX_HOPS (SCCL_TOPO_MAX_NODES * SCCL_TOPO_NODE_TYPES)

// Node types that can appear in the hardware topology.
typedef enum topoNodeType {
    GPU = 0, // graphics processing unit
    PCI = 1, // peripheral component interconnect
    // NOTE(review): the original comment glossed NVS as "non-volatile storage";
    // presumably this is an NVSwitch node instead - confirm.
    NVS = 2,
    CPU = 3, // central processing unit; in practice a NUMA domain
    NIC = 4, // network interface controller
    NET = 5  // network
} topoNodeType_t;
// Node-type name table, defined elsewhere (presumably indexed by topoNodeType_t - confirm).
extern const char* topoNodeTypeStr[];

// Link types. The link-type and path-type enums are defined so that their values
// match wherever possible; values that only exist as path types are skipped here.
typedef enum topoLinkType {
    LINK_LOC = 0, // local link
    LINK_NVL = 1, // NVLink link
    // 2: placeholder for path type PATH_NVB, not defined as a link type
    LINK_PCI = 3, // PCI link
    // 4: placeholder for path type PATH_PXB, not defined as a link type
    // 5: placeholder for path type PATH_PXN, not defined as a link type
    // 6: placeholder for path type PATH_PHB, not defined as a link type
    LINK_SYS = 7, // system link
    LINK_NET = 8  // network link
} topoLinkType_t;
// Link-type name table, defined elsewhere.
extern const char* topoLinkTypeStr[];

// Path types (enum topoPathType; no topoPathType_t typedef is declared here).
// Each value describes how two nodes are connected.
enum topoPathType {
    PATH_LOC = 0, // local path
    PATH_NVL = 1, // connected through NVLink
    PATH_NVB = 2, // connected through NVLink via an intermediate GPU
    PATH_PIX = 3, // connected through at most one PCIe bridge
    PATH_PXB = 4, // connected through multiple PCIe bridges (not through the PCIe host bridge)
    PATH_PXN = 5, // GPU and NIC connected through an intermediate GPU
    PATH_PHB = 6, // connected through PCIe and a PCIe host bridge
    PATH_SYS = 7, // connected through PCIe and the SMP interconnect between NUMA nodes
    PATH_NET = 8, // connected through the network
    PATH_DIS = 9  // disconnected
};

////////////////////////////////////////////////////////////////////////////////////////////////

// Forward declaration: scclTopoLink refers to scclTopoNode before it is defined.
struct scclTopoNode;

// One link from a node to a remote node.
struct scclTopoLink {
    int type;                    // link type (presumably a topoLinkType value - confirm)
    float bw;                    // bandwidth of the link
    struct scclTopoNode* remNode; // node at the other end of the link
};

// A list of up to SCCL_TOPO_MAX_HOPS link pointers with an overall type and bandwidth.
// NOTE(review): scclTopoNode::paths points at objects of this type, so this presumably
// describes a path between two nodes - confirm.
struct scclTopoLinkList {
    int type;  // overall type (presumably a topoPathType value - confirm)
    float bw;  // overall bandwidth
    int count; // number of entries used in list
    struct scclTopoLink* list[SCCL_TOPO_MAX_HOPS]; // the links, stored as pointers
};

// A single node of the topology graph: common header, type-specific payload,
// outgoing links and precomputed paths.
struct scclTopoNode {
    int type;   // node type
    int64_t id; // node ID
    // Type-specific data (anonymous union: the gpu/net/cpu/pci members share storage)
    union {
        struct {
            int dev;              // NVML device number
            int rank;             // rank
            int cudaCompCap;      // CUDA compute capability
            int gdrSupport;       // GDR support
            const char* gcn;      // GCN architecture name
            hipDeviceArch_t arch; // HIP device architecture
        } gpu;                    // GPU node
        struct {
            uint64_t asic;   // ASIC identifier
            int port;        // port number
            float bw;        // bandwidth
            float latency;   // latency
            int gdrSupport;  // GDR support
            int collSupport; // collective-operation support
            int maxChannels; // maximum number of channels
            int64_t busId;   // bus ID
        } net;               // network node
        struct {
            int arch;           // architecture
            int vendor;         // vendor
            int model;          // model
            cpu_set_t affinity; // CPU affinity
        } cpu;                  // CPU node
        struct {
            uint64_t device; // PCI device
        } pci;               // PCI node
    };
    int nlinks;                                     // number of links
    struct scclTopoLink links[SCCL_TOPO_MAX_LINKS]; // link list
    // Precomputed paths to GPUs and NICs (one slot per node type)
    struct scclTopoLinkList* paths[SCCL_TOPO_NODE_TYPES];
    // Used during search
    uint64_t used;
};

// Fixed-capacity collection of nodes.
struct scclTopoNodeSet {
    int count;                                      // number of nodes
    struct scclTopoNode nodes[SCCL_TOPO_MAX_NODES]; // node array; capacity is SCCL_TOPO_MAX_NODES
};

// The whole system topology: one node set per node type plus system-wide settings.
struct scclTopoSystem {
    struct scclTopoNodeSet nodes[SCCL_TOPO_NODE_TYPES]; // node sets, one per node type
    float maxBw;                                        // maximum bandwidth of the system
    float baseBw;                                       // base bandwidth
    float totalBw;                                      // total bandwidth of the system
    int type;                                           // system type
    int nRanks;                                         // number of ranks in the system
    int netGdrLevel;                                    // network GDR level
    int tuning;                                         // tuning parameter

    int pivotA2ANumBiRings; // number of bidirectional rings in Pivot A2A mode
    bool pivotA2AEnabled;   // whether the Pivot A2A communication mode is enabled
    bool treeDefined;       // whether a tree structure has been defined
    bool ll128Enabled;      // whether LL128 mode is enabled
    bool mscclEnabled;      // whether MSCCL mode is enabled
};

// Nominal bandwidth / link-width constants for the supported interconnects.
// NOTE(review): units are not stated; NET_BW 12.0 being annotated "100Gbit"
// suggests GB/s - confirm.
#define LOC_BW 5000.0
#define SM60_NVLINK_BW 18.0
#define SM70_NVLINK_BW 20.0
#define SM80_NVLINK_BW 20.0
#define SM90_NVLINK_BW 20.0
#define SM86_NVLINK_BW 12.0
#define PCI_BW 12.0 // PCI Gen3 x16
#define QPI_BW 6.0
#define SKL_QPI_BW 10.0
#define ZPI_BW 6.0
#define YONGFENG_ZPI_BW 9.0
#define P9_BW 32.0
#define ARM_BW 6.0
#define NET_BW 12.0 // 100Gbit
#define VEGA_XGMI_WIDTH 24.0
#define MI200_XGMI_WIDTH 36.0
#define GFX94X_XGMI_WIDTH 48.0

// Intel CPUs convert GPU P2P traffic into 64-byte PCI TLPs, so GPU-to-GPU traffic
// consumes more PCI bandwidth. This macro scales `bw` by 6/5 to account for that.
// The argument is parenthesised so that an expression such as `a + b` is scaled
// as a whole instead of only its last operand.
#define INTEL_P2P_OVERHEAD(bw) ((bw) * 6 / 5)

// CPU architecture identifiers. Numbering starts at 1 and increases by one
// per enumerator.
enum topoCpuArch {
    SCCL_TOPO_CPU_ARCH_X86 = 1,
    SCCL_TOPO_CPU_ARCH_POWER, // 2
    SCCL_TOPO_CPU_ARCH_ARM    // 3
};

// CPU vendor identifiers. Numbering starts at 1 and increases by one
// per enumerator.
enum topoCpuVendor {
    SCCL_TOPO_CPU_VENDOR_INTEL = 1,
    SCCL_TOPO_CPU_VENDOR_AMD,    // 2
    SCCL_TOPO_CPU_VENDOR_ZHAOXIN // 3
};

// CPU model/type identifiers. Numbering starts at 1 and increases by one
// per enumerator.
enum topoCpuType {
    SCCL_TOPO_CPU_TYPE_BDW = 1,
    SCCL_TOPO_CPU_TYPE_SKL,     // 2
    SCCL_TOPO_CPU_TYPE_ZEN,     // 3
    SCCL_TOPO_CPU_TYPE_ROME,    // 4
    SCCL_TOPO_CPU_TYPE_YONGFENG // 5
};

// Pattern identifiers (balanced tree, split tree, tree, ring, NVLS). Numbering
// starts at 1 and increases by one per enumerator.
// NOTE(review): the enum tag says "Cpu" but the enumerators are named
// SCCL_TOPO_PATTERN_*; confirm the tag is intended.
enum topoCpuPattern {
    SCCL_TOPO_PATTERN_BALANCED_TREE = 1,
    SCCL_TOPO_PATTERN_SPLIT_TREE, // 2
    SCCL_TOPO_PATTERN_TREE,       // 3
    SCCL_TOPO_PATTERN_RING,       // 4
    SCCL_TOPO_PATTERN_NVLS        // 5
};

// NOTE(review): SCCL_TOPO_MAX_NODES is also introduced near the top of this header
// with the same value; this definition is redundant and the two must stay in sync.
#define SCCL_TOPO_MAX_NODES 256

// Path-type name table, defined elsewhere.
extern const char* topoPathTypeStr[];

// NOTE(review): these carry the same values as SCCL_TOPO_CPU_TYPE_BDW and
// SCCL_TOPO_CPU_TYPE_SKL in enum topoCpuType; presumably legacy aliases - confirm.
#define SCCL_TOPO_CPU_INTEL_BDW 1
#define SCCL_TOPO_CPU_INTEL_SKL 2

// System type identifiers. Apart from SCCL_TOPO_UNDEF (-1), every value is a
// distinct power of two.
// NOTE(review): presumably these are combined as bit flags in scclTopoSystem::type - confirm.
enum topoSysType {
    SCCL_TOPO_UNDEF       = -1,
    SCCL_TOPO_CR8G        = 1,
    SCCL_TOPO_4P2H_ROME   = 2,
    SCCL_TOPO_GDR_ALL     = 4,
    SCCL_TOPO_16P1H       = 8,
    SCCL_TOPO_FORCE_INTRA = 16,
    SCCL_TOPO_XGMI_ALL    = 32
};

// struct scclTopoComm {
//     int type;
//     int id;

//     int rank;
//     int nRanks;
//     int node;
//     int nNodes;
//     int localRank;
//     int localRanks;
//     bool dmaBufSupport;

//     struct scclPeerInfo* peerInfo;
//     sccl::hardware::net::scclNet_t* scclNet;
// };

////////////////////////////////////////////////////////////////////////////////////////////////
// Topology API. The definitions live outside this header; the notes below restate
// the original comments and the visible signatures only.

// Check whether the Hsw driver is present.
bool isHswDriverExist();
// Get the number of InfiniBand (IB) devices.
int getIBNum();

// Get a topology node.
// NOTE(review): `id` is uint64_t here while scclTopoNode::id is int64_t - confirm the intended type.
scclResult_t scclTopoGetNode(struct scclTopoSystem* system, struct scclTopoNode** node, int type, uint64_t id);
// Create a topology node.
scclResult_t scclTopoCreateNode(struct scclTopoSystem* system, struct scclTopoNode** node, int type, uint64_t id);
// Remove a topology node.
// NOTE(review): `id` is a plain int here, unlike Get/Create; presumably it is an index
// into the node set rather than a node ID - confirm.
scclResult_t scclTopoRemoveNode(struct scclTopoSystem* system, int type, int id);
// Connect two topology nodes.
scclResult_t scclTopoConnectNodes(struct scclTopoNode* node, struct scclTopoNode* remNode, int type, float bw);
// Build the system topology from XML.
scclResult_t scclTopoGetSystemFromXml(struct scclXml* xml, struct scclTopoSystem** topoSystem);
// Print the system paths.
scclResult_t scclTopoPrint(struct scclTopoSystem* system);
// Get the compute capability (minimum and maximum are returned through ccMin / ccMax).
scclResult_t scclTopoGetCompCap(struct scclTopoSystem* system, int* ccMin, int* ccMax);
// Convert an ID to an index.
scclResult_t scclTopoIdToIndex(struct scclTopoSystem* system, int type, int64_t id, int* index);
// Convert a rank to an index.
scclResult_t scclTopoRankToIndex(struct scclTopoSystem* system, int rank, int* index);
// Convert a device ID to a rank.
scclResult_t scclTopoDevToRank(struct scclTopoSystem* system, int dev, int* rank);
// Get the XGMI speed for the given GCN architecture name.
float scclTopoXGMISpeed(const char* gcn);
// Get local network information.
scclResult_t scclTopoGetLocalNet(struct scclTopoSystem* system, int rank, int channelId, int* id);
// Get local GPU information.
scclResult_t scclTopoGetLocalGpu(struct scclTopoSystem* system, int net, int* gpuIndex);
// Get CPU type information (architecture, vendor, model).
scclResult_t scclTopoCpuType(struct scclTopoSystem* system, int* arch, int* vendor, int* model);
// Look up the CPU affinity.
scclResult_t scclTopoGetCpuAffinity(struct scclTopoSystem* system, int rank, cpu_set_t* affinity);
// Get the number of GPUs.
scclResult_t scclTopoGetGpuCount(struct scclTopoSystem* system, int* count);
// Get the number of network interfaces.
scclResult_t scclTopoGetNetCount(struct scclTopoSystem* system, int* count);
// Get the number of NVS nodes.
// NOTE(review): the original comment glossed NVS as "non-volatile storage";
// presumably NVSwitch is meant - confirm.
scclResult_t scclTopoGetNvsCount(struct scclTopoSystem* system, int* count);
// Get the local rank.
scclResult_t scclTopoGetLocalRank(struct scclTopoSystem* system, int rank, int* localRank);

// // Get the system topology
// scclResult_t scclTopoGetSystem(struct scclTopoComm* comm, struct scclTopoSystem** system);
scclResult_t scclTopoGetSystem(struct scclTopoSystem** system);

} // namespace topo
} // namespace topology
} // namespace hardware
} // namespace sccl

#endif