#pragma once

#include <vector>
#include <queue>
#include <unordered_map>
#include <cstring> // 为了使用strlen

#include "base.h"
#include "graph_utils.h"

namespace sccl {
namespace hardware {
namespace topology {
namespace graph {

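// Set the bandwidth and speed of the direct links along a path.
// NOTE(review): no declaration follows this comment in this header — confirm whether one was removed or lives elsewhere.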

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Finds paths between GPU nodes in the topology graph and derives the
///        GPU point-to-point map from them.
///
/// Only the declarations are visible in this header; the notes below restate
/// the intent given by the original comments and hedge anything the header
/// itself does not establish.
class PathFinder {
public:
    /// @brief Constructor.
    /// @param bootstrap_comm        Bootstrap communicator (presumably the source of the rank fields below — TODO confirm in the .cpp).
    /// @param node_info_vec         Byte buffer holding node information (presumably viewed through node_container_ — TODO confirm).
    /// @param node_info_total_bytes Total size of the node information, in bytes.
    PathFinder(const BootstrapComm_t* bootstrap_comm, std::vector<char>& node_info_vec, size_t node_info_total_bytes);

    /// @brief Computes the point-to-point mapping between GPU nodes in the topology graph.
    /// @param topo_graph Graph object that receives the result.
    /// @return An scclResult_t status code (specific values are not visible from this header).
    scclResult_t computeTopoGpuP2pMap(scclTopoGraph_t* topo_graph);

    /// @brief Prints the GPU paths.
    void printGpuPaths();

private:
    /// @brief Uses breadth-first search (BFS) to find the shortest paths from the
    ///        starting GPU node to the other GPU nodes.
    /// @param start_node_id Id of the GPU node the search starts from.
    void bfsFindGpuPaths(uint64_t start_node_id);

    /// @brief Looks up a node by its node.id.
    /// @param id Node id to search for.
    /// @return Pointer to the matching node (presumably nullptr when the id is unknown — TODO confirm).
    const scclTopoNode_t* findNodeById(uint64_t id) const;

    /// @brief Determines the link type from the nodes that make up a path.
    /// @param path      Sequence of node ids forming the path.
    /// @param link_type Output parameter receiving the determined link type.
    /// @return An scclResult_t status code (specific values are not visible from this header).
    scclResult_t determineLinkType(const std::vector<uint64_t>& path, LinkType_t* link_type);

private:
    // Container holding the topology node data.
    ByteSpanArray<scclTopoNode_t> node_container_;

    // Unordered map from a graph node id to the ids of its neighbors.
    std::unordered_map<uint64_t, std::vector<uint64_t>> graph_node_neighbors_;
    // Unordered map from each GPU node id to all paths from it to the other GPU nodes.
    std::unordered_map<uint64_t, std::vector<std::vector<uint64_t>>> gpu_paths_;
    // Mapping from node.id to the node's index in the node span.
    std::unordered_map<uint64_t, size_t> id_to_index_;

    int rank        = -1; // Global rank of the current node
    int nRanks      = 0;  // Total number of nodes (ranks)
    int localRank   = -1; // Rank of the current node within its local compute node
    int nLocalRanks = 0;  // Total number of ranks in the local compute node
    int interRank   = -1; // Position of this whole compute node among all compute nodes
    int nInterRanks = 0;  // Number of compute nodes overall
};

/// @brief Derives the GPU rank from a node id.
/// @param node_id     Id of the topology node.
/// @param nLocalRanks Number of ranks in the local compute node (presumably used to decode the rank from the id — TODO confirm in the implementation).
/// @return The GPU rank for node_id (behavior for non-GPU or invalid ids is not visible from this header).
int getGpuRankFromNodeId(uint64_t node_id, int nLocalRanks);

} // namespace graph
} // namespace topology
} // namespace hardware
} // namespace sccl