完成各个rank中的节点全部信息的allgather

379c4128 · lishen · ecf8df33 · 379c4128 · 379c4128 · 379c4128
Commit 379c4128 authored Jul 20, 2025 by lishen
9 changed files
--- a/src/hardware/net/net_socket/socket.h
+++ b/src/hardware/net/net_socket/socket.h
@@ -230,6 +230,7 @@ public:
 /**
 * @brief 管理接受连接的套接字的类，继承自scclSocketManager。
 *
+ * 服务器端接受客户端的连接请求，创建一个新的套接字用于通信
 * 该类在构造时初始化套接字，并接受来自监听套接字的连接。
 */
 class scclSocketAcceptManager : public scclSocketManager {

--- a/src/hardware/net/net_utils.cpp
+++ b/src/hardware/net/net_utils.cpp
@@ -98,13 +98,13 @@ bool matchIfList(const char* string, int port, struct netIf* ifList, int listSiz
 }

 scclResult_t printNetProps(const scclNetProperties_t* props, int rank, int localRank) {
-    printf("rank=%d, localRank=%d, device name=%s, pciPath=%s, guid=%lu, ptrSupport=%d, speed=%d, port=%d, latency=%f, maxComms=%d, maxRecvs=%d\n",
+    printf("rank=%d, localRank=%d, device name=%s, pciPath=%s, guid=%lu, ptrSupport=%u, speed=%d, port=%d, latency=%f, maxComms=%d, maxRecvs=%d\n",
           rank,
           localRank,
           props->name,
           props->pciPath,
           props->guid,
-           props->ptrSupport,
+           static_cast<unsigned int>(props->ptrSupport),
           props->speed,
           props->port,
           props->latency,

--- a/src/hardware/net/net_utils.h
+++ b/src/hardware/net/net_utils.h
@@ -7,7 +7,7 @@ namespace sccl {
 namespace hardware {
 namespace net {

-typedef enum {
+typedef enum : uint8_t {
    SCCL_PTR_HOST   = 0x1,
    SCCL_PTR_CUDA   = 0x2,
    SCCL_PTR_DMABUF = 0x4
@@ -19,15 +19,15 @@ constexpr int SCCL_NET_HANDLE_MAXSIZE = 128;

 ////////////////////////////////// 用于定义网络设备 //////////////////////////////////
 typedef struct {
-    char* name;     // 主要用于日志记录。
-    char* pciPath;  // PCI设备在/sys中的路径。
-    uint64_t guid;  // NIC芯片的唯一标识符。对于具有多个PCI功能（物理或虚拟）的卡非常重要。
-    int ptrSupport; // [SCCL_PTR_HOST|SCCL_PTR_CUDA|SCCL_PTR_DMABUF]
-    int speed;      // 端口速度，单位为Mbps。
-    int port;       // 端口号。
-    float latency;  // 网络延迟
-    int maxComms;   // 我们可以创建的最大通信数量
-    int maxRecvs;   // 最大分组接收数量。
+    char* name;         // Used mainly for logging.
+    char* pciPath;      // Path of the PCI device under /sys.
+    uint64_t guid;      // Unique identifier of the NIC chip; important for cards with multiple PCI functions (physical or virtual).
+    uint8_t ptrSupport; // Bit flags: [SCCL_PTR_HOST|SCCL_PTR_CUDA|SCCL_PTR_DMABUF]
+    int speed;          // Port speed, in Mbps.
+    int port;           // Port number.
+    float latency;      // Network latency.
+    int maxComms;       // Maximum number of communications we can create.
+    int maxRecvs;       // Maximum number of grouped receives.
 } scclNetProperties_t;

 /**

--- a/src/hardware/topology/bootstrap/bootstrap.cpp
+++ b/src/hardware/topology/bootstrap/bootstrap.cpp
--- a/src/hardware/topology/bootstrap/bootstrap.h
+++ b/src/hardware/topology/bootstrap/bootstrap.h
@@ -28,21 +28,23 @@ public:
    // 初始化bootstrap通信环境
    scclResult_t init(struct BootstrapComm* bootstrap_comm);

-    // 广播节点信息
-    scclResult_t bootstrapAllGather(struct scclNodeInfo*);
-
 private:
    // 创建根节点的数据广播
-    scclResult_t bootstrapRootGatherAndBroadcast(void* send_data, void* recv_data);
+    scclResult_t bootstrapRootGatherAndBroadcast(struct BootstrapNodeBasic* send_data_basic, std::vector<struct BootstrapNodeBasic>& recv_data_basic);

    // 初始化唯一ID信息结构体
    scclResult_t bootstrapCommInitNodeInfo(scclNet_t* scclNet, struct scclNodeInfo* node_info);

-    // scclResult_t bootstrapGetAllNodes(const struct scclNodeInfo* , struct BootstrapComm* comm);
+    // 广播节点信息
+    scclResult_t
+    bootstrapCommAllGather(std::vector<struct BootstrapNodeBasic>& all_node_basic, struct scclNodeInfo* node_info, struct scclNodeInfoSet* node_info_set);

 private:
-    int rank, nRanks;             // 初始化阶段获取MPI的值
-    int localRank, nLocalRanks;   // 通过bootstrapRootGatherAndBroadcast函数确定值
+    int rank, nRanks;           // 初始化阶段获取MPI的值
+    int localRank, nLocalRanks; // 通过bootstrapRootGatherAndBroadcast函数确定值
+    int interRank, nInterRanks; // 整个节点在全部节点中的位置
+
+    // TODO: 用于控制套接字终端的变量，目前不知道在哪里使用
    volatile uint32_t* abortFlag; // 中止标志，非阻塞套接字设置

    // 外部传入的0号节点的基础信息
@@ -54,12 +56,8 @@ private:
    pthread_mutex_t bootstrapMutex = PTHREAD_MUTEX_INITIALIZER;
    pthread_cond_t bootstrapCond   = PTHREAD_COND_INITIALIZER;

-    // 标志是否已经初始化
-    // 线程池变量
-    int max_pthreads               = 0;       // 用于存储最大并行线程数的整型变量
-    class ThreadPool* pthread_pool = nullptr; // 指向ThreadPool类实例的指针，初始值为nullptr
-    scclIpcSocket_t* ipcsocket     = nullptr; // 指向scclIpcSocket类实例的指针，初始值为nullptr
-    scclSocket_t* my_listen_sock   = nullptr; // 指向scclSocket类实例的指针，初始值为nullptr
+    // 节点内通信的类
+    scclIpcSocket_t* ipcsocket = nullptr; // 指向scclIpcSocket类实例的指针，初始值为nullptr
 };

 } // namespace bootstrap

--- a/src/hardware/topology/bootstrap/bootstrap_utils.cpp
+++ b/src/hardware/topology/bootstrap/bootstrap_utils.cpp
@@ -9,7 +9,7 @@ namespace bootstrap {

 ////////////////////////////// 结构体定义 //////////////////////////////
 // 构造函数定义
-scclNodeInfoSet::scclNodeInfoSet(int nRanks) : nUniqueInfos(nRanks) {
+scclNodeInfoSet::scclNodeInfoSet(int nRanks) {
    printf("scclNodeInfoSet 构造函数\n");
    node_info_vec.reserve(nRanks); // 预留空间
    printf("scclNodeInfoSet 预留空间\n");
@@ -21,6 +21,8 @@ void BootstrapComm::init(int rank, int nRanks, int localRank, int nLocalRanks) {
    this->nRanks      = nRanks;
    this->localRank   = localRank;
    this->nLocalRanks = nLocalRanks;
+    this->interRank   = rank / nLocalRanks;
+    this->nInterRanks = nRanks / nLocalRanks;
    node_info_set     = new scclNodeInfoSet(nRanks); // 假设需要动态分配
 };

@@ -192,16 +194,18 @@ scclResult_t getBusId(int hipDev, int64_t* busId) {
 }

 // 函数：打印 scclNodeInfo 结构体的信息
-scclResult_t printNodeInfo(struct scclNodeInfo* info) {
+scclResult_t printNodeInfo(const std::string& prefix, struct scclNodeInfo* info) {
    char addrline[net::SOCKET_NAME_MAXLEN + 1];

-    if(info->localRank == 0) {
+    // if(info->localRank == 0) {
+    if(1) {
        printf("==========================================\n"
-               "Total Rank: %d, Local Rank: %d, Host Hash: %lu, PID Hash: %lu\n"
+               "%s, Total Rank: %d, Local Rank: %d, Host Hash: %lu, PID Hash: %lu\n"
               "gpu: dev=%d, gpu.name=%s, gcn=%s, compCap=%d\n"
-               "net: count=%d, device name=%s, pciPath=%s, guid=%lu, ptrSupport=%d, speed=%d, port=%d, latency=%f, maxComms=%d, maxRecvs=%d\n"
-               "cpu: socketAddr=%s\n pci: busId=%ld\n"
+               "net: count=%d, device name=%s, pciPath=%s, guid=%lu, ptrSupport=%u, speed=%d, port=%d, latency=%f, maxComms=%d, maxRecvs=%d\n"
+               "cpu: socketAddr=%s\npci: busId=%ld"
               "\n==========================================\n",
+               prefix.c_str(),
               info->rank,
               info->localRank,
               info->hostHash,
@@ -211,15 +215,15 @@ scclResult_t printNodeInfo(struct scclNodeInfo* info) {
               info->localNode.gpu.gcn,
               info->localNode.gpu.compCap,
               info->localNode.net.count,
-               info->localNode.net.props.name,
-               info->localNode.net.props.pciPath,
-               info->localNode.net.props.guid,
-               info->localNode.net.props.ptrSupport,
-               info->localNode.net.props.speed,
-               info->localNode.net.props.port,
-               info->localNode.net.props.latency,
-               info->localNode.net.props.maxComms,
-               info->localNode.net.props.maxRecvs,
+               info->localNode.net.name,
+               info->localNode.net.pciPath,
+               info->localNode.net.guid,
+               static_cast<unsigned int>(info->localNode.net.ptrSupport),
+               info->localNode.net.speed,
+               info->localNode.net.port,
+               info->localNode.net.latency,
+               info->localNode.net.maxComms,
+               info->localNode.net.maxRecvs,
               net::net_socket::scclSocketToString(&info->localNode.cpu.listen_sock.addr, addrline),
               info->localNode.pci.busId);
    }

--- a/src/hardware/topology/bootstrap/bootstrap_utils.h
+++ b/src/hardware/topology/bootstrap/bootstrap_utils.h
@@ -37,9 +37,9 @@ struct bootstrapRootArgs {
 // 用于初始建立连接阶段，0号rank之外的进程向其传递的信息
 struct BootstrapNodeBasic {
    int rank;
-    int nRanks;               // 进程的总数量
-    uint64_t hostHash;        // 用于区分host的CPU编号
-    scclSocketAddress_t addr; // 各个进程的监听套接字地址，用于网络通信
+    int nRanks;        // Total number of processes
+    uint64_t hostHash; // Host CPU identifier used to tell hosts apart
+    scclSocket_t sock; // Socket of each process, used for network communication (previously only the listen address; presumably still carries it - TODO confirm)
 };

 // 定义每个rank所持有的所有拓扑节点
@@ -53,13 +53,21 @@ struct topoLocalNode {
    struct {
        int dev;      // NVML设备编号
        char name[8]; // 设备名称
-        char gcn[7];  // GCN架构名称
+        char gcn[8];  // GCN架构名称
        int compCap;  // CUDA计算能力
    } gpu;            // GPU节点
    struct {
-        int count; // 网卡数量
-        net::scclNetProperties_t props;
-    } net; // 网络节点
+        int count;          // 网卡数量
+        char name[8];       // 主要用于日志记录。
+        char pciPath[128];  // PCI设备在/sys中的路径。
+        uint64_t guid;      // NIC芯片的唯一标识符。对于具有多个PCI功能（物理或虚拟）的卡非常重要。
+        uint8_t ptrSupport; // [SCCL_PTR_HOST|SCCL_PTR_CUDA|SCCL_PTR_DMABUF]
+        int speed;          // 端口速度，单位为Mbps。
+        int port;           // 端口号。
+        float latency;      // 网络延迟
+        int maxComms;       // 可以创建的最大通信数量
+        int maxRecvs;       // 最大分组接收数量。
+    } net;                  // 网络节点
 };

 // 定义结构体 scclNodeInfo，用于存储每个rank的通信节点的信息
@@ -74,7 +82,6 @@ struct scclNodeInfo {

 // 每个节点的信息
 struct scclNodeInfoSet {
-    int nUniqueInfos; // 通信节点的数量
    std::vector<struct scclNodeInfo> node_info_vec;

    // 构造函数声明
@@ -95,8 +102,11 @@ public:
    int nRanks      = 0;   // 总的节点数量
    int localRank   = -1;  // 当前节点在本地计算节点中的排名
    int nLocalRanks = 0;   // 本地计算节点中的节点总数
-    int hipDev      = -1;  // CUDA 设备 ID
-    int deviceCnt   = 0;   // 设备数量
+    int interRank   = -1;  // 整个节点在全部节点中的位置
+    int nInterRanks = 0;   // 全局拥有节点的个数
+
+    int hipDev    = -1; // CUDA 设备 ID
+    int deviceCnt = 0;  // 设备数量

    // proxy通信
    uint64_t magic;               // 魔术数，用于验证结构体
@@ -125,7 +135,7 @@ scclResult_t getBusId(int hipDev, int64_t* busId);
 int scclCudaCompCap(void);

 // 打印唯一的拓扑信息
-scclResult_t printNodeInfo(struct scclNodeInfo* info);
+scclResult_t printNodeInfo(const std::string& prefix, struct scclNodeInfo* info);

 // 实现类似于std::span的功能，将字节数组转换为类型数组
 template <typename T>

--- a/src/utils/thread_pool.cpp
+++ b/src/utils/thread_pool.cpp
@@ -108,6 +108,12 @@ bool ThreadPool::allTasksCompleted() {
    return completed;
 }

+/**
+ * @brief Report how many worker threads the pool holds, i.e. the element count of `workers`.
+ * @return workers.size() converted to int; size() is used instead of capacity() because capacity() only reports reserved storage and may exceed the number of threads actually stored.
+ */
+int ThreadPool::getThreadPoolSize() { return static_cast<int>(workers.size()); }
+
 /**
 * 设置指定线程的CPU亲和性，将其绑定到指定的核心上
 * @param thread 需要设置亲和性的线程

--- a/src/utils/thread_pool.h
+++ b/src/utils/thread_pool.h
@@ -35,6 +35,8 @@ public:

    // 检查是否所有任务都已完成
    bool allTasksCompleted();
+    // 获取线程池中工作线程的数量
+    int getThreadPoolSize();

 private:
    std::vector<pthread_t> workers;          // 工作线程列表