"examples/dreambooth/train_dreambooth_lora_hidream.py" did not exist on "c375903db58826494d858e02b44d21b42669ff5e"
Commit a4ac3320 authored by lishen

Implement ipcsocket via a thread pool to support intra-node communication

parent d9d23f34
......@@ -7,7 +7,7 @@
namespace sccl {
namespace hardware {
namespace net {
namespace device {
namespace net_ib {
#define ASSIGN_SYM(container, symbol, name) container->name = &symbol;
......@@ -102,7 +102,7 @@ scclResult_t buildIbvSymbols(struct scclIbvSymbols* ibvSymbols) {
return scclSuccess;
}
} // namespace device
} // namespace net_ib
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -6,7 +6,7 @@
namespace sccl {
namespace hardware {
namespace net {
namespace device {
namespace net_ib {
/* IB Verbs Function Pointers*/
struct scclIbvSymbols {
......@@ -41,7 +41,7 @@ struct scclIbvSymbols {
/* Constructs IB verbs symbols per rdma-core linking or dynamic loading mode */
scclResult_t buildIbvSymbols(struct scclIbvSymbols* ibvSymbols);
} // namespace device
} // namespace net_ib
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -13,7 +13,7 @@
namespace sccl {
namespace hardware {
namespace net {
namespace device {
namespace net_ib {
static pthread_once_t initOnceControl = PTHREAD_ONCE_INIT;
static scclResult_t initResult;
......@@ -250,7 +250,7 @@ scclResult_t wrap_ibv_post_recv(struct ibv_qp* qp, struct ibv_recv_wr* wr, struc
return scclSuccess;
}
} // namespace device
} // namespace net_ib
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -10,7 +10,7 @@
namespace sccl {
namespace hardware {
namespace net {
namespace device {
namespace net_ib {
typedef enum ibv_return_enum : uint8_t {
IBV_SUCCESS = 0, //!< The operation was successful
......@@ -112,7 +112,7 @@ scclResult_t wrap_ibv_post_recv(struct ibv_qp* qp, struct ibv_recv_wr* wr, struc
// 获取事件类型字符串
scclResult_t wrap_ibv_event_type_str(char** ret, enum ibv_event_type event);
} // namespace device
} // namespace net_ib
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -9,15 +9,12 @@
#include <netdb.h>
#include "net_ib.h"
#include "socket.h"
#include "rocm_wrap.h"
#include "base.h"
namespace sccl {
namespace hardware {
namespace net {
namespace device {
namespace net_ib {
///////////////////////////////////////// 环境变量读取及设置 /////////////////////////////////////////
......@@ -59,11 +56,8 @@ SCCL_PARAM(IbSplitDataOnQps, "IB_SPLIT_DATA_ON_QPS", 1);
///////////////////////////////////////// 参数及结构体设置 /////////////////////////////////////////
#define MAXNAMESIZE 64
#define MAX_IF_NAME_SIZE 16
static char scclIbIfName[MAX_IF_NAME_SIZE + 1];
static union host::scclSocketAddress scclIbIfAddr;
// 定义一个静态变量 scclNIbDevs,用于存储 InfiniBand 设备的数量
static int scclNIbDevs = -1;
static char scclIbIfName[MAX_IF_NAME_SIZE + 1]; // 用于存储网络接口名称的字符数组
static union net_socket::scclSocketAddress scclIbIfAddr; // 定义一个联合体类型的变量,用于存储网络接口地址
struct scclIbMr {
uintptr_t addr; // 内存地址
......@@ -117,7 +111,7 @@ pthread_mutex_t scclIbLock = PTHREAD_MUTEX_INITIALIZER;
static int scclIbRelaxedOrderingEnabled = 0;
// 定义一个线程局部变量,用于存储重用的地址信息
static thread_local union host::scclSocketAddress reusedAddr;
static thread_local union net_socket::scclSocketAddress reusedAddr;
// 定义一个线程局部变量,用于存储重用的套接字文件描述符
static thread_local int reusedSockfd = -1;
......@@ -128,7 +122,7 @@ pthread_t scclIbAsyncThread;
// 定义一个常量,表示InfiniBand网络接口的最大接收数量
static constexpr int SCCL_NET_IB_MAX_RECVS = 8;
// 定义一个常量,表示最大字符串长度
static constexpr int MAX_STR_LEN = 8;
static constexpr int MAX_STR_LEN = 255;
// 为每个并发接收支持SCCL_NET_MAX_REQUESTS
static constexpr int MAX_REQUESTS = (SCCL_NET_MAX_REQUESTS * SCCL_NET_IB_MAX_RECVS);
......@@ -146,12 +140,12 @@ scclIbRequest 结构体用于封装 InfiniBand 通信请求的详细信息,包
联合体 union 根据请求类型(发送或接收)存储不同的数据结构,以支持灵活的通信操作。
*/
struct scclIbRequest {
struct scclIbVerbs* verbs; // 指向 scclIbVerbs 结构体的指针,包含 Infiniband 相关的操作
int type; // 请求的类型,例如发送或接收
int events; // 事件标志, 用于记录请求相关的事件状态
struct host::scclSocket* sock; // 指向 scclSocket 结构体的指针,表示网络套接字
struct scclIbGidInfo* gidInfo; // 指向 scclIbGidInfo 结构体的指针,包含全局标识符信息
int nreqs; // 请求的数量
struct scclIbVerbs* verbs; // 指向 scclIbVerbs 结构体的指针,包含 Infiniband 相关的操作
int type; // 请求的类型,例如发送或接收
int events; // 事件标志, 用于记录请求相关的事件状态
struct net_socket::scclSocket* sock; // 指向 scclSocket 结构体的指针,表示网络套接字
struct scclIbGidInfo* gidInfo; // 指向 scclIbGidInfo 结构体的指针,包含全局标识符信息
int nreqs; // 请求的数量
// 联合体,用于存储不同类型请求的特定信息
union {
// send: 发送请求的相关信息
......@@ -195,7 +189,7 @@ struct scclIbSendComm {
struct scclIbRequest* fifoReqs[MAX_REQUESTS][SCCL_NET_IB_MAX_RECVS]; // FIFO请求指针数组
struct ibv_send_wr wrs[SCCL_NET_IB_MAX_RECVS + 1]; // 发送工作请求结构体数组
struct ibv_sge sges[SCCL_NET_IB_MAX_RECVS]; // 散布-聚集元素结构体数组
struct host::scclSocket sock; // 套接字结构体
struct net_socket::scclSocket sock; // 套接字结构体
int ready; // 是否准备好
struct ibv_qp* qps[SCCL_IB_MAX_QPS]; // 队列对指针数组
......@@ -206,33 +200,6 @@ struct scclIbSendComm {
struct scclIbGidInfo gidInfo; // GID信息结构体
};
/*IB的通信状态*/
enum scclIbCommState : uint8_t {
scclIbCommStateStart = 0, // 初始状态
scclIbCommStateConnect = 1, // 尝试连接状态
scclIbCommStateAccept = 3, // 接受连接状态
scclIbCommStateSend = 4, // 发送数据状态
scclIbCommStateRecv = 5, // 接收数据状态
scclIbCommStateConnecting = 6, // 正在连接状态
scclIbCommStateConnected = 7, // 已连接状态
scclIbCommStatePendingReady = 8, // 等待准备状态
};
/*通信的阶段*/
struct scclIbCommStage {
enum scclIbCommState state; // 通信阶段的状态
int offset; // 数据偏移量
void* buffer; // 用于通信的缓冲区指针
void* comm; // 通信对象指针
};
/*监听通信的上下文*/
struct scclIbListenComm {
int dev; // 设备标识符
struct host::scclSocket sock; // 用于网络通信的套接字
struct scclIbCommStage stage; // 通信阶段的状态
};
struct scclIbQpInfo {
uint32_t lid;
uint8_t ib_port;
......@@ -270,7 +237,7 @@ struct scclIbRemFifo {
struct scclIbRecvComm {
struct scclIbVerbs verbs;
struct scclIbRemFifo remFifo;
struct host::scclSocket sock;
struct net_socket::scclSocket sock;
int ready;
struct ibv_qp* qps[SCCL_IB_MAX_QPS];
int nqps;
......@@ -292,7 +259,7 @@ static_assert((offsetof(struct scclIbRecvComm, remFifo) % 32) == 0, "scclIbSendC
* @param args 传入参数,应转换为ibv_context结构体指针
* @return void* 线程返回值,始终返回NULL
*/
static void* scclIbAsyncThreadMain(void* args) {
void* scclNetIb::scclIbAsyncThreadMain(void* args) {
// 将传入的参数转换为InfiniBand上下文结构体指针
struct ibv_context* context = (struct ibv_context*)args;
......@@ -337,7 +304,7 @@ static void* scclIbAsyncThreadMain(void* args) {
* @param realPort 输出参数,记录实际端口号
* @return scclResult_t 返回操作结果,成功返回scclSuccess
*/
static scclResult_t scclIbGetPciPath(char* devName, char** path, int* realPort) {
scclResult_t scclNetIb::scclIbGetPciPath(char* devName, char** path, int* realPort) {
// 定义一个字符数组用于存储设备路径
char devicePath[PATH_MAX];
// 构造设备路径字符串,格式为 "/sys/class/infiniband/<devName>/device"
......@@ -396,14 +363,14 @@ static int firstBitSet(int val, int max) {
* @param width 输入的宽度值
* @return 返回ibvWidths数组中对应的宽度索引值
*/
static int scclIbWidth(int width) { return ibvWidths[firstBitSet(width, sizeof(ibvWidths) / sizeof(int) - 1)]; }
int scclNetIb::scclIbWidth(int width) { return ibvWidths[firstBitSet(width, sizeof(ibvWidths) / sizeof(int) - 1)]; }
/**
* 根据给定的速度值查找并返回对应的IB传输速率
* @param speed 输入的速度值
* @return 返回ibvSpeeds数组中第一个匹配的IB传输速率
*/
static int scclIbSpeed(int speed) { return ibvSpeeds[firstBitSet(speed, sizeof(ibvSpeeds) / sizeof(int) - 1)]; }
int scclNetIb::scclIbSpeed(int speed) { return ibvSpeeds[firstBitSet(speed, sizeof(ibvSpeeds) / sizeof(int) - 1)]; }
/**
* 检查当前IB设备是否支持宽松排序(Relaxed Ordering)模式
......@@ -412,7 +379,7 @@ static int scclIbSpeed(int speed) { return ibvSpeeds[firstBitSet(speed, sizeof(i
* @note 通过查询IBVERBS_1.8 API的ibv_reg_mr_iova2函数来检测IBV_ACCESS_RELAXED_ORDERING支持
* @see scclParamIbPciRelaxedOrdering() 获取当前配置的RO模式
*/
static int scclIbRelaxedOrderingCapable(void) {
int scclNetIb::scclIbRelaxedOrderingCapable(void) {
int roMode = scclParamIbPciRelaxedOrdering();
scclResult_t r = scclInternalError;
if(roMode == 1 || roMode == 2) {
......@@ -432,7 +399,7 @@ static int scclIbRelaxedOrderingCapable(void) {
* @param shownIbHcaEnv 计数器,用于控制日志输出次数
* @return char* 处理后的IB设备环境变量值
*/
static char* scclIbGetIbHca(int& shownIbHcaEnv, bool* searchNot, bool* searchExact) {
char* scclNetIb::scclIbGetIbHca(int& shownIbHcaEnv, bool* searchNot, bool* searchExact) {
// 检查用户是否定义了要使用的IB设备:端口
char* userIbEnv = getenv("SCCL_IB_HCA");
if(userIbEnv != NULL && shownIbHcaEnv++ == 0)
......@@ -463,7 +430,7 @@ static char* scclIbGetIbHca(int& shownIbHcaEnv, bool* searchNot, bool* searchExa
* @note 缓冲区最大长度为MAX_STR_LEN,超出部分会被截断
* 文件内容末尾会自动添加字符串结束符'\0'
*/
scclResult_t scclGetStrFromSys(const char* path, const char* fileName, char* strValue) {
scclResult_t scclNetIb::scclGetStrFromSys(const char* path, const char* fileName, char* strValue) {
char filePath[PATH_MAX];
sprintf(filePath, "%s/%s", path, fileName);
int offset = 0;
......@@ -494,7 +461,7 @@ scclResult_t scclGetStrFromSys(const char* path, const char* fileName, char* str
* @param ibDev IB设备号
* @return scclResult_t 返回scclSuccess表示支持,返回scclSystemError表示不支持
*/
scclResult_t scclIbGdrSupport(int ibDev) {
scclResult_t scclNetIb::scclIbGdrSupport(int ibDev) {
static int moduleLoaded = -1;
if(moduleLoaded == -1) {
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
......@@ -527,7 +494,7 @@ scclResult_t scclIbGdrSupport(int ibDev) {
* @param dev 设备索引
* @return scclResult_t 返回scclSuccess表示支持,scclSystemError表示不支持
*/
scclResult_t scclIbDmaBufSupport(int dev) {
scclResult_t scclNetIb::scclIbDmaBufSupport(int dev) {
static int dmaBufSupported = -1;
if(dmaBufSupported == -1) {
scclResult_t res;
......@@ -552,9 +519,9 @@ failure:
}
struct scclIbHandle {
union host::scclSocketAddress connectAddr; // Filled by the target (目标填充)
uint64_t magic; // random number to help debugging (用于调试的随机数)
struct scclIbCommStage stage; // Used by the other side when connecting (连接时由另一侧使用)
union net_socket::scclSocketAddress connectAddr; // Filled by the target (目标填充)
uint64_t magic; // random number to help debugging (用于调试的随机数)
struct scclIbCommStage stage; // Used by the other side when connecting (连接时由另一侧使用)
};
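// This handle is what listen() fills in and what the peer later feeds to connect(); listen()
// static_asserts that it fits within SCCL_NET_HANDLE_MAXSIZE so it can travel through the
// generic out-of-band handle buffer.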
/**
......@@ -572,7 +539,7 @@ struct scclIbHandle {
* @note 该函数会递增设备的PD引用计数,并在首次调用时为设备分配PD
* @note 创建的CQ大小为2*MAX_REQUESTS*IB_QPS_PER_CONNECTION,以支持接收请求的双重完成
*/
scclResult_t scclIbInitVerbs(int dev, struct ibv_context* ctx, struct scclIbVerbs* verbs) {
scclResult_t scclNetIb::scclIbInitVerbs(int dev, struct ibv_context* ctx, struct scclIbVerbs* verbs) {
verbs->dev = dev;
pthread_mutex_lock(&scclIbDevs[dev].lock);
......@@ -593,7 +560,20 @@ scclResult_t scclIbInitVerbs(int dev, struct ibv_context* ctx, struct scclIbVerb
return scclSuccess;
}
scclResult_t scclIbCreateQp(uint8_t ib_port, struct scclIbVerbs* verbs, int access_flags, struct ibv_qp** qp) {
/**
* 创建并初始化一个InfiniBand队列对(QP)
*
* @param ib_port IB端口号
* @param verbs IB verbs结构体指针
* @param access_flags QP访问权限标志
* @param qp 输出的QP指针
*
* @return 返回scclSuccess表示成功,否则返回错误码
*
* @note QP类型为可靠连接(RC),发送队列大小为2*MAX_REQUESTS,
* 接收队列大小为MAX_REQUESTS,支持内联数据发送(如果配置启用)
*/
scclResult_t scclNetIb::scclIbCreateQp(uint8_t ib_port, struct scclIbVerbs* verbs, int access_flags, struct ibv_qp** qp) {
struct ibv_qp_init_attr qpInitAttr;
memset(&qpInitAttr, 0, sizeof(struct ibv_qp_init_attr));
qpInitAttr.send_cq = verbs->cq;
......@@ -616,7 +596,20 @@ scclResult_t scclIbCreateQp(uint8_t ib_port, struct scclIbVerbs* verbs, int acce
return scclSuccess;
}
scclResult_t scclIbRtrQp(struct ibv_qp* qp, uint32_t qpn, struct scclIbQpInfo* info) {
/**
* 将IB QP状态修改为RTR(Ready to Receive)状态
*
* @param qp IB QP指针
* @param qpn 目标QP号
* @param info QP配置信息,包含MTU、链路层类型、端口号等参数
*
* @return 成功返回scclSuccess,失败返回错误码
*
* @note 根据链路层类型(以太网/IB)设置不同的AH属性
* 以太网需要设置全局路由头(GRH)相关参数
* IB链路需要设置目标LID
*/
scclResult_t scclNetIb::scclIbRtrQp(struct ibv_qp* qp, uint32_t qpn, struct scclIbQpInfo* info) {
struct ibv_qp_attr qpAttr;
memset(&qpAttr, 0, sizeof(struct ibv_qp_attr));
qpAttr.qp_state = IBV_QPS_RTR;
......@@ -645,7 +638,16 @@ scclResult_t scclIbRtrQp(struct ibv_qp* qp, uint32_t qpn, struct scclIbQpInfo* i
return scclSuccess;
}
scclResult_t scclIbRtsQp(struct ibv_qp* qp) {
/**
* 将IB(InfiniBand)队列对(QP)状态修改为RTS(Ready To Send)状态
*
* @param qp IB队列对指针
* @return 成功返回scclSuccess,失败返回错误码
*
* 该函数配置QP属性并调用ibv_modify_qp将其状态改为RTS状态,
* 设置了超时时间、重试次数、RNR重试次数、SQ PSN和最大RD原子操作数等参数。
*/
scclResult_t scclNetIb::scclIbRtsQp(struct ibv_qp* qp) {
struct ibv_qp_attr qpAttr;
memset(&qpAttr, 0, sizeof(struct ibv_qp_attr));
qpAttr.qp_state = IBV_QPS_RTS;
......@@ -670,7 +672,17 @@ const char* reqTypeStr[] = {"Unused", "Send", "Recv", "Flush"};
static_assert((offsetof(struct scclIbSendComm, fifo) % 32) == 0, "scclIbSendComm fifo must be 32-byte aligned");
static_assert((sizeof(struct scclIbSendFifo) % 32) == 0, "scclIbSendFifo element size must be 32-byte multiples");
scclResult_t scclIbDestroyVerbs(struct scclIbVerbs* verbs) {
/**
* @brief 销毁IB Verbs资源
*
* 释放指定的IB Verbs资源,包括完成队列(CQ)和保护域(PD)。
* 当PD的引用计数减至0时,会自动释放PD资源。
* 该函数是线程安全的,使用互斥锁保护共享资源。
*
* @param verbs 指向要销毁的IB Verbs结构体
* @return scclResult_t 返回操作结果,scclSuccess表示成功
*/
scclResult_t scclNetIb::scclIbDestroyVerbs(struct scclIbVerbs* verbs) {
scclResult_t res;
SCCLCHECK(wrap_ibv_destroy_cq(verbs->cq));
......@@ -684,7 +696,17 @@ returning:
return res;
}
scclResult_t scclIbGetRequest(struct scclIbVerbs* verbs, struct scclIbRequest** req) {
/**
* @brief 从verbs请求池中获取一个未使用的请求结构体
*
* @param verbs 指向scclIbVerbs结构体的指针,包含请求池
* @param req 输出参数,用于返回获取到的请求结构体指针
* @return scclResult_t 成功返回scclSuccess,失败返回scclInternalError
*
* 该函数遍历verbs请求池,查找第一个未使用的请求(SCCL_NET_IB_REQ_UNUSED),
* 初始化其字段后返回。如果所有请求都在使用中,则返回错误。
*/
scclResult_t scclNetIb::scclIbGetRequest(struct scclIbVerbs* verbs, struct scclIbRequest** req) {
for(int i = 0; i < MAX_REQUESTS; i++) {
struct scclIbRequest* r = verbs->reqs + i;
if(r->type == SCCL_NET_IB_REQ_UNUSED) {
......@@ -700,14 +722,36 @@ scclResult_t scclIbGetRequest(struct scclIbVerbs* verbs, struct scclIbRequest**
*req = NULL;
return scclInternalError;
}
scclResult_t scclIbFreeRequest(struct scclIbRequest* r) {
/**
* 释放IB网络请求资源。
*
* 将请求类型标记为未使用状态,但不实际释放内存。
*
* @param r 要释放的IB网络请求指针
* @return 总是返回scclSuccess表示操作成功
*/
scclResult_t scclNetIb::scclIbFreeRequest(struct scclIbRequest* r) {
r->type = SCCL_NET_IB_REQ_UNUSED;
return scclSuccess;
}
scclResult_t scclIbTest(void* request, int* done, int* size);
scclResult_t scclIbMultiSend(struct scclIbSendComm* comm, int slot) {
/**
* @brief 执行IB网络的多发送操作
*
* 该函数处理IB网络的多发送请求,包括设置发送工作请求(WR)和分散/聚集元素(SGE),
* 并处理自适应路由(AR)和QP分割等高级功能。
*
* @param comm 指向scclIbSendComm结构的指针,包含发送通信上下文
* @param slot 要使用的发送槽位索引
* @return scclResult_t 返回操作结果,成功返回scclSuccess,失败返回错误码
*
* @note 1. 支持多QP分割发送,确保128B对齐
* 2. 使用RDMA_WRITE_WITH_IMM发送立即数据
* 3. 当请求数>32时会返回错误
* 4. 自适应路由模式下会发送两次WR
*/
scclResult_t scclNetIb::scclIbMultiSend(struct scclIbSendComm* comm, int slot) {
struct scclIbRequest** reqs = comm->fifoReqs[slot];
volatile struct scclIbSendFifo* slots = comm->fifo[slot];
int nreqs = slots[0].nreqs;
......@@ -792,7 +836,23 @@ scclResult_t scclIbMultiSend(struct scclIbSendComm* comm, int slot) {
return scclSuccess;
}
scclResult_t scclIbPostFifo(struct scclIbRecvComm* comm, int n, void** data, int* sizes, int* tags, void** mhandles, struct scclIbRequest* req) {
/**
* @brief 通过IB Verbs RDMA写入操作向远程FIFO队列提交数据
*
* @param comm 指向接收通信上下文的指针
* @param n 要发送的数据块数量
* @param data 数据指针数组
* @param sizes 数据大小数组
* @param tags 数据标签数组
* @param mhandles 内存句柄数组
* @param req 请求结构体指针
* @return scclResult_t 返回操作结果(scclSuccess表示成功)
*
* @note 该函数会将数据打包到本地FIFO元素中,并通过RDMA写入到远程FIFO队列。
* 每MAX_REQUESTS次操作会触发一次带信号(SIGNALED)的发送,以避免发送队列堵塞。
* 使用IBV_WR_RDMA_WRITE操作码进行数据传输。
*/
scclResult_t scclNetIb::scclIbPostFifo(struct scclIbRecvComm* comm, int n, void** data, int* sizes, int* tags, void** mhandles, struct scclIbRequest* req) {
struct ibv_send_wr wr;
memset(&wr, 0, sizeof(wr));
......@@ -852,10 +912,15 @@ scclResult_t scclIbPostFifo(struct scclIbRecvComm* comm, int n, void** data, int
return scclSuccess;
}
} // namespace net_ib
////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////// scclNetIb调用的函数 ////////////////////////////////////////
namespace net_ib {
scclNetIb::scclNetIb() : scclNetBase("IB") {}
scclNetIb::~scclNetIb() {
if(ibComm != nullptr) {
free(ibComm);
}
}
/**
* @brief 初始化InfiniBand硬件设备
......@@ -872,7 +937,9 @@ namespace net_ib {
* @note 函数内部会处理环境变量SCCL_IB_HCA来过滤特定设备
* @note 使用互斥锁scclIbLock保证线程安全
*/
scclResult_t scclIbInit(void) {
scclResult_t scclNetIb::init() {
SCCLCHECK(scclCalloc(&ibComm, 1));
// 如果IB被禁用,返回内部错误
if(scclParamIbDisable())
return scclInternalError;
......@@ -894,7 +961,7 @@ scclResult_t scclIbInit(void) {
if(scclNIbDevs == -1) {
scclNIbDevs = 0;
// 查找网络接口
if(host::scclFindSocketInterfaces(scclIbIfName, &scclIbIfAddr, MAX_IF_NAME_SIZE, 1) != 1) {
if(net_socket::scclFindSocketInterfaces(scclIbIfName, &scclIbIfAddr, MAX_IF_NAME_SIZE, 1) != 1) {
WARN("NET/IB : No IP interface found.");
return scclInternalError;
}
......@@ -1042,14 +1109,14 @@ scclResult_t scclIbInit(void) {
// line 是设备的相关信息字符串
// scclIbRelaxedOrderingEnabled 是一个布尔值,指示是否启用了Relaxed Ordering
// scclIbIfName 是IB接口的名称
// host::scclSocketToString 是一个函数,用于将socket地址转换为字符串
// net_socket::scclSocketToString 是一个函数,用于将socket地址转换为字符串
// addrline 是存储转换后地址字符串的数组
INFO(SCCL_LOG_NET,
"NET/IB : Using%s %s; OOB %s:%s",
line,
scclIbRelaxedOrderingEnabled ? "[RO]" : "",
scclIbIfName,
host::scclSocketToString(&scclIbIfAddr, addrline));
net_socket::scclSocketToString(&scclIbIfAddr, addrline));
}
pthread_mutex_unlock(&scclIbLock);
}
......@@ -1062,7 +1129,7 @@ scclResult_t scclIbInit(void) {
* @param ndev [out] 用于存储设备数量的指针
* @return scclResult_t 返回操作结果,scclSuccess表示成功
*/
scclResult_t scclIbGetDevicesNum(int* ndev) {
scclResult_t scclNetIb::devices(int* ndev) {
*ndev = scclNIbDevs;
return scclSuccess;
}
......@@ -1077,10 +1144,11 @@ scclResult_t scclIbGetDevicesNum(int* ndev) {
* @param props 用于存储设备属性的结构体指针
* @return scclResult_t 返回操作结果,成功返回scclSuccess
*/
scclResult_t scclIbGetProperties(int dev, scclNetProperties_t* props) {
props->name = scclIbDevs[dev].devName;
props->pciPath = scclIbDevs[dev].pciPath;
props->guid = scclIbDevs[dev].guid;
scclResult_t scclNetIb::getProperties(int dev, scclNetProperties_t* props) {
props->name = scclIbDevs[dev].devName;
props->pciPath = scclIbDevs[dev].pciPath;
props->guid = scclIbDevs[dev].guid;
props->ptrSupport = SCCL_PTR_HOST;
if(scclIbGdrSupport(dev) == scclSuccess) {
props->ptrSupport |= SCCL_PTR_CUDA; // GDR support via nv_peermem
......@@ -1111,41 +1179,60 @@ scclResult_t scclIbGetProperties(int dev, scclNetProperties_t* props) {
* 3. 根据配置决定是否复用套接字
* 4. 启动套接字监听并获取连接地址
*/
scclResult_t scclIbListen(int dev, void* opaqueHandle, void** listenComm) {
// 创建并初始化通信结构体
struct scclIbListenComm* comm;
SCCLCHECK(scclCalloc(&comm, 1));
scclResult_t scclNetIb::listen(int dev, void* opaqueHandle, void** listenComm) {
memset(ibComm, 0, sizeof(struct scclIbListenComm));
struct scclIbHandle* handle = (struct scclIbHandle*)opaqueHandle;
// 静态断言,确保 scclIbHandle 结构体的大小不超过 SCCL_NET_HANDLE_MAXSIZE
static_assert(sizeof(struct scclIbHandle) < SCCL_NET_HANDLE_MAXSIZE, "scclIbHandle size too large");
// 将 handle 指向的内存区域清零,大小为 scclIbHandle 结构体的大小
memset(handle, 0, sizeof(struct scclIbHandle));
// 设置设备和处理句柄
comm->dev = dev;
ibComm->dev = dev;
handle->magic = SCCL_SOCKET_MAGIC;
SCCLCHECK(host::scclSocketInit(&comm->sock, &scclIbIfAddr, handle->magic, host::scclSocketTypeNetIb, NULL, 1));
SCCLCHECK(net_socket::scclSocketInit(&ibComm->sock, &scclIbIfAddr, handle->magic, net_socket::scclSocketTypeNetIb, NULL, 1));
// 如果启用了端口复用,则复用套接字地址和文件描述符
if(scclParamIbSockServerPortReuse()) {
if(reusedSockfd == -1) {
SCCLCHECK(scclSocketListen(&comm->sock));
memcpy(&reusedAddr, &comm->sock.addr, sizeof(union host::scclSocketAddress));
reusedSockfd = comm->sock.fd;
SCCLCHECK(scclSocketListen(&ibComm->sock));
memcpy(&reusedAddr, &ibComm->sock.addr, sizeof(union net_socket::scclSocketAddress));
reusedSockfd = ibComm->sock.fd;
} else {
memcpy(&comm->sock.addr, &reusedAddr, sizeof(union host::scclSocketAddress));
comm->sock.fd = reusedSockfd;
memcpy(&ibComm->sock.addr, &reusedAddr, sizeof(union net_socket::scclSocketAddress));
ibComm->sock.fd = reusedSockfd;
}
} else {
SCCLCHECK(host::scclSocketListen(&comm->sock));
SCCLCHECK(net_socket::scclSocketListen(&ibComm->sock));
}
// 获取套接字地址并设置监听通信
SCCLCHECK(host::scclSocketGetAddr(&comm->sock, &handle->connectAddr));
*listenComm = comm;
SCCLCHECK(net_socket::scclSocketGetAddr(&ibComm->sock, &handle->connectAddr));
*listenComm = ibComm;
return scclSuccess;
}
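// Server-side port reuse above works through the thread_local reusedAddr/reusedSockfd pair:
// the first listen() on a thread binds and records its socket, and subsequent listen() calls
// on that thread hand back the same address and fd instead of opening a new listening port.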
scclResult_t scclIbConnect(int dev, void* opaqueHandle, void** sendComm) {
/**
* @brief 建立IB网络连接并初始化通信资源
*
* 该函数负责完成以下操作:
* 1. 初始化socket连接
* 2. 创建IB QP队列对
* 3. 交换QP信息
* 4. 完成QP状态转换(RTR/RTS)
* 5. 注册内存区域
*
* @param dev 设备索引
* @param opaqueHandle 包含连接信息的句柄
* @param sendComm 输出参数,返回建立的发送通信上下文
* @return scclResult_t 返回操作结果状态码
*
* @note 该函数使用状态机模式处理异步连接过程
* @warning 不能重复连接已建立的sendComm
*/
scclResult_t scclNetIb::connect(int dev, void* opaqueHandle, void** sendComm) {
struct scclIbHandle* handle = (struct scclIbHandle*)opaqueHandle;
struct scclIbCommStage* stage = &handle->stage;
struct scclIbSendComm* comm = (struct scclIbSendComm*)stage->comm;
......@@ -1166,14 +1253,14 @@ scclResult_t scclIbConnect(int dev, void* opaqueHandle, void** sendComm) {
}
SCCLCHECK(scclIbMalloc((void**)&comm, sizeof(struct scclIbSendComm)));
SCCLCHECK(host::scclSocketInit(&comm->sock, &handle->connectAddr, handle->magic, host::scclSocketTypeNetIb, NULL, 1));
SCCLCHECK(net_socket::scclSocketInit(&comm->sock, &handle->connectAddr, handle->magic, net_socket::scclSocketTypeNetIb, NULL, 1));
stage->comm = comm;
stage->state = scclIbCommStateConnect;
SCCLCHECK(host::scclSocketConnect(&comm->sock, scclParamIbSockClientPortReuse()));
SCCLCHECK(net_socket::scclSocketConnect(&comm->sock, scclParamIbSockClientPortReuse()));
ib_connect_check:
/* since scclSocketConnect is async, we must check if connection is complete */
SCCLCHECK(host::scclSocketReady(&comm->sock, &ready));
SCCLCHECK(net_socket::scclSocketReady(&comm->sock, &ready));
if(!ready)
return scclSuccess;
......@@ -1292,7 +1379,7 @@ ib_send_ready:
* @param recvComm 输出参数,接收通信句柄
* @return scclResult_t 返回操作结果,成功返回scclSuccess
*/
scclResult_t scclIbAccept(void* listenComm, void** recvComm) {
scclResult_t scclNetIb::accept(void* listenComm, void** recvComm) {
struct scclIbListenComm* lComm = (struct scclIbListenComm*)listenComm;
struct scclIbCommStage* stage = &lComm->stage;
struct scclIbRecvComm* rComm = (struct scclIbRecvComm*)stage->comm;
......@@ -1315,11 +1402,11 @@ scclResult_t scclIbAccept(void* listenComm, void** recvComm) {
SCCLCHECK(scclIbMalloc((void**)&rComm, sizeof(struct scclIbRecvComm)));
stage->comm = rComm;
stage->state = scclIbCommStateAccept;
SCCLCHECK(host::scclSocketInit(&rComm->sock));
SCCLCHECK(host::scclSocketAccept(&rComm->sock, &lComm->sock));
SCCLCHECK(net_socket::scclSocketInit(&rComm->sock));
SCCLCHECK(net_socket::scclSocketAccept(&rComm->sock, &lComm->sock));
ib_accept_check:
SCCLCHECK(host::scclSocketReady(&rComm->sock, &ready));
SCCLCHECK(net_socket::scclSocketReady(&rComm->sock, &ready));
if(!ready)
return scclSuccess;
......@@ -1329,7 +1416,7 @@ ib_accept_check:
SCCLCHECK(scclIbMalloc((void**)&stage->buffer, sizeof(remQpInfo)));
ib_recv:
SCCLCHECK(host::scclSocketProgress(SCCL_SOCKET_RECV, &rComm->sock, stage->buffer, sizeof(remQpInfo), &stage->offset));
SCCLCHECK(net_socket::scclSocketProgress(SCCL_SOCKET_RECV, &rComm->sock, stage->buffer, sizeof(remQpInfo), &stage->offset));
if(stage->offset != sizeof(remQpInfo))
return scclSuccess;
......@@ -1416,7 +1503,7 @@ ib_recv:
memcpy(stage->buffer, &qpInfo, sizeof(struct scclIbQpInfo));
ib_send:
SCCLCHECK(host::scclSocketProgress(SCCL_SOCKET_SEND, &rComm->sock, stage->buffer, sizeof(struct scclIbQpInfo), &stage->offset));
SCCLCHECK(net_socket::scclSocketProgress(SCCL_SOCKET_SEND, &rComm->sock, stage->buffer, sizeof(struct scclIbQpInfo), &stage->offset));
if(stage->offset < sizeof(struct scclIbQpInfo))
return scclSuccess;
......@@ -1424,7 +1511,7 @@ ib_send:
stage->state = scclIbCommStatePendingReady;
ib_recv_ready:
SCCLCHECK(host::scclSocketProgress(SCCL_SOCKET_RECV, &rComm->sock, &rComm->ready, sizeof(int), &stage->offset));
SCCLCHECK(net_socket::scclSocketProgress(SCCL_SOCKET_RECV, &rComm->sock, &rComm->ready, sizeof(int), &stage->offset));
if(stage->offset != sizeof(int))
return scclSuccess;
......@@ -1440,7 +1527,7 @@ ib_recv_ready:
}
/* DMA-BUF support */
scclResult_t scclIbRegMrDmaBuf(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle) {
scclResult_t scclNetIb::regMrDmaBuf(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle) {
static_assert(offsetof(struct scclIbSendComm, verbs) == offsetof(struct scclIbRecvComm, verbs), "Send and recv comms must have verbs at the same offset");
assert(size > 0);
......@@ -1498,11 +1585,21 @@ returning:
return res;
}
scclResult_t scclIbRegMr(void* comm, void* data, int size, int type, void** mhandle) {
return scclIbRegMrDmaBuf(comm, data, (size_t)size, type, 0ULL, -1, mhandle);
scclResult_t scclNetIb::regMr(void* comm, void* data, int size, int type, void** mhandle) {
return regMrDmaBuf(comm, data, (size_t)size, type, 0ULL, -1, mhandle);
}
scclResult_t scclIbDeregMr(void* comm, void* mhandle) {
/**
* @brief 注销IB内存区域(MR)
*
* 该函数用于注销指定的IB内存区域(MR),并更新MR缓存。如果MR的引用计数减至0,
* 则从缓存中移除并调用ibv_dereg_mr释放资源。
*
* @param comm 通信上下文指针
* @param mhandle 要注销的内存区域句柄
* @return scclResult_t 返回操作结果(scclSuccess表示成功)
*/
scclResult_t scclNetIb::deregMr(void* comm, void* mhandle) {
struct scclIbVerbs* verbs = (struct scclIbVerbs*)comm;
struct scclIbMrCache* cache = &scclIbDevs[verbs->dev].mrCache;
scclResult_t res;
......@@ -1529,10 +1626,10 @@ returning:
return res;
}
scclResult_t scclIbIsend(void* sendComm, void* data, int size, int tag, void* mhandle, void** request) {
scclResult_t scclNetIb::isend(void* sendComm, void* data, int size, int tag, void* mhandle, void** request) {
struct scclIbSendComm* comm = (struct scclIbSendComm*)sendComm;
if(comm->ready == 0) {
WARN("NET/IB: scclIbIsend() called when comm->ready == 0");
WARN("NET/IB: isend() called when comm->ready == 0");
return scclInternalError;
}
if(comm->ready == 0) {
......@@ -1567,26 +1664,26 @@ scclResult_t scclIbIsend(void* sendComm, void* data, int size, int tag, void* mh
// Sanity checks to catch user collective call count/size mismatches
if(size > slots[r].size) {
char line[SOCKET_NAME_MAXLEN + 1];
union host::scclSocketAddress addr;
host::scclSocketGetAddr(&comm->sock, &addr);
union net_socket::scclSocketAddress addr;
net_socket::scclSocketGetAddr(&comm->sock, &addr);
WARN("NET/IB : req %d/%d tag %x peer %s collective mismatch error, local size %d remote size %d",
r,
nreqs,
tag,
host::scclSocketToString(&addr, line),
net_socket::scclSocketToString(&addr, line),
size,
slots[r].size);
return scclInvalidUsage;
} // plus any potential programming errors
else if(slots[r].size < 0 || slots[r].addr == 0 || slots[r].rkey == 0) {
char line[SOCKET_NAME_MAXLEN + 1];
union host::scclSocketAddress addr;
host::scclSocketGetAddr(&comm->sock, &addr);
union net_socket::scclSocketAddress addr;
net_socket::scclSocketGetAddr(&comm->sock, &addr);
WARN("NET/IB : req %d/%d tag %x peer %s posted incorrect receive info: size %d addr %lx rkey %x",
r,
nreqs,
tag,
host::scclSocketToString(&addr, line),
net_socket::scclSocketToString(&addr, line),
slots[r].size,
slots[r].addr,
slots[r].rkey);
......@@ -1626,10 +1723,10 @@ scclResult_t scclIbIsend(void* sendComm, void* data, int size, int tag, void* mh
return scclSuccess;
}
scclResult_t scclIbIrecv(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request) {
scclResult_t scclNetIb::irecv(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request) {
struct scclIbRecvComm* comm = (struct scclIbRecvComm*)recvComm;
if(comm->ready == 0) {
WARN("NET/IB: scclIbIrecv() called when comm->ready == 0");
WARN("NET/IB: irecv() called when comm->ready == 0");
return scclInternalError;
}
if(comm->ready == 0) {
......@@ -1672,7 +1769,7 @@ scclResult_t scclIbIrecv(void* recvComm, int n, void** data, int* sizes, int* ta
return scclSuccess;
}
scclResult_t scclIbIflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) {
scclResult_t scclNetIb::iflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) {
struct scclIbRecvComm* comm = (struct scclIbRecvComm*)recvComm;
int last = -1;
for(int i = 0; i < n; i++)
......@@ -1706,7 +1803,7 @@ scclResult_t scclIbIflush(void* recvComm, int n, void** data, int* sizes, void**
return scclSuccess;
}
scclResult_t scclIbTest(void* request, int* done, int* sizes) {
scclResult_t scclNetIb::test(void* request, int* done, int* sizes) {
struct scclIbRequest* r = (struct scclIbRequest*)request;
*done = 0;
......@@ -1732,8 +1829,8 @@ scclResult_t scclIbTest(void* request, int* done, int* sizes) {
struct ibv_wc* wc = wcs + w;
if(wc->status != IBV_WC_SUCCESS) {
char line[SOCKET_NAME_MAXLEN + 1];
union host::scclSocketAddress addr;
host::scclSocketGetAddr(r->sock, &addr);
union net_socket::scclSocketAddress addr;
net_socket::scclSocketGetAddr(r->sock, &addr);
char localGidString[INET6_ADDRSTRLEN] = "";
char remoteGidString[INET6_ADDRSTRLEN] = "";
const char *localGidStr = NULL, *remoteGidStr = NULL;
......@@ -1742,7 +1839,7 @@ scclResult_t scclIbTest(void* request, int* done, int* sizes) {
remoteGidStr = inet_ntop(AF_INET6, &r->gidInfo->remoteGid, remoteGidString, sizeof(remoteGidString));
}
WARN("NET/IB : Got completion from peer %s with error %d, opcode %d, len %d, vendor err %d (%s)%s%s%s%s",
host::scclSocketToString(&addr, line),
net_socket::scclSocketToString(&addr, line),
wc->status,
wc->opcode,
wc->byte_len,
......@@ -1782,10 +1879,10 @@ scclResult_t scclIbTest(void* request, int* done, int* sizes) {
}
}
scclResult_t scclIbCloseSend(void* sendComm) {
scclResult_t scclNetIb::closeSend(void* sendComm) {
struct scclIbSendComm* comm = (struct scclIbSendComm*)sendComm;
if(comm) {
SCCLCHECK(host::scclSocketClose(&comm->sock));
SCCLCHECK(net_socket::scclSocketClose(&comm->sock));
for(int q = 0; q < comm->nqps; q++)
if(comm->qps[q] != NULL)
SCCLCHECK(wrap_ibv_destroy_qp(comm->qps[q]));
......@@ -1797,11 +1894,11 @@ scclResult_t scclIbCloseSend(void* sendComm) {
return scclSuccess;
}
scclResult_t scclIbCloseRecv(void* recvComm) {
scclResult_t scclNetIb::closeRecv(void* recvComm) {
struct scclIbRecvComm* comm = (struct scclIbRecvComm*)recvComm;
if(comm) {
if(!scclParamIbSockServerPortReuse() || reusedSockfd != comm->sock.fd)
SCCLCHECK(host::scclSocketClose(&comm->sock));
SCCLCHECK(net_socket::scclSocketClose(&comm->sock));
for(int q = 0; q < comm->nqps; q++)
if(comm->qps[q] != NULL)
SCCLCHECK(wrap_ibv_destroy_qp(comm->qps[q]));
......@@ -1819,36 +1916,16 @@ scclResult_t scclIbCloseRecv(void* recvComm) {
return scclSuccess;
}
scclResult_t scclIbCloseListen(void* listenComm) {
scclResult_t scclNetIb::closeListen(void* listenComm) {
struct scclIbListenComm* comm = (struct scclIbListenComm*)listenComm;
if(comm) {
SCCLCHECK(host::scclSocketClose(&comm->sock));
SCCLCHECK(net_socket::scclSocketClose(&comm->sock));
free(comm);
}
return scclSuccess;
}
} // namespace net_ib
scclNet_t scclNetIb = {"IB",
net_ib::scclIbInit,
net_ib::scclIbGetDevicesNum,
net_ib::scclIbGetProperties,
net_ib::scclIbListen,
net_ib::scclIbConnect,
net_ib::scclIbAccept,
net_ib::scclIbRegMr,
net_ib::scclIbRegMrDmaBuf,
net_ib::scclIbDeregMr,
net_ib::scclIbIsend,
net_ib::scclIbIrecv,
net_ib::scclIbIflush,
net_ib::scclIbTest,
net_ib::scclIbCloseSend,
net_ib::scclIbCloseRecv,
net_ib::scclIbCloseListen};
} // namespace device
} // namespace net
} // namespace hardware
} // namespace sccl
#pragma once
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <poll.h>
#include <sys/types.h>
#include <unistd.h>
#include "ibvwrap.h"
#include "socket.h"
#include "net_utils.h"
namespace sccl {
namespace hardware {
namespace net {
namespace net_ib {
/*IB的通信状态*/
enum scclIbCommState : uint8_t {
scclIbCommStateStart = 0, // 初始状态
scclIbCommStateConnect = 1, // 尝试连接状态
scclIbCommStateAccept = 3, // 接受连接状态
scclIbCommStateSend = 4, // 发送数据状态
scclIbCommStateRecv = 5, // 接收数据状态
scclIbCommStateConnecting = 6, // 正在连接状态
scclIbCommStateConnected = 7, // 已连接状态
scclIbCommStatePendingReady = 8, // 等待准备状态
};
/*通信的阶段*/
struct scclIbCommStage {
enum scclIbCommState state; // 通信阶段的状态
int offset; // 数据偏移量
void* buffer; // 用于通信的缓冲区指针
void* comm; // 通信对象指针
};
/*监听通信的上下文*/
struct scclIbListenComm {
int dev; // 设备标识符
struct net_socket::scclSocket sock; // 用于网络通信的套接字
struct scclIbCommStage stage; // 通信阶段的状态
};
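// With this refactor the listen context is no longer allocated per listen() call: scclNetIb
// keeps a single scclIbListenComm (the ibComm member of the class below), allocated in init()
// and re-initialized by listen(), which is why this struct moved out of the .cpp into the header.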
//////////////////////////////////
class scclNetIb : public scclNetBase {
public:
// 构造函数和析构函数
scclNetIb();
virtual ~scclNetIb();
// 初始化网络。
scclResult_t init() override;
// 返回适配器的数量。
scclResult_t devices(int* ndev) override;
// 获取各种设备属性。
scclResult_t getProperties(int dev, scclNetProperties_t* props) override;
// 创建一个接收对象并提供一个句柄以连接到它。该句柄最多可以是 SCCL_NET_HANDLE_MAXSIZE 字节,并将在排名之间交换以创建连接。
scclResult_t listen(int dev, void* handle, void** listenComm) override;
// 连接到一个句柄并返回一个发送 comm 对象给该对等体。
// 此调用不应阻塞以建立连接,而应成功返回 sendComm == NULL,并期望再次调用直到 sendComm != NULL。
scclResult_t connect(int dev, void* handle, void** sendComm) override;
// 在远程对等体调用 connect 后最终确定连接建立。
// 此调用不应阻塞以建立连接,而应成功返回 recvComm == NULL,并期望再次调用直到 recvComm != NULL。
scclResult_t accept(void* listenComm, void** recvComm) override;
// 注册/注销内存。Comm 可以是 sendComm 或 recvComm。
// 类型是 SCCL_PTR_HOST 或 SCCL_PTR_CUDA。
scclResult_t regMr(void* comm, void* data, int size, int type, void** mhandle) override;
/* DMA-BUF 支持 */
scclResult_t regMrDmaBuf(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle) override;
// 注销IB内存区域(MR)
scclResult_t deregMr(void* comm, void* mhandle) override;
// 异步发送到对等体。
// 如果调用不能执行(或会阻塞),则可能返回 request == NULL
scclResult_t isend(void* sendComm, void* data, int size, int tag, void* mhandle, void** request) override;
// 异步从对等体接收。 如果调用不能执行(或会阻塞),则可能返回 request == NULL
scclResult_t irecv(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request) override;
// 执行刷新/栅栏操作,以确保所有使用 SCCL_PTR_CUDA 接收到的数据对 GPU 可见
scclResult_t iflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) override;
// 测试请求是否完成。如果 size 不为 NULL,则返回发送/接收的字节数。
scclResult_t test(void* request, int* done, int* sizes) override;
// 关闭并释放 send/recv comm 对象
scclResult_t closeSend(void* sendComm) override;
scclResult_t closeRecv(void* recvComm) override;
scclResult_t closeListen(void* listenComm) override;
private:
struct scclIbListenComm* ibComm = nullptr;
// 定义一个静态变量 scclNIbDevs,用于存储 InfiniBand 设备的数量
int scclNIbDevs = -1;
private:
// IB异步事件处理线程主函数
static void* scclIbAsyncThreadMain(void* args);
// 获取IB设备的PCI路径并处理多端口和虚拟功能合并
scclResult_t scclIbGetPciPath(char* devName, char** path, int* realPort);
// 根据输入的宽度值,返回对应的IB(InfiniBand)链路宽度索引
int scclIbWidth(int width);
// 根据给定的速度值查找并返回对应的IB传输速率
int scclIbSpeed(int speed);
// 检查当前IB设备是否支持宽松排序(Relaxed Ordering)模式
int scclIbRelaxedOrderingCapable(void);
// 获取并处理用户指定的IB设备环境变量
char* scclIbGetIbHca(int& shownIbHcaEnv, bool* searchNot, bool* searchExact);
// 从系统文件中读取字符串内容
scclResult_t scclGetStrFromSys(const char* path, const char* fileName, char* strValue);
// 检查IB设备是否支持GPU Direct RDMA (GDR)
scclResult_t scclIbGdrSupport(int ibDev);
// 检查设备是否支持DMA-BUF功能
scclResult_t scclIbDmaBufSupport(int dev);
// 初始化InfiniBand Verbs资源
scclResult_t scclIbInitVerbs(int dev, struct ibv_context* ctx, struct scclIbVerbs* verbs);
// 创建并初始化一个InfiniBand队列对(QP)
scclResult_t scclIbCreateQp(uint8_t ib_port, struct scclIbVerbs* verbs, int access_flags, struct ibv_qp** qp);
// 将IB QP状态修改为RTR(Ready to Receive)状态
scclResult_t scclIbRtrQp(struct ibv_qp* qp, uint32_t qpn, struct scclIbQpInfo* info);
// 将IB(InfiniBand)队列对(QP)状态修改为RTS(Ready To Send)状态
scclResult_t scclIbRtsQp(struct ibv_qp* qp);
// 销毁IB Verbs资源
scclResult_t scclIbDestroyVerbs(struct scclIbVerbs* verbs);
// 从verbs请求池中获取一个未使用的请求结构体
scclResult_t scclIbGetRequest(struct scclIbVerbs* verbs, struct scclIbRequest** req);
// 释放IB网络请求资源。
scclResult_t scclIbFreeRequest(struct scclIbRequest* r);
// 执行IB网络的多发送操作
scclResult_t scclIbMultiSend(struct scclIbSendComm* comm, int slot);
// 通过IB Verbs RDMA写入操作向远程FIFO队列提交数据
scclResult_t scclIbPostFifo(struct scclIbRecvComm* comm, int n, void** data, int* sizes, int* tags, void** mhandles, struct scclIbRequest* req);
};
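// Usage sketch (illustrative, not part of this commit): the intended calling pattern for the
// interface above, shown for the sending side. connect() and isend() are non-blocking and may
// leave their output pointer NULL, so both are polled, as is test(). The function name is made
// up for illustration; SCCL_PTR_HOST is assumed to be provided by the headers included above,
// and error handling is reduced to early returns.
inline scclResult_t scclNetIbSendExample(scclNetIb& net, int dev, void* peerHandle, void* buf, int size) {
    // peerHandle is the SCCL_NET_HANDLE_MAXSIZE blob produced by the peer's listen().
    void* sendComm = nullptr;
    while(sendComm == nullptr) { // resumes the connect state machine kept in the handle's stage
        scclResult_t res = net.connect(dev, peerHandle, &sendComm);
        if(res != scclSuccess) return res;
    }
    void* mhandle = nullptr;
    scclResult_t res = net.regMr(sendComm, buf, size, SCCL_PTR_HOST, &mhandle);
    if(res != scclSuccess) return res;
    void* request = nullptr;
    while(request == nullptr) { // NULL until the receiver has posted a matching irecv
        res = net.isend(sendComm, buf, size, /*tag=*/0, mhandle, &request);
        if(res != scclSuccess) return res;
    }
    int done = 0, sentSize = 0;
    while(!done) { // poll the request to completion
        res = net.test(request, &done, &sentSize);
        if(res != scclSuccess) return res;
    }
    res = net.deregMr(sendComm, mhandle);
    if(res != scclSuccess) return res;
    return net.closeSend(sendComm);
}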
} // namespace net_ib
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -9,8 +9,6 @@
namespace sccl {
namespace hardware {
namespace net {
namespace host {
namespace net_socket {
#define MAX_LINE_LEN (2047)
......@@ -26,7 +24,28 @@ static struct scclNetSocketDev scclNetSocketDevs[MAX_IFS];
pthread_mutex_t scclNetSocketLock = PTHREAD_MUTEX_INITIALIZER;
static scclResult_t scclNetSocketGetPciPath(char* devName, char** pciPath) {
SCCL_PARAM(SocketNsocksPerThread, "NSOCKS_PERTHREAD", -2);
SCCL_PARAM(SocketNthreads, "SOCKET_NTHREADS", -2);
////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////// scclNetSocket调用的函数 ////////////////////////////////////////
scclNetSocket::scclNetSocket() : scclNetBase("Socket") {}
scclNetSocket::~scclNetSocket() {
if(socketComm != nullptr) {
free(socketComm);
}
}
/**
* 获取网络设备的PCI路径
*
* @param devName 网络设备名称
* @param pciPath 输出参数,用于存储PCI路径的指针
* @return 返回操作结果(scclSuccess表示成功)
*
* @note 如果设备不存在,pciPath可能返回NULL
*/
scclResult_t scclNetSocket::scclNetSocketGetPciPath(char* devName, char** pciPath) {
char devicePath[PATH_MAX];
snprintf(devicePath, PATH_MAX, "/sys/class/net/%s/device", devName);
// May return NULL if the file doesn't exist.
......@@ -34,7 +53,9 @@ static scclResult_t scclNetSocketGetPciPath(char* devName, char** pciPath) {
return scclSuccess;
}
scclResult_t scclNetSocketInit(void) {
scclResult_t scclNetSocket::init() {
SCCLCHECK(scclMalloc(&socketComm, 1));
if(scclNetIfs == -1) {
pthread_mutex_lock(&scclNetSocketLock);
if(scclNetIfs == -1) {
......@@ -69,12 +90,22 @@ scclResult_t scclNetSocketInit(void) {
return scclSuccess;
}
scclResult_t scclNetSocketDevices(int* ndev) {
scclResult_t scclNetSocket::devices(int* ndev) {
*ndev = scclNetIfs;
return scclSuccess;
}
static scclResult_t scclNetSocketGetSpeed(char* devName, int* speed) {
/**
* @brief 获取指定网络设备的速度(单位:Mbps)
*
* 该函数通过读取/sys/class/net/<设备名>/speed文件来获取网络设备的速度。
* 如果读取失败或速度为0,则默认返回10Gbps(10000Mbps)。
*
* @param devName 网络设备名称
* @param speed 输出参数,用于存储获取到的速度值
* @return scclResult_t 始终返回scclSuccess表示成功
*/
scclResult_t scclNetSocket::scclNetSocketGetSpeed(char* devName, int* speed) {
*speed = 0;
char speedPath[PATH_MAX];
sprintf(speedPath, "/sys/class/net/%s/speed", devName);
......@@ -93,7 +124,17 @@ static scclResult_t scclNetSocketGetSpeed(char* devName, int* speed) {
return scclSuccess;
}
scclResult_t scclNetSocketGetProperties(int dev, scclNetProperties_t* props) {
/**
* @brief 获取网络套接字设备的属性
*
* @param dev 设备索引
* @param props 用于存储设备属性的结构体指针
* @return scclResult_t 返回操作结果,scclSuccess表示成功
*
* 该函数用于填充指定网络设备的属性信息,包括设备名称、PCI路径、速度等。
* 注意:延迟(latency)和端口(port)属性当前未设置。
*/
scclResult_t scclNetSocket::getProperties(int dev, scclNetProperties_t* props) {
props->name = scclNetSocketDevs[dev].devName;
props->pciPath = scclNetSocketDevs[dev].pciPath;
props->guid = dev;
......@@ -106,97 +147,19 @@ scclResult_t scclNetSocketGetProperties(int dev, scclNetProperties_t* props) {
return scclSuccess;
}
/* Communication functions */
#define MAX_SOCKETS 64
#define MAX_THREADS 16
#define MAX_REQUESTS SCCL_NET_MAX_REQUESTS
#define MIN_CHUNKSIZE (64 * 1024)
SCCL_PARAM(SocketNsocksPerThread, "NSOCKS_PERTHREAD", -2);
SCCL_PARAM(SocketNthreads, "SOCKET_NTHREADS", -2);
enum scclNetSocketCommState : uint8_t {
scclNetSocketCommStateStart = 0,
scclNetSocketCommStateConnect = 1,
scclNetSocketCommStateAccept = 3,
scclNetSocketCommStateSend = 4,
scclNetSocketCommStateRecv = 5,
};
struct scclNetSocketCommStage {
enum scclNetSocketCommState state;
uint8_t iteration;
struct scclSocket* sock;
struct scclNetSocketComm* comm;
};
struct scclNetSocketHandle {
union scclSocketAddress connectAddr;
uint64_t magic; // random number to help debugging
int nSocks;
int nThreads;
struct scclNetSocketCommStage stage;
};
struct scclNetSocketTask {
int op;
void* data;
int size;
struct scclSocket* sock;
int offset;
int used;
scclResult_t result;
};
struct scclNetSocketRequest {
int op;
void* data;
int size;
struct scclSocket* ctrlSock;
int offset;
int used;
struct scclNetSocketComm* comm;
struct scclNetSocketTask* tasks[MAX_SOCKETS];
int nSubs;
};
struct scclNetSocketTaskQueue {
int next;
int len;
struct scclNetSocketTask* tasks;
};
struct scclNetSocketThreadResources {
struct scclNetSocketTaskQueue threadTaskQueue;
int stop;
struct scclNetSocketComm* comm;
pthread_mutex_t threadLock;
pthread_cond_t threadCond;
};
struct scclNetSocketListenComm {
struct scclSocket sock;
struct scclNetSocketCommStage stage;
int nSocks;
int nThreads;
int dev;
};
struct scclNetSocketComm {
struct scclSocket ctrlSock;
struct scclSocket socks[MAX_SOCKETS];
int dev;
int cudaDev;
int nSocks;
int nThreads;
int nextSock;
struct scclNetSocketRequest requests[MAX_REQUESTS];
pthread_t helperThread[MAX_THREADS];
struct scclNetSocketThreadResources threadResources[MAX_THREADS];
};
void* persistentSocketThread(void* args_) {
/**
* @brief 持久化socket线程处理函数
*
* 该线程持续处理socket任务队列中的任务,每个线程负责处理nSocksPerThread个socket。
* 当任务队列为空时,线程会等待条件变量通知;当收到停止信号时,线程退出。
*
* @param args_ 线程参数,包含通信结构、任务队列和同步原语
* @return void* 总是返回NULL
*
* @note 线程会循环处理任务直到收到停止信号
* @warning 如果socket处理出错,线程会直接退出并打印警告信息
*/
void* scclNetSocket::persistentSocketThread(void* args_) {
struct scclNetSocketThreadResources* resource = (struct scclNetSocketThreadResources*)args_;
struct scclNetSocketComm* comm = resource->comm;
struct scclNetSocketTaskQueue* myQueue = &resource->threadTaskQueue;
......@@ -235,7 +198,18 @@ void* persistentSocketThread(void* args_) {
}
}
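// Illustrative sketch (not part of this commit): a self-contained reduction of the wait/drain/stop
// pattern described above. The real thread additionally walks nSocksPerThread sockets per pass and
// reports socket errors; this toy only shows the condition-variable synchronization. <pthread.h> is
// assumed to be in scope via the headers included by this file.
namespace {
struct ToyTaskQueue {
    int pending = 0;                                  // tasks waiting to be processed
    int stop = 0;                                     // set by the owner at shutdown
    pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
};
void* toyWorker(void* arg) {
    ToyTaskQueue* q = (ToyTaskQueue*)arg;
    pthread_mutex_lock(&q->lock);
    while(true) {
        while(q->pending == 0 && !q->stop)
            pthread_cond_wait(&q->cond, &q->lock);    // sleep until work arrives or stop is set
        if(q->stop && q->pending == 0) break;         // drain remaining work before exiting
        q->pending--;                                 // "process" one task
    }
    pthread_mutex_unlock(&q->lock);
    return nullptr;
}
} // anonymous namespace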
scclResult_t scclNetSocketGetNsockNthread(int dev, int* ns, int* nt) {
/**
* @brief 获取指定设备的socket和线程数量配置
*
* 根据设备类型和参数配置,自动检测或设置每个线程的socket数量和线程数量。
* 支持AWS和GCP设备的自动检测,并确保配置不超过最大限制。
*
* @param dev 设备索引
* @param ns 输出参数,返回总socket数量
* @param nt 输出参数,返回线程数量
* @return scclResult_t 返回操作结果,scclSuccess表示成功
*/
scclResult_t scclNetSocket::scclNetSocketGetNsockNthread(int dev, int* ns, int* nt) {
int nSocksPerThread = scclParamSocketNsocksPerThread();
int nThreads = scclParamSocketNthreads();
if(nThreads > MAX_THREADS) {
......@@ -287,28 +261,28 @@ scclResult_t scclNetSocketGetNsockNthread(int dev, int* ns, int* nt) {
return scclSuccess;
}
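// Illustrative sketch (not part of this commit): the thread/socket fan-out above is driven by the
// two SCCL_PARAM knobs declared earlier in this file. Assuming SCCL_PARAM follows the usual
// convention of prefixing the name with "SCCL_" to form an environment variable, a launcher could
// request 4 helper threads with 2 data sockets each like this (setenv(3) comes from <stdlib.h>,
// which is pulled in by the transport header):
static void exampleSocketThreadConfig() {
    setenv("SCCL_SOCKET_NTHREADS", "4", 1);   // helper threads per comm, capped at MAX_THREADS
    setenv("SCCL_NSOCKS_PERTHREAD", "2", 1);  // sockets serviced by each helper thread
    // scclNetSocketGetNsockNthread() would then be expected to report nt = 4 and ns = 4 * 2 = 8,
    // subject to the MAX_SOCKETS cap.
}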
scclResult_t scclNetSocketListen(int dev, void* opaqueHandle, void** listenComm) {
scclResult_t scclNetSocket::listen(int dev, void* opaqueHandle, void** listenComm) {
if(dev < 0 || dev >= scclNetIfs) { // data transfer socket is based on specified dev
return scclInternalError;
}
struct scclNetSocketHandle* handle = (struct scclNetSocketHandle*)opaqueHandle;
memset(handle, 0, sizeof(struct scclNetSocketHandle));
static_assert(sizeof(struct scclNetSocketHandle) <= SCCL_NET_HANDLE_MAXSIZE, "scclNetSocketHandle size too large");
struct scclNetSocketListenComm* comm;
SCCLCHECK(scclCalloc(&comm, 1));
memset(socketComm, 0, sizeof(struct scclNetSocketListenComm));
handle->magic = SCCL_SOCKET_MAGIC;
SCCLCHECK(scclSocketInit(&comm->sock, &scclNetSocketDevs[dev].addr, handle->magic, scclSocketTypeNetSocket, NULL, 1));
SCCLCHECK(scclSocketListen(&comm->sock));
SCCLCHECK(scclSocketGetAddr(&comm->sock, &handle->connectAddr));
SCCLCHECK(scclNetSocketGetNsockNthread(dev, &comm->nSocks, &comm->nThreads));
handle->nSocks = comm->nSocks;
handle->nThreads = comm->nThreads;
comm->dev = dev;
*listenComm = comm;
SCCLCHECK(scclSocketInit(&socketComm->sock, &scclNetSocketDevs[dev].addr, handle->magic, scclSocketTypeNetSocket, NULL, 1));
SCCLCHECK(scclSocketListen(&socketComm->sock));
SCCLCHECK(scclSocketGetAddr(&socketComm->sock, &handle->connectAddr));
SCCLCHECK(scclNetSocketGetNsockNthread(dev, &socketComm->nSocks, &socketComm->nThreads));
handle->nSocks = socketComm->nSocks;
handle->nThreads = socketComm->nThreads;
socketComm->dev = dev;
*listenComm = socketComm;
return scclSuccess;
}
scclResult_t scclNetSocketConnect(int dev, void* opaqueHandle, void** sendComm) {
scclResult_t scclNetSocket::connect(int dev, void* opaqueHandle, void** sendComm) {
if(dev < 0 || dev >= scclNetIfs) { // data transfer socket is based on specified dev
return scclInternalError;
}
......@@ -331,7 +305,7 @@ scclResult_t scclNetSocketConnect(int dev, void* opaqueHandle, void** sendComm)
comm->nSocks = handle->nSocks;
comm->nThreads = handle->nThreads;
comm->dev = dev;
HIPCHECK(hipGetDevice(&comm->cudaDev));
HIPCHECK(hipGetDevice(&comm->hipDev));
for(; i < comm->nSocks + 1; i++) {
sock = (i == comm->nSocks) ? &comm->ctrlSock : comm->socks + i;
SCCLCHECK(scclSocketInit(sock, &handle->connectAddr, handle->magic, scclSocketTypeNetSocket, NULL, 1));
......@@ -357,7 +331,7 @@ scclResult_t scclNetSocketConnect(int dev, void* opaqueHandle, void** sendComm)
return scclSuccess;
}
scclResult_t scclNetSocketAccept(void* listenComm, void** recvComm) {
scclResult_t scclNetSocket::accept(void* listenComm, void** recvComm) {
struct scclNetSocketListenComm* lComm = (struct scclNetSocketListenComm*)listenComm;
struct scclNetSocketCommStage* stage = &lComm->stage;
struct scclNetSocketComm* rComm = stage->comm;
......@@ -376,7 +350,7 @@ scclResult_t scclNetSocketAccept(void* listenComm, void** recvComm) {
rComm->nSocks = lComm->nSocks;
rComm->nThreads = lComm->nThreads;
rComm->dev = lComm->dev;
HIPCHECK(hipGetDevice(&rComm->cudaDev));
HIPCHECK(hipGetDevice(&rComm->hipDev));
for(; i < rComm->nSocks + 1; i++) {
uint8_t sendSockIdx;
......@@ -434,7 +408,51 @@ scclResult_t scclNetSocketGetRequest(struct scclNetSocketComm* comm, int op, voi
return scclInternalError;
}
scclResult_t scclNetSocketGetTask(struct scclNetSocketComm* comm, int op, void* data, int size, struct scclNetSocketTask** req) {
scclResult_t scclNetSocket::regMr(void* comm, void* data, int size, int type, void** mhandle) {
return (type != SCCL_PTR_HOST) ? scclInternalError : scclSuccess;
}
scclResult_t scclNetSocket::regMrDmaBuf(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle) {
WARN("NET/Socket : unable to check DMA-BUF support");
return scclSuccess;
}
scclResult_t scclNetSocket::deregMr(void* comm, void* mhandle) { return scclSuccess; }
scclResult_t scclNetSocket::isend(void* sendComm, void* data, int size, int tag, void* mhandle, void** request) {
struct scclNetSocketComm* comm = (struct scclNetSocketComm*)sendComm;
SCCLCHECK(scclNetSocketGetRequest(comm, SCCL_SOCKET_SEND, data, size, (struct scclNetSocketRequest**)request));
return scclSuccess;
}
scclResult_t scclNetSocket::irecv(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request) {
struct scclNetSocketComm* comm = (struct scclNetSocketComm*)recvComm;
if(n != 1)
return scclInternalError;
SCCLCHECK(scclNetSocketGetRequest(comm, SCCL_SOCKET_RECV, data[0], sizes[0], (struct scclNetSocketRequest**)request));
return scclSuccess;
}
scclResult_t scclNetSocket::iflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) {
// We don't support HIP pointers, so we don't need a flush operation
return scclInternalError;
}
/**
* 为指定通信对象创建并获取一个网络套接字任务
*
* @param comm 网络套接字通信对象指针
* @param op 操作类型(SCCL_SOCKET_SEND/SCCL_SOCKET_RECV)
* @param data 任务数据缓冲区指针
* @param size 数据大小
* @param req [out] 返回创建的任务指针
*
* @return 成功返回scclSuccess,失败返回scclInternalError
*
* @note 该函数会初始化线程资源(首次调用时),创建持久化线程处理任务队列
* @warning 当任务队列已满时会返回错误并打印警告
*/
scclResult_t scclNetSocket::scclNetSocketGetTask(struct scclNetSocketComm* comm, int op, void* data, int size, struct scclNetSocketTask** req) {
int tid = comm->nextSock % comm->nThreads;
struct scclNetSocketThreadResources* res = comm->threadResources + tid;
struct scclNetSocketTaskQueue* queue = &res->threadTaskQueue;
......@@ -450,7 +468,7 @@ scclResult_t scclNetSocketGetTask(struct scclNetSocketComm* comm, int op, void*
pthread_mutex_init(&res->threadLock, NULL);
pthread_cond_init(&res->threadCond, NULL);
pthread_create(comm->helperThread + tid, NULL, persistentSocketThread, res);
scclSetThreadName(comm->helperThread[tid], "SCCL Sock%c%1u%2u%2u", op == SCCL_SOCKET_SEND ? 'S' : 'R', comm->dev, tid, comm->cudaDev);
scclSetThreadName(comm->helperThread[tid], "SCCL Sock%c%1u%2u%2u", op == SCCL_SOCKET_SEND ? 'S' : 'R', comm->dev, tid, comm->hipDev);
}
struct scclNetSocketTask* r = queue->tasks + queue->next;
if(r->used == 0) {
......@@ -473,7 +491,7 @@ scclResult_t scclNetSocketGetTask(struct scclNetSocketComm* comm, int op, void*
return scclInternalError;
}
scclResult_t scclNetSocketTest(void* request, int* done, int* size) {
scclResult_t scclNetSocket::test(void* request, int* done, int* size) {
*done = 0;
struct scclNetSocketRequest* r = (struct scclNetSocketRequest*)request;
if(r == NULL) {
......@@ -555,43 +573,7 @@ scclResult_t scclNetSocketTest(void* request, int* done, int* size) {
return scclSuccess;
}
scclResult_t scclNetSocketRegMr(void* comm, void* data, int size, int type, void** mhandle) {
return (type != SCCL_PTR_HOST) ? scclInternalError : scclSuccess;
}
scclResult_t scclNetSocketDeregMr(void* comm, void* mhandle) { return scclSuccess; }
scclResult_t scclNetSocketIsend(void* sendComm, void* data, int size, int tag, void* mhandle, void** request) {
struct scclNetSocketComm* comm = (struct scclNetSocketComm*)sendComm;
SCCLCHECK(scclNetSocketGetRequest(comm, SCCL_SOCKET_SEND, data, size, (struct scclNetSocketRequest**)request));
return scclSuccess;
}
scclResult_t scclNetSocketIrecv(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request) {
struct scclNetSocketComm* comm = (struct scclNetSocketComm*)recvComm;
if(n != 1)
return scclInternalError;
SCCLCHECK(scclNetSocketGetRequest(comm, SCCL_SOCKET_RECV, data[0], sizes[0], (struct scclNetSocketRequest**)request));
return scclSuccess;
}
scclResult_t scclNetSocketIflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) {
// We don't support HIP pointers, so we don't need a flush operation
return scclInternalError;
}
scclResult_t scclNetSocketCloseListen(void* opaqueComm) {
struct scclNetSocketListenComm* comm = (struct scclNetSocketListenComm*)opaqueComm;
if(comm) {
int ready;
SCCLCHECK(scclSocketReady(&comm->sock, &ready));
if(ready)
SCCLCHECK(scclSocketClose(&comm->sock));
free(comm);
}
return scclSuccess;
}
scclResult_t scclNetSocketClose(void* opaqueComm) {
scclResult_t scclNetSocket::closeSend(void* opaqueComm) {
struct scclNetSocketComm* comm = (struct scclNetSocketComm*)opaqueComm;
if(comm) {
for(int i = 0; i < comm->nThreads; i++) {
......@@ -619,27 +601,21 @@ scclResult_t scclNetSocketClose(void* opaqueComm) {
return scclSuccess;
}
} // namespace net_socket
scclResult_t scclNetSocket::closeRecv(void* opaqueComm) { return closeSend(opaqueComm); }
scclNet_t scclNetSocket = {"Socket",
net_socket::scclNetSocketInit,
net_socket::scclNetSocketDevices,
net_socket::scclNetSocketGetProperties,
net_socket::scclNetSocketListen,
net_socket::scclNetSocketConnect,
net_socket::scclNetSocketAccept,
net_socket::scclNetSocketRegMr,
NULL, // No DMA-BUF support
net_socket::scclNetSocketDeregMr,
net_socket::scclNetSocketIsend,
net_socket::scclNetSocketIrecv,
net_socket::scclNetSocketIflush,
net_socket::scclNetSocketTest,
net_socket::scclNetSocketClose,
net_socket::scclNetSocketClose,
net_socket::scclNetSocketCloseListen};
} // namespace host
scclResult_t scclNetSocket::closeListen(void* opaqueComm) {
struct scclNetSocketListenComm* comm = (struct scclNetSocketListenComm*)opaqueComm;
if(comm) {
int ready;
SCCLCHECK(scclSocketReady(&comm->sock, &ready));
if(ready)
SCCLCHECK(scclSocketClose(&comm->sock));
free(comm);
}
return scclSuccess;
}
} // namespace net_socket
} // namespace net
} // namespace hardware
} // namespace sccl
#pragma once
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <poll.h>
#include <sys/types.h>
#include <unistd.h>
#include "base.h"
#include "net_utils.h"
#include "socket.h"
namespace sccl {
namespace hardware {
namespace net {
namespace net_socket {
/* Communication functions */
static constexpr int MAX_SOCKETS = 64;
static constexpr int MAX_THREADS = 16;
static constexpr int MAX_REQUESTS = SCCL_NET_MAX_REQUESTS;
static constexpr int MIN_CHUNKSIZE = (64 * 1024);
enum scclNetSocketCommState : uint8_t {
scclNetSocketCommStateStart = 0,
scclNetSocketCommStateConnect = 1,
scclNetSocketCommStateAccept = 3,
scclNetSocketCommStateSend = 4,
scclNetSocketCommStateRecv = 5,
};
struct scclNetSocketCommStage {
enum scclNetSocketCommState state;
uint8_t iteration;
struct scclSocket* sock;
struct scclNetSocketComm* comm = nullptr;
};
struct scclNetSocketHandle {
union scclSocketAddress connectAddr;
uint64_t magic; // random number to help debugging
int nSocks;
int nThreads;
struct scclNetSocketCommStage stage;
};
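// The listening side records its nSocks/nThreads choice in this handle (see scclNetSocket::listen),
// so the connecting peer opens a matching set of data sockets plus one control socket and both
// ends agree on how a transfer is split.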
struct scclNetSocketTask {
int op;
void* data;
int size;
struct scclSocket* sock = nullptr;
int offset;
int used;
scclResult_t result;
};
struct scclNetSocketRequest {
int op;
void* data;
int size;
struct scclSocket* ctrlSock = nullptr;
int offset;
int used;
struct scclNetSocketComm* comm = nullptr;
struct scclNetSocketTask* tasks[MAX_SOCKETS] = {nullptr};
int nSubs;
};
struct scclNetSocketTaskQueue {
int next;
int len;
struct scclNetSocketTask* tasks = nullptr;
};
struct scclNetSocketThreadResources {
struct scclNetSocketTaskQueue threadTaskQueue;
int stop;
struct scclNetSocketComm* comm = nullptr;
pthread_mutex_t threadLock;
pthread_cond_t threadCond;
};
struct scclNetSocketListenComm {
struct scclSocket sock;
struct scclNetSocketCommStage stage;
int nSocks;
int nThreads;
int dev;
};
struct scclNetSocketComm {
struct scclSocket ctrlSock;
struct scclSocket socks[MAX_SOCKETS];
int dev;
int hipDev;
int nSocks;
int nThreads;
int nextSock;
struct scclNetSocketRequest requests[MAX_REQUESTS];
pthread_t helperThread[MAX_THREADS];
struct scclNetSocketThreadResources threadResources[MAX_THREADS];
};
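// One logical isend/irecv becomes a scclNetSocketRequest on this comm; its payload is split into
// scclNetSocketTask entries that scclNetSocketGetTask() hands out round-robin to the helper
// threads (tid = nextSock % nThreads), each of which services its own slice of socks[].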
//////////////////////////////////
class scclNetSocket : public scclNetBase {
public:
// 构造函数和析构函数
scclNetSocket();
virtual ~scclNetSocket();
// 初始化网络。
scclResult_t init() override;
// 返回适配器的数量。
scclResult_t devices(int* ndev) override;
// 获取各种设备属性。
scclResult_t getProperties(int dev, scclNetProperties_t* props) override;
// 创建一个接收对象并提供一个句柄以连接到它。该句柄最多可以是 SCCL_NET_HANDLE_MAXSIZE 字节,并将在排名之间交换以创建连接。
scclResult_t listen(int dev, void* handle, void** listenComm) override;
// 连接到一个句柄并返回一个发送 comm 对象给该对等体。
// 此调用不应阻塞以建立连接,而应成功返回 sendComm == NULL,并期望再次调用直到 sendComm != NULL。
scclResult_t connect(int dev, void* handle, void** sendComm) override;
// 在远程对等体调用 connect 后最终确定连接建立。
// 此调用不应阻塞以建立连接,而应成功返回 recvComm == NULL,并期望再次调用直到 recvComm != NULL。
scclResult_t accept(void* listenComm, void** recvComm) override;
// 注册/注销内存。Comm 可以是 sendComm 或 recvComm。
// 类型是 SCCL_PTR_HOST 或 SCCL_PTR_CUDA。
scclResult_t regMr(void* comm, void* data, int size, int type, void** mhandle) override;
/* DMA-BUF 支持 */
scclResult_t regMrDmaBuf(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle) override;
// 注销IB内存区域(MR)
scclResult_t deregMr(void* comm, void* mhandle) override;
// 异步发送到对等体。
// 如果调用不能执行(或会阻塞),则可能返回 request == NULL
scclResult_t isend(void* sendComm, void* data, int size, int tag, void* mhandle, void** request) override;
// 异步从对等体接收。 如果调用不能执行(或会阻塞),则可能返回 request == NULL
scclResult_t irecv(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request) override;
// 执行刷新/栅栏操作,以确保所有使用 SCCL_PTR_CUDA 接收到的数据对 GPU 可见
scclResult_t iflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) override;
// 测试请求是否完成。如果 size 不为 NULL,则返回发送/接收的字节数。
scclResult_t test(void* request, int* done, int* sizes) override;
// 关闭并释放 send/recv comm 对象
scclResult_t closeSend(void* sendComm) override;
scclResult_t closeRecv(void* recvComm) override;
scclResult_t closeListen(void* listenComm) override;
private:
struct scclNetSocketListenComm* socketComm = nullptr;
private:
// 获取网络设备的PCI路径
static scclResult_t scclNetSocketGetPciPath(char* devName, char** pciPath);
// 获取指定网络设备的速度(单位:Mbps)
scclResult_t scclNetSocketGetSpeed(char* devName, int* speed);
// 持久化socket线程处理函数
static void* persistentSocketThread(void* args_);
// 为指定通信对象创建并获取一个网络套接字任务
scclResult_t scclNetSocketGetTask(struct scclNetSocketComm* comm, int op, void* data, int size, struct scclNetSocketTask** req);
// 获取指定设备的socket和线程数量配置
scclResult_t scclNetSocketGetNsockNthread(int dev, int* ns, int* nt);
};
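// Usage sketch (illustrative, not part of this commit): the receiving side of the interface
// above. listen() fills a handle that must be shipped to the peer out of band; accept() and
// irecv() are non-blocking and may leave their output pointer NULL, so both are polled. The
// function name is made up for illustration and error handling is reduced to early returns.
inline scclResult_t scclNetSocketRecvExample(scclNetSocket& net, int dev, void* handleOut, void* buf, int size) {
    // handleOut must point at an SCCL_NET_HANDLE_MAXSIZE byte buffer (see the static_assert in listen()).
    void* listenComm = nullptr;
    scclResult_t res = net.listen(dev, handleOut, &listenComm);
    if(res != scclSuccess) return res;
    void* recvComm = nullptr;
    while(recvComm == nullptr) { // resumes the accept state machine
        res = net.accept(listenComm, &recvComm);
        if(res != scclSuccess) return res;
    }
    void* mhandle = nullptr; // memory handles are not used by this transport's irecv
    int tag = 0;
    void* request = nullptr;
    while(request == nullptr) { // this transport only accepts n == 1 per irecv
        res = net.irecv(recvComm, 1, &buf, &size, &tag, &mhandle, &request);
        if(res != scclSuccess) return res;
    }
    int done = 0, receivedSize = 0;
    while(!done) { // poll the request to completion
        res = net.test(request, &done, &receivedSize);
        if(res != scclSuccess) return res;
    }
    res = net.closeRecv(recvComm);
    if(res != scclSuccess) return res;
    return net.closeListen(listenComm);
}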
} // namespace net_socket
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -15,7 +15,7 @@
namespace sccl {
namespace hardware {
namespace net {
namespace host {
namespace net_socket {
namespace socket_base {
/**
......@@ -383,7 +383,7 @@ static scclResult_t socketFinalizeConnect(struct scclSocket* sock) {
return scclSuccess;
}
static scclResult_t socketProgressState(struct host::scclSocket* sock) {
static scclResult_t socketProgressState(struct scclSocket* sock) {
if(sock->state == scclSocketStateAccepting) {
SCCLCHECK(socketTryAccept(sock));
}
......@@ -588,8 +588,13 @@ int scclFindInterfaceMatchSubnet(char* ifNames, union scclSocketAddress* localAd
* @brief Find a usable socket network interface
*
* This function looks for usable network interfaces on the system; the interface can be specified via an environment variable or auto-detected.
* Search order: 1) user-specified interface (SCCL_SOCKET_IFNAME) 2) IB interfaces 3) interfaces on the same subnet as SCCL_COMM_ID
* 4) other interfaces excluding docker and lo 5) docker interfaces 6) lo interfaces
* Search order:
* 1) user-specified interface (SCCL_SOCKET_IFNAME)
* 2) IB interfaces
* 3) interfaces on the same subnet as SCCL_COMM_ID
* 4) other interfaces excluding docker and lo
* 5) docker interfaces
* 6) lo interfaces
*
* @param ifNames Output parameter storing the names of the interfaces found
* @param ifAddrs Output parameter storing the addresses of the interfaces found
......@@ -630,9 +635,9 @@ int scclFindSocketInterfaces(char* ifNames, union scclSocketAddress* ifAddrs, in
WARN("No socket network interface found. ");
}
// // Then look for anything else (but not docker or lo)
// if(nIfs == 0)
// nIfs = socket_base::findSocketInterfaces("^docker,lo", ifNames, ifAddrs, sock_family, ifNameMaxSize, maxIfs);
// Then look for anything else (but not docker or lo)
if(nIfs == 0)
nIfs = socket_base::findSocketInterfaces("^docker,lo", ifNames, ifAddrs, sock_family, ifNameMaxSize, maxIfs);
// // Finally look for docker, then lo.
// if(nIfs == 0)
// nIfs = socket_base::findSocketInterfaces("docker", ifNames, ifAddrs, sock_family, ifNameMaxSize, maxIfs);
......@@ -868,6 +873,16 @@ scclResult_t scclSocketListen(struct scclSocket* sock) {
return scclSuccess;
}
/**
* Get the address of a socket
*
* @param sock Socket to query; must not be NULL
* @param addr Buffer that receives the address
* @return scclResult_t Operation result:
*         - scclInvalidArgument: invalid argument (sock is NULL)
*         - scclInternalError: socket not ready
*         - scclSuccess: success
*/
scclResult_t scclSocketGetAddr(struct scclSocket* sock, union scclSocketAddress* addr) {
if(sock == NULL) {
WARN("scclSocketGetAddr: pass NULL socket");
......@@ -1101,7 +1116,7 @@ scclResult_t scclSocketSetFd(int fd, struct scclSocket* sock) {
return scclSuccess;
}
} // namespace host
} // namespace net_socket
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -11,7 +11,7 @@
namespace sccl {
namespace hardware {
namespace net {
namespace host {
namespace net_socket {
#define MAX_IFS 16 // Maximum number of interfaces
#define MAX_IF_NAME_SIZE 16 // Maximum length of an interface name
......@@ -114,7 +114,7 @@ scclResult_t scclSocketGetFd(struct scclSocket* sock, int* fd);
// Set the socket file descriptor
scclResult_t scclSocketSetFd(int fd, struct scclSocket* sock);
} // namespace host
} // namespace net_socket
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -97,6 +97,22 @@ bool matchIfList(const char* string, int port, struct netIf* ifList, int listSiz
return false;
}
scclResult_t printNetProps(const scclNetProperties_t* props, int rank, int localRank) {
printf("rank=%d, localRank=%d, device name=%s, pciPath=%s, guid=%lu, ptrSupport=%d, speed=%d, port=%d, latency=%f, maxComms=%d, maxRecvs=%d\n",
rank,
localRank,
props->name,
props->pciPath,
props->guid,
props->ptrSupport,
props->speed,
props->port,
props->latency,
props->maxComms,
props->maxRecvs);
return scclSuccess;
}
} // namespace net
} // namespace hardware
} // namespace sccl
......@@ -7,18 +7,11 @@ namespace sccl {
namespace hardware {
namespace net {
struct netIf { // Network interface descriptor
char prefix[64]; // Interface name prefix
int port; // Port number
};
// Parse a string list and store the result in the network interface list
int parseStringList(const char* string, struct netIf* ifList, int maxList);
// Match an interface in the list against the given string and port
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact);
scclResult_t rocmLibraryInit(void);
typedef enum {
SCCL_PTR_HOST = 0x1,
SCCL_PTR_CUDA = 0x2,
SCCL_PTR_DMABUF = 0x4
} sccl_ptr_t;
////////////////////////////////// Network device definitions //////////////////////////////////
typedef struct {
......@@ -33,53 +26,87 @@ typedef struct {
int maxRecvs; // Maximum number of grouped receives.
} scclNetProperties_t;
typedef struct {
// Name of the network (mostly used for logging)
const char* name;
/**
* @brief scclNetBase defines the base interface for network communication
*
* This abstract base class exposes pure virtual interfaces for network initialization,
* device management, connection establishment, memory registration, data transfer,
* and other core functionality. Concrete implementations are provided by derived classes.
*
* Main features:
* - Network initialization and device property queries
* - Listen/connect establishment and management
* - Memory registration and deregistration
* - Asynchronous send/receive operations
* - Request completion testing
* - Connection teardown
*
* The interface is designed to be non-blocking and supports asynchronous operation.
*/
typedef class scclNetBase {
public:
// Constructor and destructor
scclNetBase(const char* net_name) : name(net_name) {};
virtual ~scclNetBase() {};
// Initialize the network.
scclResult_t (*init)();
virtual scclResult_t init() = 0;
// Return the number of adapters.
scclResult_t (*devices)(int* ndev);
virtual scclResult_t devices(int* ndev) = 0;
// Query various device properties.
scclResult_t (*getProperties)(int dev, scclNetProperties_t* props);
virtual scclResult_t getProperties(int dev, scclNetProperties_t* props) = 0;
// Create a receive object and provide a handle to connect to it. The handle can be up to SCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged between ranks to create a connection.
scclResult_t (*listen)(int dev, void* handle, void** listenComm);
virtual scclResult_t listen(int dev, void* handle, void** listenComm) = 0;
// Connect to a handle and return a send comm object for that peer.
// This call must not block while establishing the connection; instead it may return successfully with sendComm == NULL, and is expected to be called again until sendComm != NULL.
scclResult_t (*connect)(int dev, void* handle, void** sendComm);
virtual scclResult_t connect(int dev, void* handle, void** sendComm) = 0;
// Finalize the connection once the remote peer has called connect.
// This call must not block while establishing the connection; instead it may return successfully with recvComm == NULL, and is expected to be called again until recvComm != NULL.
scclResult_t (*accept)(void* listenComm, void** recvComm);
virtual scclResult_t accept(void* listenComm, void** recvComm) = 0;
// Register/deregister memory. Comm can be either a sendComm or a recvComm.
// Type is SCCL_PTR_HOST or SCCL_PTR_CUDA.
scclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
virtual scclResult_t regMr(void* comm, void* data, int size, int type, void** mhandle) = 0;
/* DMA-BUF support */
scclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
scclResult_t (*deregMr)(void* comm, void* mhandle);
virtual scclResult_t regMrDmaBuf(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle) = 0;
// Deregister an IB memory region (MR)
virtual scclResult_t deregMr(void* comm, void* mhandle) = 0;
// Asynchronous send to a peer.
// May return request == NULL if the call cannot be performed (or would block).
scclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
virtual scclResult_t isend(void* sendComm, void* data, int size, int tag, void* mhandle, void** request) = 0;
// Asynchronous receive from a peer. May return request == NULL if the call cannot be performed (or would block).
scclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
virtual scclResult_t irecv(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request) = 0;
// Perform a flush/fence to make sure all data received with SCCL_PTR_CUDA is visible to the GPU.
scclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
virtual scclResult_t iflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) = 0;
// Test whether a request is complete. If size is not NULL, returns the number of bytes sent/received.
scclResult_t (*test)(void* request, int* done, int* sizes);
virtual scclResult_t test(void* request, int* done, int* sizes) = 0;
// Close and free the send/recv comm objects
scclResult_t (*closeSend)(void* sendComm);
scclResult_t (*closeRecv)(void* recvComm);
scclResult_t (*closeListen)(void* listenComm);
} scclNet_t;
virtual scclResult_t closeSend(void* sendComm) = 0;
virtual scclResult_t closeRecv(void* recvComm) = 0;
virtual scclResult_t closeListen(void* listenComm) = 0;
////////////////////////////////// Other definitions //////////////////////////////////
public:
// Name of the network (mostly used for logging)
const char* name;
typedef enum sccl_ptr {
SCCL_PTR_HOST = 0x1,
SCCL_PTR_CUDA = 0x2,
SCCL_PTR_DMABUF = 0x4
} sccl_ptr_t;
} scclNet_t;
////////////////////////////////// Utility functions //////////////////////////////////
// Initialize the ROCm library
scclResult_t rocmLibraryInit(void);
#define SCCL_NET_HANDLE_MAXSIZE 128
struct netIf { // Network interface descriptor
char prefix[64]; // Interface name prefix
int port; // Port number
};
// Parse a string list and store the result in the network interface list
int parseStringList(const char* string, struct netIf* ifList, int maxList);
// Match an interface in the list against the given string and port
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact);
// Print network properties
scclResult_t printNetProps(const scclNetProperties_t* props, int rank, int localRank);
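// Illustrative usage sketch (not part of this commit): the asynchronous send path of
// scclNetBase. isend() may leave request == NULL when it cannot make progress yet, and
// test() is polled until done becomes non-zero. The helper name and its parameters are
// hypothetical; SCCLCHECK is assumed to be the project's error-propagation macro.
static inline scclResult_t scclNetSendAndWaitSketch(scclNetBase* net, void* sendComm, void* data, int size, int tag, void* mhandle) {
    void* request = nullptr;
    while(request == nullptr) {
        SCCLCHECK(net->isend(sendComm, data, size, tag, mhandle, &request)); // retry until the send is queued
    }
    int done = 0, bytes = 0;
    while(!done) {
        SCCLCHECK(net->test(request, &done, &bytes)); // poll for completion; bytes holds the transferred size
    }
    return scclSuccess;
}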
} // namespace net
} // namespace hardware
......
......@@ -170,6 +170,13 @@ error:
} // namespace rocm_wrap
/**
* Initialize the ROCm library
*
* This function uses pthread_once to ensure the ROCm library is initialized only once.
*
* @return The initialization result, of type scclResult_t.
*/
scclResult_t rocmLibraryInit() {
pthread_once(&rocm_wrap::initOnceControl, rocm_wrap::initOnceFunc);
return rocm_wrap::initResult;
......
......@@ -23,6 +23,7 @@ DECLARE_ROCM_PFN_EXTERN(hsa_status_string);
} // namespace rocm_wrap
// Initialize the ROCm library
scclResult_t rocmLibraryInit(void);
} // namespace net
......
/*************************************************************************
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#ifndef SCCL_CPUSET_H_
#define SCCL_CPUSET_H_
#include "base.h"
namespace sccl {
namespace hardware {
namespace topology {
namespace topo {
// Convert local_cpus, e.g. 0003ff,f0003fff to cpu_set_t
/**
* Convert a hexadecimal character to its integer value
*
* @param c Input hexadecimal character (0-9, a-f)
* @return The corresponding integer value (0-15), or -1 if the input is invalid
*/
static int hexToInt(char c) {
int v = c - '0';
if(v < 0)
return -1;
if(v > 9)
v = 10 + c - 'a';
if((v < 0) || (v > 15))
return -1;
return v;
}
#define CPU_SET_N_U32 (sizeof(cpu_set_t) / sizeof(uint32_t))
/**
* Convert a hexadecimal string to a CPU set mask
*
* @param str Input hexadecimal string, with sections separated by commas
* @param mask Output CPU set mask
* @return scclSuccess on successful conversion
*
* @note The string maps, left to right, to the 32-bit words of the mask from high to low.
*       Each character encodes 4 bits of hexadecimal.
*       Conversion stops early at the first non-hexadecimal character.
*/
static scclResult_t scclStrToCpuset(const char* str, cpu_set_t* mask) {
uint32_t cpumasks[CPU_SET_N_U32];
int m = CPU_SET_N_U32 - 1;
cpumasks[m] = 0;
for(int o = 0; o < strlen(str); o++) {
char c = str[o];
if(c == ',') {
m--;
cpumasks[m] = 0;
} else {
int v = hexToInt(c);
if(v == -1)
break;
cpumasks[m] <<= 4;
cpumasks[m] += v;
}
}
// Copy cpumasks to mask
for(int a = 0; m < CPU_SET_N_U32; a++, m++) {
memcpy(((uint32_t*)mask) + a, cpumasks + m, sizeof(uint32_t));
}
return scclSuccess;
}
/**
* Convert a CPU set mask to its hexadecimal string representation
*
* @param mask Input CPU set mask
* @param str Output string buffer that receives the result
* @return Operation result (scclSuccess on success)
*
* Conversion rules:
* 1. The bytes of cpu_set_t are converted to hexadecimal from high to low
* 2. A comma separator is inserted after every 4 bytes
* 3. Leading zeroes are skipped
*/
static scclResult_t scclCpusetToStr(cpu_set_t* mask, char* str) {
int c = 0;
uint8_t* m8 = (uint8_t*)mask;
for(int o = sizeof(cpu_set_t) - 1; o >= 0; o--) {
if(c == 0 && m8[o] == 0)
continue;
sprintf(str + c, "%02x", m8[o]);
c += 2;
if(o && o % 4 == 0) {
sprintf(str + c, ",");
c++;
}
}
str[c] = '\0';
return scclSuccess;
}
/**
* Convert a CPU set mask to a range string representation
*
* @param mask Input CPU set mask
* @param str Buffer that receives the result
* @param len Length of the buffer
* @return Pointer to the resulting string (i.e. the str argument)
*
* The function renders the CPU set mask as a readable range string, e.g. "0-3,5,7-9".
* The result is truncated if the buffer is too small. An empty set yields an empty string.
*/
static char* scclCpusetToRangeStr(cpu_set_t* mask, char* str, size_t len) {
int c = 0;
int start = -1;
// Iterate through all possible CPU bits plus one extra position
for(int cpu = 0; cpu <= CPU_SETSIZE; cpu++) {
int isSet = (cpu == CPU_SETSIZE) ? 0 : CPU_ISSET(cpu, mask);
// Start of a new range
if(isSet && start == -1) {
start = cpu;
}
// End of a range, add comma between ranges
if(!isSet && start != -1) {
if(cpu - 1 == start) {
c += snprintf(str + c, len - c, "%s%d", c ? "," : "", start);
} else {
c += snprintf(str + c, len - c, "%s%d-%d", c ? "," : "", start, cpu - 1);
}
if(c >= len - 1)
break;
start = -1;
}
}
if(c == 0)
str[0] = '\0';
return str;
}
} // namespace topo
} // namespace topology
} // namespace hardware
} // namespace sccl
#endif
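// Illustrative usage sketch (not part of this commit): round-tripping the helpers above.
// "00000fff" sets CPUs 0-11, so the hex form prints "0fff" (leading zero bytes are dropped)
// and the range form prints "0-11". The mask is cleared first because scclStrToCpuset only
// fills the 32-bit words actually present in the string. The function name is hypothetical.
#include <sched.h>
#include <cstdio>
static void scclCpusetDemoSketch() {
    using namespace sccl::hardware::topology::topo;
    cpu_set_t mask;
    CPU_ZERO(&mask);                                     // clear all bits before parsing
    scclStrToCpuset("00000fff", &mask);                  // hex string -> cpu_set_t (CPUs 0-11)
    char hex[128], ranges[128];
    scclCpusetToStr(&mask, hex);                         // back to hex: "0fff"
    scclCpusetToRangeStr(&mask, ranges, sizeof(ranges)); // readable ranges: "0-11"
    printf("hex=%s ranges=%s\n", hex, ranges);
}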
/*************************************************************************
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#include "nvmlwrap.h"
#include "base.h"
#include <initializer_list>
#include <memory>
#include <mutex>
namespace sccl {
namespace hardware {
namespace topology {
int scclNvmlDeviceCount = 0;
scclNvmlDeviceInfo scclNvmlDevices[scclNvmlMaxDevices];
scclNvmlDevicePairInfo scclNvmlDevicePairs[scclNvmlMaxDevices][scclNvmlMaxDevices];
#if SCCL_NVML_DIRECT
#define SCCL_NVML_FN(name, rettype, arglist) constexpr rettype(*pfn_##name) arglist = name;
#else
#include <dlfcn.h>
#define SCCL_NVML_FN(name, rettype, arglist) rettype(*pfn_##name) arglist = nullptr;
#endif
namespace {
SCCL_NVML_FN(nvmlInit, nvmlReturn_t, ())
SCCL_NVML_FN(nvmlInit_v2, nvmlReturn_t, ())
SCCL_NVML_FN(nvmlShutdown, nvmlReturn_t, ())
SCCL_NVML_FN(nvmlDeviceGetCount, nvmlReturn_t, (unsigned int*))
SCCL_NVML_FN(nvmlDeviceGetCount_v2, nvmlReturn_t, (unsigned int*))
SCCL_NVML_FN(nvmlDeviceGetHandleByPciBusId, nvmlReturn_t, (const char* pciBusId, nvmlDevice_t* device))
SCCL_NVML_FN(nvmlDeviceGetHandleByIndex, nvmlReturn_t, (unsigned int index, nvmlDevice_t* device))
SCCL_NVML_FN(nvmlDeviceGetIndex, nvmlReturn_t, (nvmlDevice_t device, unsigned* index))
SCCL_NVML_FN(nvmlErrorString, char const*, (nvmlReturn_t r))
SCCL_NVML_FN(nvmlDeviceGetNvLinkState, nvmlReturn_t, (nvmlDevice_t device, unsigned int link, nvmlEnableState_t* isActive))
SCCL_NVML_FN(nvmlDeviceGetNvLinkRemotePciInfo, nvmlReturn_t, (nvmlDevice_t device, unsigned int link, nvmlPciInfo_t* pci))
SCCL_NVML_FN(nvmlDeviceGetNvLinkCapability, nvmlReturn_t, (nvmlDevice_t device, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int* capResult))
SCCL_NVML_FN(nvmlDeviceGetCudaComputeCapability, nvmlReturn_t, (nvmlDevice_t device, int* major, int* minor))
SCCL_NVML_FN(nvmlDeviceGetP2PStatus, nvmlReturn_t, (nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuP2PCapsIndex_t p2pIndex, nvmlGpuP2PStatus_t* p2pStatus))
SCCL_NVML_FN(nvmlDeviceGetFieldValues, nvmlReturn_t, (nvmlDevice_t device, int valuesCount, nvmlFieldValue_t* values))
std::mutex lock; // NVML has had some thread safety bugs
bool initialized = false;
thread_local bool threadInitialized = false;
scclResult_t initResult;
} // namespace
scclResult_t scclNvmlEnsureInitialized() {
// Optimization to avoid repeatedly grabbing the lock when we only want to
// read from the global tables.
if(threadInitialized)
return initResult;
threadInitialized = true;
std::lock_guard<std::mutex> locked(lock);
if(initialized)
return initResult;
initialized = true;
#if !SCCL_NVML_DIRECT
if(pfn_nvmlInit == nullptr) {
void* libhandle = dlopen("libnvidia-ml.so.1", RTLD_NOW);
if(libhandle == nullptr) {
WARN("Failed to open libnvidia-ml.so.1");
initResult = scclSystemError;
return initResult;
}
struct Symbol {
void** ppfn;
char const* name;
};
std::initializer_list<Symbol> symbols = {{(void**)&pfn_nvmlInit, "nvmlInit"},
{(void**)&pfn_nvmlInit_v2, "nvmlInit_v2"},
{(void**)&pfn_nvmlShutdown, "nvmlShutdown"},
{(void**)&pfn_nvmlDeviceGetCount, "nvmlDeviceGetCount"},
{(void**)&pfn_nvmlDeviceGetCount_v2, "nvmlDeviceGetCount_v2"},
{(void**)&pfn_nvmlDeviceGetHandleByPciBusId, "nvmlDeviceGetHandleByPciBusId"},
{(void**)&pfn_nvmlDeviceGetHandleByIndex, "nvmlDeviceGetHandleByIndex"},
{(void**)&pfn_nvmlDeviceGetIndex, "nvmlDeviceGetIndex"},
{(void**)&pfn_nvmlErrorString, "nvmlErrorString"},
{(void**)&pfn_nvmlDeviceGetNvLinkState, "nvmlDeviceGetNvLinkState"},
{(void**)&pfn_nvmlDeviceGetNvLinkRemotePciInfo, "nvmlDeviceGetNvLinkRemotePciInfo"},
{(void**)&pfn_nvmlDeviceGetNvLinkCapability, "nvmlDeviceGetNvLinkCapability"},
{(void**)&pfn_nvmlDeviceGetCudaComputeCapability, "nvmlDeviceGetCudaComputeCapability"},
{(void**)&pfn_nvmlDeviceGetP2PStatus, "nvmlDeviceGetP2PStatus"},
{(void**)&pfn_nvmlDeviceGetFieldValues, "nvmlDeviceGetFieldValues"}};
for(Symbol sym : symbols) {
*sym.ppfn = dlsym(libhandle, sym.name);
}
}
#endif
#if SCCL_NVML_DIRECT
bool have_v2 = true;
#else
bool have_v2 = pfn_nvmlInit_v2 != nullptr; // if this compare is done in the SCCL_NVML_DIRECT=1 case then GCC warns about it never being null
#endif
nvmlReturn_t res1 = (have_v2 ? pfn_nvmlInit_v2 : pfn_nvmlInit)();
if(res1 != NVML_SUCCESS) {
WARN("nvmlInit%s() failed: %s", have_v2 ? "_v2" : "", pfn_nvmlErrorString(res1));
initResult = scclSystemError;
return initResult;
}
unsigned int ndev;
res1 = (have_v2 ? pfn_nvmlDeviceGetCount_v2 : pfn_nvmlDeviceGetCount)(&ndev);
if(res1 != NVML_SUCCESS) {
WARN("nvmlDeviceGetCount%s() failed: %s", have_v2 ? "_v2" : "", pfn_nvmlErrorString(res1));
initResult = scclSystemError;
return initResult;
}
scclNvmlDeviceCount = int(ndev);
if(scclNvmlMaxDevices < scclNvmlDeviceCount) {
WARN("nvmlDeviceGetCount() reported more devices (%d) than the internal maximum (scclNvmlMaxDevices=%d)", scclNvmlDeviceCount, scclNvmlMaxDevices);
initResult = scclInternalError;
return initResult;
}
for(int a = 0; a < scclNvmlDeviceCount; a++) {
res1 = pfn_nvmlDeviceGetHandleByIndex(a, &scclNvmlDevices[a].handle);
if(res1 != NVML_SUCCESS) {
WARN("nvmlDeviceGetHandleByIndex(%d) failed: %s", int(a), pfn_nvmlErrorString(res1));
initResult = scclSystemError;
return initResult;
}
res1 = pfn_nvmlDeviceGetCudaComputeCapability(
scclNvmlDevices[a].handle, &scclNvmlDevices[a].computeCapabilityMajor, &scclNvmlDevices[a].computeCapabilityMinor);
if(res1 != NVML_SUCCESS) {
WARN("nvmlDeviceGetCudaComputeCapability(%d) failed: %s", int(a), pfn_nvmlErrorString(res1));
initResult = scclSystemError;
return initResult;
}
}
for(int a = 0; a < scclNvmlDeviceCount; a++) {
for(int b = 0; b < scclNvmlDeviceCount; b++) {
nvmlDevice_t da = scclNvmlDevices[a].handle;
nvmlDevice_t db = scclNvmlDevices[b].handle;
res1 = pfn_nvmlDeviceGetP2PStatus(da, db, NVML_P2P_CAPS_INDEX_READ, &scclNvmlDevicePairs[a][b].p2pStatusRead);
if(res1 != NVML_SUCCESS) {
WARN("nvmlDeviceGetP2PStatus(%d,%d,NVML_P2P_CAPS_INDEX_READ) failed: %s", a, b, pfn_nvmlErrorString(res1));
initResult = scclSystemError;
return initResult;
}
res1 = pfn_nvmlDeviceGetP2PStatus(da, db, NVML_P2P_CAPS_INDEX_WRITE, &scclNvmlDevicePairs[a][b].p2pStatusWrite);
if(res1 != NVML_SUCCESS) {
WARN("nvmlDeviceGetP2PStatus(%d,%d,NVML_P2P_CAPS_INDEX_READ) failed: %s", a, b, pfn_nvmlErrorString(res1));
initResult = scclSystemError;
return initResult;
}
}
}
initResult = scclSuccess;
return initResult;
}
#define NVMLCHECK(name, ...) \
do { \
nvmlReturn_t e44241808 = pfn_##name(__VA_ARGS__); \
if(e44241808 != NVML_SUCCESS) { \
WARN(#name "() failed: %s", pfn_nvmlErrorString(e44241808)); \
return scclSystemError; \
} \
} while(0)
#define NVMLTRY(name, ...) \
do { \
if(!SCCL_NVML_DIRECT && pfn_##name == nullptr) \
return scclInternalError; /* missing symbol is not a warned error */ \
nvmlReturn_t e44241808 = pfn_##name(__VA_ARGS__); \
if(e44241808 != NVML_SUCCESS) { \
if(e44241808 != NVML_ERROR_NOT_SUPPORTED) \
INFO(SCCL_LOG_TOPO, #name "() failed: %s", pfn_nvmlErrorString(e44241808)); \
return scclSystemError; \
} \
} while(0)
scclResult_t scclNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_t* device) {
SCCLCHECK(scclNvmlEnsureInitialized());
std::lock_guard<std::mutex> locked(lock);
NVMLCHECK(nvmlDeviceGetHandleByPciBusId, pciBusId, device);
return scclSuccess;
}
scclResult_t scclNvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t* device) {
SCCLCHECK(scclNvmlEnsureInitialized());
*device = scclNvmlDevices[index].handle;
return scclSuccess;
}
scclResult_t scclNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index) {
SCCLCHECK(scclNvmlEnsureInitialized());
for(int d = 0; d < scclNvmlDeviceCount; d++) {
if(scclNvmlDevices[d].handle == device) {
*index = d;
return scclSuccess;
}
}
return scclInvalidArgument;
}
scclResult_t scclNvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int link, nvmlEnableState_t* isActive) {
SCCLCHECK(scclNvmlEnsureInitialized());
std::lock_guard<std::mutex> locked(lock);
NVMLTRY(nvmlDeviceGetNvLinkState, device, link, isActive);
return scclSuccess;
}
scclResult_t scclNvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device, unsigned int link, nvmlPciInfo_t* pci) {
SCCLCHECK(scclNvmlEnsureInitialized());
std::lock_guard<std::mutex> locked(lock);
NVMLTRY(nvmlDeviceGetNvLinkRemotePciInfo, device, link, pci);
return scclSuccess;
}
scclResult_t scclNvmlDeviceGetNvLinkCapability(nvmlDevice_t device, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int* capResult) {
SCCLCHECK(scclNvmlEnsureInitialized());
std::lock_guard<std::mutex> locked(lock);
NVMLTRY(nvmlDeviceGetNvLinkCapability, device, link, capability, capResult);
return scclSuccess;
}
scclResult_t scclNvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int* major, int* minor) {
SCCLCHECK(scclNvmlEnsureInitialized());
for(int d = 0; d < scclNvmlDeviceCount; d++) {
if(device == scclNvmlDevices[d].handle) {
*major = scclNvmlDevices[d].computeCapabilityMajor;
*minor = scclNvmlDevices[d].computeCapabilityMinor;
return scclSuccess;
}
}
return scclInvalidArgument;
}
scclResult_t scclNvmlDeviceGetP2PStatus(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuP2PCapsIndex_t p2pIndex, nvmlGpuP2PStatus_t* p2pStatus) {
SCCLCHECK(scclNvmlEnsureInitialized());
if(p2pIndex == NVML_P2P_CAPS_INDEX_READ || p2pIndex == NVML_P2P_CAPS_INDEX_WRITE) {
int a = -1, b = -1;
for(int d = 0; d < scclNvmlDeviceCount; d++) {
if(device1 == scclNvmlDevices[d].handle)
a = d;
if(device2 == scclNvmlDevices[d].handle)
b = d;
}
if(a == -1 || b == -1)
return scclInvalidArgument;
if(p2pIndex == NVML_P2P_CAPS_INDEX_READ)
*p2pStatus = scclNvmlDevicePairs[a][b].p2pStatusRead;
else
*p2pStatus = scclNvmlDevicePairs[a][b].p2pStatusWrite;
} else {
std::lock_guard<std::mutex> locked(lock);
NVMLCHECK(nvmlDeviceGetP2PStatus, device1, device2, p2pIndex, p2pStatus);
}
return scclSuccess;
}
scclResult_t scclNvmlDeviceGetFieldValues(nvmlDevice_t device, int valuesCount, nvmlFieldValue_t* values) {
SCCLCHECK(scclNvmlEnsureInitialized());
std::lock_guard<std::mutex> locked(lock);
NVMLTRY(nvmlDeviceGetFieldValues, device, valuesCount, values);
return scclSuccess;
}
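// Illustrative usage sketch (not part of this commit): querying the P2P tables cached by
// scclNvmlEnsureInitialized(). For the READ/WRITE capability indices the answer comes from
// scclNvmlDevicePairs without touching NVML again; other indices fall through to the locked
// NVML call. Device indices 0 and 1 are arbitrary examples; the function name is hypothetical.
static scclResult_t scclNvmlP2PReadStatusSketch(nvmlGpuP2PStatus_t* status) {
    nvmlDevice_t dev0, dev1;
    SCCLCHECK(scclNvmlDeviceGetHandleByIndex(0, &dev0));
    SCCLCHECK(scclNvmlDeviceGetHandleByIndex(1, &dev1));
    SCCLCHECK(scclNvmlDeviceGetP2PStatus(dev0, dev1, NVML_P2P_CAPS_INDEX_READ, status));
    return scclSuccess; // *status == NVML_P2P_STATUS_OK means P2P reads are supported
}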
} // namespace topology
} // namespace hardware
} // namespace sccl