Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
lishen01
Sccl
Commits
58d57301
Commit
58d57301
authored
Aug 13, 2025
by
lishen
Browse files
将建图过程中原本在bootstrap中的一部分完全移动到graph中
parent
708aae12
Changes
12
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
634 additions
and
684 deletions
+634
-684
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
+2
-1
src/hardware/hardware.cpp
src/hardware/hardware.cpp
+8
-4
src/hardware/topology/bootstrap/bootstrap.cpp
src/hardware/topology/bootstrap/bootstrap.cpp
+6
-372
src/hardware/topology/bootstrap/bootstrap.h
src/hardware/topology/bootstrap/bootstrap.h
+5
-40
src/hardware/topology/graph/graph.cpp
src/hardware/topology/graph/graph.cpp
+375
-12
src/hardware/topology/graph/graph.h
src/hardware/topology/graph/graph.h
+19
-4
src/hardware/topology/graph/graph_utils.cpp
src/hardware/topology/graph/graph_utils.cpp
+86
-0
src/hardware/topology/graph/graph_utils.h
src/hardware/topology/graph/graph_utils.h
+35
-32
src/hardware/topology/graph/paths.cpp
src/hardware/topology/graph/paths.cpp
+81
-207
src/hardware/topology/graph/paths.h
src/hardware/topology/graph/paths.h
+9
-6
src/hardware/topology/graph/physical_links.cpp
src/hardware/topology/graph/physical_links.cpp
+2
-2
src/hardware/topology/graph/physical_links.h
src/hardware/topology/graph/physical_links.h
+6
-4
No files found.
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
View file @
58d57301
...
...
@@ -13,13 +13,14 @@ hipcc ./3_mpi_init_mpi_init_step2_graph.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/rocm_smi_wrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/physical_links.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/topo_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/archinfo.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/param.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/hardware.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/thread_pool.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/physical_links.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/graph_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/graph.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/paths.cpp
\
-o
3_mpi_init_mpi_init_step2_graph
\
...
...
src/hardware/hardware.cpp
View file @
58d57301
...
...
@@ -39,15 +39,19 @@ scclResult_t sccl_init(const scclUniqueId* unique_id, int rank, int nRanks) {
SCCLCHECK
(
sccl_bootstrap
->
init
(
bootstrap_comm
));
printf
(
"init pos 1
\n
"
);
// -------------------------- 3.
MPI
建图 ----------------------------------- //
// -------------------------- 3.
拓扑
建图 ----------------------------------- //
topo_graph
=
new
scclTopoGraph_t
(
nRanks
);
auto
sccl_graph
=
std
::
make_unique
<
topology
::
graph
::
Graph
>
(
rank
,
nRanks
);
auto
sccl_graph
=
std
::
make_unique
<
topology
::
graph
::
Graph
>
(
sccl_bootstrap
.
get
()
);
printf
(
"init pos 2
\n
"
);
// 计算
通信路径
SCCLCHECK
(
sccl_graph
->
calculateCommunicationPaths
(
bootstrap_comm
,
topo_graph
,
sccl_bootstrap
.
get
()
));
// 计算
拓扑图
SCCLCHECK
(
sccl_graph
->
establishGraph
(
bootstrap_comm
));
printf
(
"init pos 3
\n
"
);
// 计算通信路径
SCCLCHECK
(
sccl_graph
->
calculateCommunicationPaths
(
bootstrap_comm
,
topo_graph
));
printf
(
"init pos 4
\n
"
);
// -------------------------- 3.MPI allgather设置unique_id的整合 ----------------------------------- //
// -------------------------- 5.根据各个节点的基础信息计算topo结果 ----------------------------------- //
...
...
src/hardware/topology/bootstrap/bootstrap.cpp
View file @
58d57301
This diff is collapsed.
Click to expand it.
src/hardware/topology/bootstrap/bootstrap.h
View file @
58d57301
...
...
@@ -7,7 +7,6 @@
#include "bootstrap_net.h"
#include "thread_pool.h"
#include "ipc_socket.h"
#include "physical_links.h"
namespace
sccl
{
namespace
hardware
{
...
...
@@ -15,7 +14,6 @@ namespace topology {
namespace
bootstrap
{
typedef
sccl
::
hardware
::
net
::
ipc_socket
::
scclIpcSocket_t
scclIpcSocket_t
;
typedef
physical_links
::
scclTopoNode_t
scclTopoNode_t
;
///////////////////////////////////// 用于初始化时的功能函数 //////////////////////////////////////////
scclResult_t
bootstrapGetUniqueId
(
BootstrapHandle_t
*
handle
);
...
...
@@ -56,41 +54,13 @@ typedef struct scclRankInfo {
uint64_t
pidHash
=
0
;
// 进程 ID 哈希值
}
scclRankInfo_t
;
// 定义结构体 scclNodeInfo,用于存储每个rank的图连接信息
// TODO: 目前每个rank需要的node_info大小为4k+,当卡数较大时占用内存较大,可以优化。或者不作为全局变量
typedef
struct
scclNodeInfo
{
scclTopoNode_t
*
nodes
;
// 指向scclTopoNode_t对象数组的指针
int
nLocalRanks
;
int
totalByteSize
;
// 表示占用的总字节数
// 带参数的构造函数,用于初始化nodes的大小
scclNodeInfo
(
int
nLocalRanks
)
:
nodes
(
nullptr
),
nLocalRanks
(
nLocalRanks
),
totalByteSize
(
sizeof
(
scclTopoNode_t
)
*
topoNodeMaxLocalNodes
/
nLocalRanks
)
{
nodes
=
reinterpret_cast
<
scclTopoNode_t
*>
(
malloc
(
totalByteSize
));
if
(
nodes
)
{
memset
(
nodes
,
0
,
totalByteSize
);
}
}
// 析构函数,用于释放申请的数组空间
virtual
~
scclNodeInfo
()
{
if
(
nodes
)
{
free
(
nodes
);
}
}
}
scclNodeInfo_t
;
// 所有节点的信息
typedef
struct
scclRankPhysSet
{
// 构造函数声明
scclRankPhysSet
(
int
nRanks
,
int
nLocalRanks
);
std
::
vector
<
scclRankInfo_t
>
rank_info_vec
;
std
::
vector
<
char
>
node_info_vec
;
// 实际为std::vector<scclNodeInfo_t>,vector不支持scclNodeInfo_t变长
scclRankPhysSet
(
int
nRanks
);
public:
int
nRanks
=
0
;
// 总的节点数量
int
nLocalRanks
=
0
;
// 本地计算节点中的节点总数
size_t
node_info_total_bytes
=
0
;
// 记录可变长度scclNodeInfo_t类型数据的实际大小
std
::
vector
<
scclRankInfo_t
>
rank_info_vec
;
}
scclRankPhysSet_t
;
// BootstrapComm 结构体定义,用于存储引导通信信息
...
...
@@ -126,7 +96,7 @@ public:
scclResult_t
init
(
BootstrapComm_t
*
bootstrap_comm
);
// 实现跨节点的AllGather通信操作
scclResult_t
bootstrapAllGather
(
const
void
*
src_data
,
void
*
dst_data
,
int
data_size
);
scclResult_t
bootstrapAllGather
(
const
void
*
src_data
,
void
*
dst_data
,
int
data_size
)
const
;
private:
// 执行根节点的聚集和广播操作
...
...
@@ -135,17 +105,12 @@ private:
// 初始化节点通信信息
scclResult_t
bootstrapCommInitNodeInfo
(
scclNet_t
*
scclNet
,
scclRankInfo_t
*
rank_info
);
// 实现rank_info信息的节点间通信的AllGather操作
scclResult_t
bootstrapCommAllGather
(
scclRankInfo_t
*
rank_info
,
scclNodeInfo_t
*
node_info
,
scclRankPhysSet_t
*
rank_phys_set
);
// 额外处理nRanks个nodes的连接关系
scclResult_t
bootstrapNodesLink
(
void
*
node_info_vec
,
int
node_info_total_bytes
);
private:
public:
int
rank
,
nRanks
;
// 初始化阶段获取MPI的值
int
localRank
,
nLocalRanks
;
// 通过bootstrapRootGatherAndBroadcast函数确定值
int
interRank
,
nInterRanks
;
// 整个节点在全部节点中的位置
private:
// TODO: 用于控制套接字终端的变量,目前不知道在哪里使用
volatile
uint32_t
*
abortFlag
;
// 中止标志,非阻塞套接字设置
...
...
src/hardware/topology/graph/graph.cpp
View file @
58d57301
This diff is collapsed.
Click to expand it.
src/hardware/topology/graph/graph.h
View file @
58d57301
...
...
@@ -3,6 +3,7 @@
#include <vector>
#include "base.h"
#include "graph_utils.h"
#include "paths.h"
namespace
sccl
{
namespace
hardware
{
...
...
@@ -11,23 +12,37 @@ namespace graph {
class
Graph
{
public:
Graph
(
int
rank
,
int
nRanks
);
Graph
(
const
Bootstrap
*
bootstrap
);
virtual
~
Graph
();
scclResult_t
establishGraph
(
const
BootstrapComm_t
*
bootstrap_comm
);
// 通信路径计算
scclResult_t
calculateCommunicationPaths
(
const
BootstrapComm_t
*
bootstrap_comm
,
scclTopoGraph_t
*
topo_graph
,
Bootstrap
*
sccl_bootstrap
);
scclResult_t
calculateCommunicationPaths
(
const
BootstrapComm_t
*
bootstrap_comm
,
scclTopoGraph_t
*
topo_graph
);
// 逻辑拓扑构建
scclResult_t
build
LogicalTopology
();
scclResult_t
search
LogicalTopology
();
// 根据无向图计算topo路径
scclResult_t
calculateTopoChannels
();
private:
// 额外处理nRanks个nodes的连接关系
scclResult_t
bootstrapNodesLink
(
void
*
node_info_vec
,
int
node_info_total_bytes
);
private:
const
Bootstrap
*
sccl_bootstrap
;
// 为了调用class Bootstrap中的函数
// 记录所有rank中node信息
std
::
vector
<
char
>
node_info_vec
;
// 实际为std::vector<scclNodeInfo_t>,vector不支持scclNodeInfo_t变长
size_t
node_info_total_bytes
=
0
;
// 记录可变长度scclNodeInfo_t类型数据的实际大小
std
::
vector
<
std
::
vector
<
int
>>
adjacencyMatrix
;
// 使用邻接矩阵表示图
// 你可以根据需要添加更多的私有成员变量和函数
// rank信息
int
rank
,
nRanks
;
int
localRank
,
nLocalRanks
;
int
interRank
,
nInterRanks
;
// 整个节点在全部节点中的位置
};
}
// namespace graph
...
...
src/hardware/topology/graph/graph_utils.cpp
0 → 100644
View file @
58d57301
#include <string.h>
#include "graph_utils.h"
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
graph
{
/**
 * @brief Construct a topology graph with a zero-filled nRanks x nRanks transport map.
 *
 * The map is allocated with calloc and wrapped in a ByteSpanArray; the destructor releases it with free.
 *
 * @param nRanks Number of ranks; the transport map holds nRanks * nRanks one-byte entries.
 * @throws std::bad_alloc if the transport map cannot be allocated.
 */
scclTopoGraph::scclTopoGraph(int nRanks) : nRanks(nRanks), transport_map(nullptr, 0) {
    // Compute the entry count in size_t: the product of two ints overflows (undefined behaviour)
    // once nRanks exceeds ~46k, long before the allocation itself would be unreasonable.
    const size_t entry_count = static_cast<size_t>(nRanks) * static_cast<size_t>(nRanks);

    // Allocate zero-initialised storage for the transport map.
    uint8_t* raw_transport_map = static_cast<uint8_t*>(calloc(entry_count, sizeof(uint8_t)));
    if(raw_transport_map == nullptr) {
        // Allocation failed: report it the same way operator new would.
        throw std::bad_alloc();
    }

    // Hand the buffer to transport_map as a span over entry_count bytes.
    transport_map = ByteSpanArray<uint8_t>(raw_transport_map, entry_count);
}
/**
 * @brief Release the transport map buffer that the constructor obtained from calloc.
 */
scclTopoGraph::~scclTopoGraph() {
    auto map_storage = transport_map.data();
    free(map_storage);
}
/**
 * @brief Print the transport map to stdout as an nRanks x nRanks grid, one row per line.
 *
 * Each entry is printed as a decimal number followed by a space; an entry for which
 * getTransportMapData returns nullptr is printed as "nullptr ".
 *
 * @return scclSuccess always.
 */
scclResult_t scclTopoGraph::printTransportMap() {
    const int dim = nRanks;
    for(int row = 0; row < dim; ++row) {
        for(int col = 0; col < dim; ++col) {
            const uint8_t* cell = getTransportMapData(row, col);
            if(cell == nullptr) {
                printf("nullptr ");
                continue;
            }
            printf("%d ", *cell);
        }
        printf("\n");
    }
    return scclSuccess;
}
/**
 * @brief Print every path stored in gpu_paths, grouped by start node and then by end node.
 *
 * Node ids are resolved through graph_nodes. A start or end id that is not present is
 * reported and its whole group is skipped; an unknown id inside a path is reported and
 * the rest of that path is still printed.
 *
 * @return scclSuccess always.
 */
scclResult_t scclTopoGraph::printGPUPaths() {
    for(const auto& from_entry : gpu_paths) {
        const uint64_t from_id = from_entry.first;
        auto from_it           = graph_nodes.find(from_id);
        if(from_it == graph_nodes.end()) {
            std::cout << "Start node ID " << from_id << " not found in graph nodes." << std::endl;
            continue;
        }
        std::cout << "Paths starting from node: ";
        from_it->second.printNodeInfo("Start Node");

        for(const auto& to_entry : from_entry.second) {
            const uint64_t to_id = to_entry.first;
            auto to_it           = graph_nodes.find(to_id);
            if(to_it == graph_nodes.end()) {
                std::cout << " End node ID " << to_id << " not found in graph nodes." << std::endl;
                continue;
            }
            std::cout << " to node: ";
            to_it->second.printNodeInfo("End Node");

            std::cout << " Paths:" << std::endl;
            for(const auto& hop_list : to_entry.second) {
                std::cout << " Path: ";
                for(const uint64_t hop_id : hop_list) {
                    auto hop_it = graph_nodes.find(hop_id);
                    if(hop_it == graph_nodes.end()) {
                        std::cout << " Node ID " << hop_id << " not found in graph nodes." << std::endl;
                        continue;
                    }
                    hop_it->second.printNodeInfo(" ");
                }
                // Terminate the line for this path.
                std::cout << std::endl;
            }
        }
    }
    return scclSuccess;
}
}
// namespace graph
}
// namespace topology
}
// namespace hardware
}
// namespace sccl
src/hardware/topology/graph/graph_utils.h
View file @
58d57301
...
...
@@ -3,16 +3,41 @@
#include <string.h>
#include "base.h"
#include "bootstrap.h"
#include "physical_links.h"
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
graph
{
typedef
bootstrap
::
physical_links
::
scclTopoNode_t
scclTopoNode_t
;
typedef
bootstrap
::
scclNodeInfo_t
scclNodeInfo
_t
;
typedef
physical_links
::
scclTopoNode_t
scclTopoNode
_t
;
typedef
bootstrap
::
BootstrapComm_t
BootstrapComm_t
;
typedef
topology
::
bootstrap
::
Bootstrap
Bootstrap
;
// scclNodeInfo stores the graph-connection (topology node) information of one rank.
// The buffer size is sizeof(scclTopoNode_t) * topoNodeMaxLocalNodes / nLocalRanks bytes.
// TODO: each rank currently needs 4k+ bytes of node_info, which is a lot of memory when the
//       device count is large; this could be optimised, or not kept as a global variable.
typedef struct scclNodeInfo {
    scclTopoNode_t* nodes; // Owning pointer to the zero-filled scclTopoNode_t buffer; nullptr when nothing was allocated
    int nLocalRanks;       // Value passed to the constructor
    int totalByteSize;     // Size of the `nodes` buffer in bytes

    // Allocate and zero-fill the node buffer for the given number of local ranks.
    // A non-positive nLocalRanks produces an empty object (nodes == nullptr, totalByteSize == 0)
    // rather than dividing by zero.
    scclNodeInfo(int nLocalRanks)
        : nodes(nullptr),
          nLocalRanks(nLocalRanks),
          totalByteSize(nLocalRanks > 0 ? static_cast<int>(sizeof(scclTopoNode_t) * topoNodeMaxLocalNodes / static_cast<size_t>(nLocalRanks)) : 0) {
        if(totalByteSize <= 0) {
            return;
        }
        nodes = reinterpret_cast<scclTopoNode_t*>(malloc(totalByteSize));
        if(nodes) {
            memset(nodes, 0, totalByteSize);
        }
    }

    // The buffer has a single owner: a member-wise copy would make two objects free the same pointer.
    scclNodeInfo(const scclNodeInfo&)            = delete;
    scclNodeInfo& operator=(const scclNodeInfo&) = delete;

    // Release the node buffer (free(nullptr) is a no-op).
    virtual ~scclNodeInfo() { free(nodes); }
} scclNodeInfo_t;
//////////////////////////////////////////////////////////////////////////////////////////////////
// 定义 topoPathType_t 枚举类型,用于表示不同的路径类型。
typedef
enum
topoPathType
{
PATH_LOC
=
0
,
// 本地路径
...
...
@@ -20,7 +45,7 @@ typedef enum topoPathType {
PATH_NVB
=
2
,
// 通过中间 GPU 使用 NVLink 连接
PATH_PIX
=
3
,
// 通过最多一个 PCIe 桥连接
PATH_PXB
=
4
,
// 通过多个 PCIe 桥连接(不经过 PCIe 主桥)
PATH_PXN
=
5
,
// GPU 和 NIC 之间通过中间 GPU 连接
PATH_PXN
=
5
,
// GPU 和 NIC 之间通过中间 GPU 连接
, PXN = PCI + NVLink
PATH_PHB
=
6
,
// 通过 PCIe 以及 PCIe 主桥连接
PATH_SYS
=
7
,
// 通过 PCIe 以及 NUMA 节点之间的 SMP 互连连接
PATH_NET
=
8
,
// 通过网络连接
...
...
@@ -39,44 +64,22 @@ typedef enum LinkType : uint8_t {
typedef
struct
scclTopoGraph
{
scclTopoGraph
()
=
delete
;
// 删除默认构造函数
scclTopoGraph
(
int
nRanks
)
:
nRanks
(
nRanks
),
transport_map
(
nullptr
,
0
)
{
// 分配transport_map的内存
uint8_t
*
raw_transport_map
=
static_cast
<
uint8_t
*>
(
calloc
(
nRanks
*
nRanks
,
sizeof
(
uint8_t
)));
if
(
raw_transport_map
==
nullptr
)
{
// 处理内存分配失败的情况
throw
std
::
bad_alloc
();
}
// 使用ByteSpanArray初始化transport_map
transport_map
=
ByteSpanArray
<
uint8_t
>
(
raw_transport_map
,
nRanks
*
nRanks
);
}
virtual
~
scclTopoGraph
()
{
// 释放transport_map的内存
free
(
transport_map
.
data
());
}
scclTopoGraph
(
int
nRanks
);
virtual
~
scclTopoGraph
();
uint8_t
*
getTransportMapRowStart
(
int
row
)
{
return
transport_map
[
row
*
nRanks
];
}
uint8_t
*
getTransportMapData
(
int
row
,
int
col
)
{
return
transport_map
[
row
*
nRanks
+
col
];
}
// 打印transport_map
scclResult_t
printTransportMap
()
{
for
(
int
i
=
0
;
i
<
this
->
nRanks
;
++
i
)
{
for
(
int
j
=
0
;
j
<
this
->
nRanks
;
++
j
)
{
uint8_t
*
value
=
this
->
getTransportMapData
(
i
,
j
);
if
(
value
!=
nullptr
)
{
printf
(
"%d "
,
*
value
);
}
else
{
printf
(
"nullptr "
);
}
}
printf
(
"
\n
"
);
}
return
scclSuccess
;
}
scclResult_t
printTransportMap
();
// 打印gpu_paths信息的函数
scclResult_t
printGPUPaths
();
public:
// 使用无序映射存储图的有效节点
std
::
unordered_map
<
uint64_t
,
scclTopoNode_t
>
graph_nodes
;
// 使用无序映射存储从每个GPU节点到其他GPU节点的所有路径,[start_node_id][end_node_id] = {path1, path2}
// 使用无序映射存储从每个GPU节点到其他GPU节点的所有路径,[start_node_id][end_node_id] = {path1, path2
, ...
}
std
::
unordered_map
<
uint64_t
,
std
::
unordered_map
<
uint64_t
,
std
::
vector
<
std
::
vector
<
uint64_t
>>>>
gpu_paths
;
// 传输位图
...
...
src/hardware/topology/graph/paths.cpp
View file @
58d57301
...
...
@@ -6,15 +6,15 @@ namespace hardware {
namespace
topology
{
namespace
graph
{
PathFinder
::
PathFinder
(
const
BootstrapComm_t
*
bootstrap_comm
)
PathFinder
::
PathFinder
(
const
BootstrapComm_t
*
bootstrap_comm
,
std
::
vector
<
char
>&
node_info_vec
,
size_t
node_info_total_bytes
)
:
rank
(
bootstrap_comm
->
rank
),
nRanks
(
bootstrap_comm
->
nRanks
),
localRank
(
bootstrap_comm
->
localRank
),
nLocalRanks
(
bootstrap_comm
->
nLocalRanks
),
interRank
(
bootstrap_comm
->
interRank
),
nInterRanks
(
bootstrap_comm
->
nInterRanks
),
node_container_
(
bootstrap_comm
->
r
ank
_phys_set
->
node_info_vec
.
data
(),
bootstrap_comm
->
nRanks
*
bootstrap_comm
->
rank_phys_set
->
node_info_total_bytes
)
{
// 初始化NodeContainer对象
node_container_
(
node_info_vec
.
data
(),
bootstrap_comm
->
nR
ank
s
*
node_info_total_bytes
)
{
// 初始化NodeContainer对象
printf
(
"get PathFinder, node_container_=%zu
\n
"
,
node_container_
.
size
());
for
(
size_t
i
=
0
;
i
<
node_container_
.
size
();
++
i
)
{
scclTopoNode_t
*
node
=
node_container_
[
i
];
...
...
@@ -36,7 +36,7 @@ PathFinder::PathFinder(const BootstrapComm_t* bootstrap_comm)
const scclTopoNode_t* node = node_container_[index];
int interRank, deviceValue, terminalType, hipDev, numaId;
bootstrap::
physical_links::getIdComponents(node_id, &interRank, &deviceValue, &terminalType, &hipDev, &numaId);
physical_links::getIdComponents(node_id, &interRank, &deviceValue, &terminalType, &hipDev, &numaId);
char busIdStr[17];
int64ToBusId(node->busId, busIdStr);
printf("rank=%d, node=(InterRank:%d, V:%d, T:%d, H:%d, N:%d, type:%d, busIdStr:%s), neighbor_count=%zu",
...
...
@@ -54,7 +54,7 @@ PathFinder::PathFinder(const BootstrapComm_t* bootstrap_comm)
uint64_t neighbor_id = node->neighbors[n];
const scclTopoNode_t* neighbor_node = findNodeById(neighbor_id);
if(neighbor_node) {
bootstrap::
physical_links::getIdComponents(neighbor_id, &interRank, &deviceValue, &terminalType, &hipDev, &numaId);
physical_links::getIdComponents(neighbor_id, &interRank, &deviceValue, &terminalType, &hipDev, &numaId);
int64ToBusId(neighbor_node->busId, busIdStr);
printf(", neighbor[%d]=(InterRank:%d, V:%d, T:%d, H:%d, N:%d, type:%d, busIdStr:%s)",
...
...
@@ -75,10 +75,36 @@ PathFinder::PathFinder(const BootstrapComm_t* bootstrap_comm)
}
#endif
// 查找当前rank对应的其他GPU节点的所有路径
printf
(
"PathFinder pos 1
\n
"
);
findGpuPaths
();
printf
(
"PathFinder pos 2
\n
"
);
// 查找当前rank对应的GPU的node,并执行BFS搜索,查找到其他所有GPU node的路径
for
(
const
auto
&
pair
:
id_to_index_
)
{
uint64_t
id
=
pair
.
first
;
size_t
index
=
pair
.
second
;
// 定位到node
scclTopoNode_t
*
node
=
node_container_
[
index
];
int
nodeInterRank
,
nodeHipDev
;
physical_links
::
getIdComponents
(
node
->
id
,
&
nodeInterRank
,
nullptr
,
nullptr
,
&
nodeHipDev
,
nullptr
);
if
(
node
->
type
==
GPU
&&
nodeInterRank
==
this
->
interRank
&&
nodeHipDev
==
this
->
localRank
)
{
// printf("bfsFindGpuPaths start_node_id=%lu, running\n", node->id);
bfsFindGpuPaths
(
node
->
id
);
}
}
#if 1
if
(
rank
==
1
)
{
printGpuPaths
();
}
#endif
}
/**
 * @brief Compute the GPU rank that corresponds to a topology node id.
 *
 * The id is decomposed with physical_links::getIdComponents and the rank is
 * interRank * nLocalRanks + hipDev. The decomposed values are also printed to stdout.
 *
 * @param node_id     Topology node id to decompose.
 * @param nLocalRanks Multiplier applied to the node's interRank.
 * @return The computed GPU rank.
 */
int getGpuRankFromNodeId(uint64_t node_id, int nLocalRanks) {
    // Zero-initialised so the result is well defined even if getIdComponents leaves an output untouched.
    int interRank = 0;
    int hipDev    = 0;
    physical_links::getIdComponents(node_id, &interRank, nullptr, nullptr, &hipDev, nullptr);

    const int gpu_rank = interRank * nLocalRanks + hipDev;

    // uint64_t is not `unsigned long` on every platform; cast so the argument matches the specifier.
    printf("node_id=%llu, interRank=%d, hipDev=%d, gpu_rank=%d\n", static_cast<unsigned long long>(node_id), interRank, hipDev, gpu_rank);
    return gpu_rank;
}
/**
...
...
@@ -124,48 +150,61 @@ scclResult_t PathFinder::computeTopoGpuP2pMap(scclTopoGraph_t* topo_graph) {
// 记录bitmap
LinkType_t
link_type
;
int
start_gpu_rank
,
end_gpu_rank
;
{
// 根据路径中途径的节点点确定连接方式的类型
SCCLCHECK
(
determineLinkType
(
path
,
&
link_type
));
int
start_interRank
,
start_hipDev
;
int
end_interRank
,
end_hipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
start_node_id
,
&
start_interRank
,
nullptr
,
nullptr
,
&
start_hipDev
,
nullptr
);
bootstrap
::
physical_links
::
getIdComponents
(
end_node_id
,
&
end_interRank
,
nullptr
,
nullptr
,
&
end_hipDev
,
nullptr
);
// 根据路径中途径的节点点确定连接方式的类型
SCCLCHECK
(
determineLinkType
(
path
,
&
link_type
));
// 获取gpu的rank
int
start_gpu_rank
=
getGpuRankFromNodeId
(
start_node_id
,
nLocalRanks
);
int
end_gpu_rank
=
getGpuRankFromNodeId
(
end_node_id
,
nLocalRanks
);
start_gpu_rank
=
start_interRank
*
nLocalRanks
+
start_hipDev
;
end_gpu_rank
=
end_interRank
*
nLocalRanks
+
end_hipDev
;
#if 0
printf("rank=%d, interRank=%d, localRank=%d: start_interRank=%d, start_hipDev=%d, end_interRank=%d, end_hipDev=%d, link_type=%d\n",
rank,
interRank,
localRank,
start_interRank,
start_hipDev,
end_interRank,
end_hipDev,
static_cast<int>(link_type));
#endif
}
// 查找transport_map中的起始和结束节点
uint8_t
*
transport_map_pt
=
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
);
// 将连接方式的类型存储在transport_map中
if
(
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
>
0
&&
link_type
>
0
)
{
if
(
link_type
<
static_cast
<
LinkType_t
>
(
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
))
{
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
=
link_type
;
if
(
*
transport_map_pt
>
0
&&
link_type
>
0
)
{
if
(
link_type
<
static_cast
<
LinkType_t
>
(
*
transport_map_pt
))
{
*
transport_map_pt
=
link_type
;
// 清空之前的路径
topo_graph
->
gpu_paths
[
start_node_id
][
end_node_id
].
clear
();
// 添加新的路径
topo_graph
->
gpu_paths
[
start_node_id
][
end_node_id
].
push_back
(
path
);
}
else
if
(
link_type
==
static_cast
<
LinkType_t
>
(
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
))
{
}
else
if
(
link_type
==
static_cast
<
LinkType_t
>
(
*
transport_map_pt
))
{
// 添加新的路径
topo_graph
->
gpu_paths
[
start_node_id
][
end_node_id
].
push_back
(
path
);
}
}
else
{
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
=
static_cast
<
uint8_t
>
(
link_type
);
*
transport_map_pt
=
static_cast
<
uint8_t
>
(
link_type
);
// 添加新的路径
topo_graph
->
gpu_paths
[
start_node_id
][
end_node_id
].
push_back
(
path
);
}
#if 0
{
char start_busIdStr[17] = ""; // 用于存储总线ID字符串
// 根据起始节点的ID查找对应的节点对象
const scclTopoNode_t* start_node = findNodeById(start_node_id);
// 如果找到了对应的节点对象,则将其总线ID转换为字符串
if(start_node) {
int64ToBusId(start_node->busId, start_busIdStr);
}
char end_busIdStr[17] = ""; // 用于存储总线ID字符串
// 根据起始节点的ID查找对应的节点对象
const scclTopoNode_t* end_node = findNodeById(end_node_id);
// 如果找到了对应的节点对象,则将其总线ID转换为字符串
if(end_node) {
int64ToBusId(end_node->busId, end_busIdStr);
}
printf("nLocalRanks=%d, start_node_id=%lu, busIdStr=%s, end_node_id=%lu, busIdStr=%s\n"
"start_gpu_rank: %d, end_gpu_rank: %d, link_type: %d, paths count: %zu\n",
nLocalRanks,
start_node_id,
start_busIdStr,
end_node_id,
end_busIdStr,
start_gpu_rank,
end_gpu_rank,
*(topo_graph->getTransportMapData(start_gpu_rank, end_gpu_rank)),
topo_graph->gpu_paths[start_node_id][end_node_id].size());
}
#endif
}
}
...
...
@@ -173,35 +212,6 @@ scclResult_t PathFinder::computeTopoGpuP2pMap(scclTopoGraph_t* topo_graph) {
}
/////////////////////////////////////////////////////////////////////////////////////////////
/**
* @brief 查找当前rank对应的其他GPU节点的所有路径
*
* 该函数用于查找当前rank对应的GPU节点的所有路径。它遍历`id_to_index_`中的所有节点ID和索引对,
* 对于每一个节点,如果该节点是GPU类型,并且属于当前rank的进程,则调用`bfsFindGpuPaths`函数执行广度优先搜索(BFS),
* 查找到其他所有GPU节点的路径。最后,如果当前rank为1,则调用`printGpuPaths`函数打印所有GPU路径。
*/
void
PathFinder
::
findGpuPaths
()
{
// 查找当前rank对应的GPU的node,并执行BFS搜索,查找到其他所有GPU node的路径
for
(
const
auto
&
pair
:
id_to_index_
)
{
uint64_t
id
=
pair
.
first
;
size_t
index
=
pair
.
second
;
// 定位到node
scclTopoNode_t
*
node
=
node_container_
[
index
];
int
nodeInterRank
,
nodeHipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
node
->
id
,
&
nodeInterRank
,
nullptr
,
nullptr
,
&
nodeHipDev
,
nullptr
);
if
(
node
->
type
==
GPU
&&
nodeInterRank
==
this
->
interRank
&&
nodeHipDev
==
this
->
localRank
)
{
// printf("bfsFindGpuPaths start_node_id=%lu, running\n", node->id);
bfsFindGpuPaths
(
node
->
id
);
}
}
#if 1
if
(
rank
==
1
)
{
printGpuPaths
();
}
#endif
}
/**
* @brief 根据节点ID查找节点
*
...
...
@@ -231,7 +241,6 @@ const scclTopoNode_t* PathFinder::findNodeById(uint64_t id) const {
*
* @param start_node_id 起始GPU节点的ID
*/
#if 1
void
PathFinder
::
bfsFindGpuPaths
(
uint64_t
start_node_id
)
{
// 使用一个队列来存储当前路径
std
::
queue
<
std
::
vector
<
uint64_t
>>
queue
;
...
...
@@ -259,14 +268,14 @@ void PathFinder::bfsFindGpuPaths(uint64_t start_node_id) {
// 如果当前节点是GPU节点且不是起始节点,则将当前路径加入结果
if
(
current_node
->
type
==
GPU
&&
nodeId
!=
start_node_id
)
{
int
hipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
current_node
->
id
,
nullptr
,
nullptr
,
nullptr
,
&
hipDev
,
nullptr
);
physical_links
::
getIdComponents
(
current_node
->
id
,
nullptr
,
nullptr
,
nullptr
,
&
hipDev
,
nullptr
);
// 仅当节点内的device id小于等于nLocalRanks时,才是有效GPU,才将路径加入结果
if
(
hipDev
<
nLocalRanks
)
{
gpu_paths_
[
start_node_id
].
push_back
(
path
);
}
}
else
{
int
nodeInterRank
;
bootstrap
::
physical_links
::
getIdComponents
(
nodeId
,
&
nodeInterRank
);
physical_links
::
getIdComponents
(
nodeId
,
&
nodeInterRank
);
// 遍历当前节点的所有邻居节点
for
(
uint64_t
neighbor_id
:
graph_node_neighbors_
.
at
(
nodeId
))
{
if
(
findNodeById
(
neighbor_id
)
==
nullptr
)
{
...
...
@@ -274,7 +283,7 @@ void PathFinder::bfsFindGpuPaths(uint64_t start_node_id) {
}
// 获取邻居节点的interRank
int
neighbor_inter_rank
;
bootstrap
::
physical_links
::
getIdComponents
(
neighbor_id
,
&
neighbor_inter_rank
);
physical_links
::
getIdComponents
(
neighbor_id
,
&
neighbor_inter_rank
);
// 检查邻居节点是否已在当前路径中访问过
bool
visited
=
std
::
find
(
path
.
begin
(),
path
.
end
(),
neighbor_id
)
!=
path
.
end
();
...
...
@@ -302,141 +311,6 @@ void PathFinder::bfsFindGpuPaths(uint64_t start_node_id) {
}
}
#else
void
PathFinder
::
bfsFindGpuPaths
(
uint64_t
start_node_id
)
{
// 使用一个队列来存储当前路径
std
::
queue
<
std
::
vector
<
uint64_t
>>
queue
;
// 使用一个unordered_map来存储每个node的最短路径
std
::
unordered_map
<
uint64_t
,
std
::
vector
<
uint64_t
>>
shortest_paths
;
// 将起始节点加入队列
queue
.
push
({
start_node_id
});
shortest_paths
[
start_node_id
]
=
{
start_node_id
};
// 当队列不为空时,继续搜索
while
(
!
queue
.
empty
())
{
// 从队列中取出一个路径
auto
path
=
queue
.
front
();
queue
.
pop
();
// 获取当前路径的最后一个节点的ID
uint64_t
nodeId
=
path
.
back
();
// 根据节点ID查找对应的节点
const
scclTopoNode_t
*
current_node
=
findNodeById
(
nodeId
);
if
(
current_node
==
nullptr
)
{
continue
;
}
// 如果当前节点是GPU节点且不是起始节点,则将当前路径加入结果
if
(
current_node
->
type
==
GPU
&&
nodeId
!=
start_node_id
)
{
int
hipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
current_node
->
id
,
nullptr
,
nullptr
,
nullptr
,
&
hipDev
,
nullptr
);
if
(
hipDev
<
nLocalRanks
)
{
gpu_paths_
[
start_node_id
].
push_back
(
path
);
}
}
else
{
int
nodeInterRank
;
bootstrap
::
physical_links
::
getIdComponents
(
nodeId
,
&
nodeInterRank
);
// 遍历当前节点的所有邻居节点
for
(
uint64_t
neighbor_id
:
graph_node_neighbors_
.
at
(
nodeId
))
{
if
(
findNodeById
(
neighbor_id
)
==
nullptr
)
{
continue
;
}
// 获取邻居节点的interRank
int
neighbor_inter_rank
;
bootstrap
::
physical_links
::
getIdComponents
(
neighbor_id
,
&
neighbor_inter_rank
);
// 检查邻居节点是否已在当前路径中访问过
bool
visited
=
std
::
find
(
path
.
begin
(),
path
.
end
(),
neighbor_id
)
!=
path
.
end
();
// 检查interRank是否已经存在(仅当interRank改变时)
bool
inter_rank_exists
=
false
;
if
(
neighbor_inter_rank
!=
nodeInterRank
)
{
for
(
uint64_t
node_id
:
path
)
{
if
(
node_id
==
neighbor_id
)
{
inter_rank_exists
=
true
;
break
;
}
}
}
// 如果邻居节点未访问过且interRank未存在,则扩展路径
if
(
!
visited
&&
!
inter_rank_exists
)
{
std
::
vector
<
uint64_t
>
new_path
=
path
;
new_path
.
push_back
(
neighbor_id
);
// 如果新路径比已有的最短路径更短,则更新最短路径
if
(
shortest_paths
.
find
(
neighbor_id
)
==
shortest_paths
.
end
()
||
shortest_paths
[
neighbor_id
].
size
()
>
new_path
.
size
())
{
shortest_paths
[
neighbor_id
]
=
new_path
;
queue
.
push
(
new_path
);
}
}
}
}
}
}
void
PathFinder
::
bfsFindGpuPaths
(
uint64_t
start_node_id
)
{
// 使用一个队列来存储当前路径
std
::
queue
<
std
::
vector
<
uint64_t
>>
queue
;
// 将起始节点加入队列
queue
.
push
({
start_node_id
});
// 当队列不为空时,继续搜索
while
(
!
queue
.
empty
())
{
// 从队列中取出一个路径
auto
path
=
queue
.
front
();
queue
.
pop
();
// 获取当前路径的最后一个节点的ID
uint64_t
nodeId
=
path
.
back
();
// 根据节点ID查找对应的节点
const
scclTopoNode_t
*
current_node
=
findNodeById
(
nodeId
);
if
(
current_node
==
nullptr
)
{
continue
;
}
// 如果当前节点是GPU节点且不是起始节点,则将当前路径加入结果
if
(
current_node
->
type
==
GPU
&&
nodeId
!=
start_node_id
)
{
int
hipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
current_node
->
id
,
nullptr
,
nullptr
,
nullptr
,
&
hipDev
,
nullptr
);
if
(
hipDev
<
nLocalRanks
)
{
gpu_paths_
[
start_node_id
].
push_back
(
path
);
}
}
else
{
int
nodeInterRank
;
bootstrap
::
physical_links
::
getIdComponents
(
nodeId
,
&
nodeInterRank
);
// 遍历当前节点的所有邻居节点
for
(
uint64_t
neighbor_id
:
graph_node_neighbors_
.
at
(
nodeId
))
{
if
(
findNodeById
(
nodeId
)
==
nullptr
)
{
continue
;
}
// 获取邻居节点的interRank
int
neighbor_inter_rank
;
bootstrap
::
physical_links
::
getIdComponents
(
neighbor_id
,
&
neighbor_inter_rank
);
// 检查邻居节点是否已在当前路径中访问过
bool
visited
=
std
::
find
(
path
.
begin
(),
path
.
end
(),
neighbor_id
)
!=
path
.
end
();
// 检查interRank是否已经存在(仅当interRank改变时)
bool
inter_rank_exists
=
false
;
if
(
neighbor_inter_rank
!=
(
nodeInterRank
))
{
for
(
uint64_t
node_id
:
path
)
{
if
((
nodeInterRank
)
==
neighbor_inter_rank
)
{
inter_rank_exists
=
true
;
break
;
}
}
}
// 如果邻居节点未访问过且interRank未存在,则扩展路径
if
(
!
visited
&&
!
inter_rank_exists
)
{
std
::
vector
<
uint64_t
>
new_path
=
path
;
new_path
.
push_back
(
neighbor_id
);
queue
.
push
(
new_path
);
}
}
}
}
}
#endif
/**
* @brief 打印GPU路径信息
*
...
...
@@ -463,7 +337,7 @@ void PathFinder::printGpuPaths() {
int
interRank
,
deviceValue
,
terminalType
,
hipDev
,
numaId
;
// 根据起始节点的ID获取其interRank、deviceValue、terminalType和numaId
bootstrap
::
physical_links
::
getIdComponents
(
start_node_id
,
&
interRank
,
&
deviceValue
,
&
terminalType
,
&
hipDev
,
&
numaId
);
physical_links
::
getIdComponents
(
start_node_id
,
&
interRank
,
&
deviceValue
,
&
terminalType
,
&
hipDev
,
&
numaId
);
printf
(
"GPU node ID:%lu (InterRank:%d, V:%d, T:%d, H:%d, N:%d) (Path count: %zu)
\n
"
,
start_node_id
,
interRank
,
...
...
@@ -486,7 +360,7 @@ void PathFinder::printGpuPaths() {
const
scclTopoNode_t
*
node
=
findNodeById
(
node_id
);
if
(
node
)
{
// 根据节点的ID获取其interRank、deviceValue、terminalType和numaId
bootstrap
::
physical_links
::
getIdComponents
(
node
->
id
,
&
interRank
,
&
deviceValue
,
&
terminalType
,
&
hipDev
,
&
numaId
);
physical_links
::
getIdComponents
(
node
->
id
,
&
interRank
,
&
deviceValue
,
&
terminalType
,
&
hipDev
,
&
numaId
);
// 将节点的总线ID转换为字符串
int64ToBusId
(
node
->
busId
,
busIdStr
);
// 打印节点的信息,包括其interRank、deviceValue、terminalType、numaId、类型和总线ID字符串
...
...
src/hardware/topology/graph/paths.h
View file @
58d57301
...
...
@@ -13,21 +13,21 @@ namespace hardware {
namespace
topology
{
namespace
graph
{
// 设置Path路径直接link的 bandwidth 和 speed
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class
PathFinder
{
public:
// 构造函数
PathFinder
(
const
BootstrapComm_t
*
bootstrap_comm
);
PathFinder
(
const
BootstrapComm_t
*
bootstrap_comm
,
std
::
vector
<
char
>&
node_info_vec
,
size_t
node_info_total_bytes
);
// 计算拓扑图中GPU节点之间的点对点映射
scclResult_t
computeTopoGpuP2pMap
(
scclTopoGraph_t
*
graph
);
// 计算拓扑图中GPU节点之间的点对点映射
,结果保存在graph中
scclResult_t
computeTopoGpuP2pMap
(
scclTopoGraph_t
*
topo_
graph
);
// 打印函数
void
printGpuPaths
();
private:
// 获取所有GPU到GPU的路径函数
void
findGpuPaths
();
// 使用广度优先搜索(BFS)查找从起始GPU节点到其他GPU节点的最短路径
void
bfsFindGpuPaths
(
uint64_t
start_node_id
);
...
...
@@ -53,6 +53,9 @@ private:
int
nInterRanks
=
0
;
// 全局拥有节点的个数
};
// 根据 node_id 获取 gpu_rank
int
getGpuRankFromNodeId
(
uint64_t
node_id
,
int
nLocalRanks
);
}
// namespace graph
}
// namespace topology
}
// namespace hardware
...
...
src/hardware/topology/
bootst
rap/physical_links.cpp
→
src/hardware/topology/
g
rap
h
/physical_links.cpp
View file @
58d57301
...
...
@@ -4,7 +4,7 @@
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
bootst
rap
{
namespace
g
rap
h
{
namespace
physical_links
{
constexpr
int
numaIdStrLen
=
10
;
...
...
@@ -726,7 +726,7 @@ void printTopoNode(ByteSpanArray<scclTopoNode_t>& nodes, int nodeIndex, const ch
}
}
// namespace physical_links
}
// namespace
bootst
rap
}
// namespace
g
rap
h
}
// namespace topology
}
// namespace hardware
}
// namespace sccl
\ No newline at end of file
src/hardware/topology/
bootst
rap/physical_links.h
→
src/hardware/topology/
g
rap
h
/physical_links.h
View file @
58d57301
...
...
@@ -13,12 +13,14 @@
#include <filesystem> // 需要C++17支持
#include "container.h"
#include "bootstrap
_utils
.h"
#include "bootstrap.h"
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
bootstrap
{
namespace
graph
{
typedef
sccl
::
hardware
::
net
::
scclNet_t
scclNet_t
;
constexpr
size_t
topoNodeMaxLocalNodes
=
128
;
// 每个节点最多的node数量
constexpr
size_t
topoNodeMaxNeighbors
=
16
;
// 每个node最多neighbor数量
...
...
@@ -70,7 +72,7 @@ scclResult_t generate_topo_nodes(const char* pciPath, int interRank, int hipDev,
// 根据numaId获取pci路径
std
::
string
generate_topo_node_numa_info
(
int
numaId
);
// 输出id分解后的所有数据
// 输出
node
id分解后的所有数据
void
getIdComponents
(
uint64_t
idToDecompose
,
int
*
interRank
=
nullptr
,
int
*
deviceValue
=
nullptr
,
int
*
terminalType
=
nullptr
,
int
*
hipDev
=
nullptr
,
int
*
numaId
=
nullptr
);
...
...
@@ -82,7 +84,7 @@ char* getNetPciPath(scclNet_t* scclNet, int hipDev);
void
printTopoNode
(
ByteSpanArray
<
scclTopoNode_t
>&
nodes
,
int
nodeIndex
,
const
char
*
prefix
);
}
// namespace physical_links
}
// namespace
bootst
rap
}
// namespace
g
rap
h
}
// namespace topology
}
// namespace hardware
}
// namespace sccl
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment