Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
lishen01
Sccl
Commits
58d57301
"vscode:/vscode.git/clone" did not exist on "81d1f00df32e64053343e863c9c71a5d97761675"
Commit
58d57301
authored
Aug 13, 2025
by
lishen
Browse files
将建图过程中原本在bootstrap中的一部分完全移动到graph中
parent
708aae12
Changes
12
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
634 additions
and
684 deletions
+634
-684
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
+2
-1
src/hardware/hardware.cpp
src/hardware/hardware.cpp
+8
-4
src/hardware/topology/bootstrap/bootstrap.cpp
src/hardware/topology/bootstrap/bootstrap.cpp
+6
-372
src/hardware/topology/bootstrap/bootstrap.h
src/hardware/topology/bootstrap/bootstrap.h
+5
-40
src/hardware/topology/graph/graph.cpp
src/hardware/topology/graph/graph.cpp
+375
-12
src/hardware/topology/graph/graph.h
src/hardware/topology/graph/graph.h
+19
-4
src/hardware/topology/graph/graph_utils.cpp
src/hardware/topology/graph/graph_utils.cpp
+86
-0
src/hardware/topology/graph/graph_utils.h
src/hardware/topology/graph/graph_utils.h
+35
-32
src/hardware/topology/graph/paths.cpp
src/hardware/topology/graph/paths.cpp
+81
-207
src/hardware/topology/graph/paths.h
src/hardware/topology/graph/paths.h
+9
-6
src/hardware/topology/graph/physical_links.cpp
src/hardware/topology/graph/physical_links.cpp
+2
-2
src/hardware/topology/graph/physical_links.h
src/hardware/topology/graph/physical_links.h
+6
-4
No files found.
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
View file @
58d57301
...
...
@@ -13,13 +13,14 @@ hipcc ./3_mpi_init_mpi_init_step2_graph.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/rocm_smi_wrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/physical_links.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/topo_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/archinfo.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/param.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/hardware.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/thread_pool.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/physical_links.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/graph_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/graph.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/paths.cpp
\
-o
3_mpi_init_mpi_init_step2_graph
\
...
...
src/hardware/hardware.cpp
View file @
58d57301
...
...
@@ -39,15 +39,19 @@ scclResult_t sccl_init(const scclUniqueId* unique_id, int rank, int nRanks) {
SCCLCHECK
(
sccl_bootstrap
->
init
(
bootstrap_comm
));
printf
(
"init pos 1
\n
"
);
// -------------------------- 3.
MPI
建图 ----------------------------------- //
// -------------------------- 3.
拓扑
建图 ----------------------------------- //
topo_graph
=
new
scclTopoGraph_t
(
nRanks
);
auto
sccl_graph
=
std
::
make_unique
<
topology
::
graph
::
Graph
>
(
rank
,
nRanks
);
auto
sccl_graph
=
std
::
make_unique
<
topology
::
graph
::
Graph
>
(
sccl_bootstrap
.
get
()
);
printf
(
"init pos 2
\n
"
);
// 计算
通信路径
SCCLCHECK
(
sccl_graph
->
calculateCommunicationPaths
(
bootstrap_comm
,
topo_graph
,
sccl_bootstrap
.
get
()
));
// 计算
拓扑图
SCCLCHECK
(
sccl_graph
->
establishGraph
(
bootstrap_comm
));
printf
(
"init pos 3
\n
"
);
// 计算通信路径
SCCLCHECK
(
sccl_graph
->
calculateCommunicationPaths
(
bootstrap_comm
,
topo_graph
));
printf
(
"init pos 4
\n
"
);
// -------------------------- 3.MPI allgather设置unique_id的整合 ----------------------------------- //
// -------------------------- 5.根据各个节点的基础信息计算topo结果 ----------------------------------- //
...
...
src/hardware/topology/bootstrap/bootstrap.cpp
View file @
58d57301
This diff is collapsed.
Click to expand it.
src/hardware/topology/bootstrap/bootstrap.h
View file @
58d57301
...
...
@@ -7,7 +7,6 @@
#include "bootstrap_net.h"
#include "thread_pool.h"
#include "ipc_socket.h"
#include "physical_links.h"
namespace
sccl
{
namespace
hardware
{
...
...
@@ -15,7 +14,6 @@ namespace topology {
namespace
bootstrap
{
typedef
sccl
::
hardware
::
net
::
ipc_socket
::
scclIpcSocket_t
scclIpcSocket_t
;
typedef
physical_links
::
scclTopoNode_t
scclTopoNode_t
;
///////////////////////////////////// 用于初始化时的功能函数 //////////////////////////////////////////
scclResult_t
bootstrapGetUniqueId
(
BootstrapHandle_t
*
handle
);
...
...
@@ -56,41 +54,13 @@ typedef struct scclRankInfo {
uint64_t
pidHash
=
0
;
// 进程 ID 哈希值
}
scclRankInfo_t
;
// 定义结构体 scclNodeInfo,用于存储每个rank的图连接信息
// TODO: 目前每个rank需要的node_info大小为4k+,当卡数较大时占用内存较大,可以优化。或者不作为全局变量
typedef
struct
scclNodeInfo
{
scclTopoNode_t
*
nodes
;
// 指向scclTopoNode_t对象数组的指针
int
nLocalRanks
;
int
totalByteSize
;
// 表示占用的总字节数
// 带参数的构造函数,用于初始化nodes的大小
scclNodeInfo
(
int
nLocalRanks
)
:
nodes
(
nullptr
),
nLocalRanks
(
nLocalRanks
),
totalByteSize
(
sizeof
(
scclTopoNode_t
)
*
topoNodeMaxLocalNodes
/
nLocalRanks
)
{
nodes
=
reinterpret_cast
<
scclTopoNode_t
*>
(
malloc
(
totalByteSize
));
if
(
nodes
)
{
memset
(
nodes
,
0
,
totalByteSize
);
}
}
// 析构函数,用于释放申请的数组空间
virtual
~
scclNodeInfo
()
{
if
(
nodes
)
{
free
(
nodes
);
}
}
}
scclNodeInfo_t
;
// 所有节点的信息
typedef
struct
scclRankPhysSet
{
// 构造函数声明
scclRankPhysSet
(
int
nRanks
,
int
nLocalRanks
);
std
::
vector
<
scclRankInfo_t
>
rank_info_vec
;
std
::
vector
<
char
>
node_info_vec
;
// 实际为std::vector<scclNodeInfo_t>,vector不支持scclNodeInfo_t变长
scclRankPhysSet
(
int
nRanks
);
public:
int
nRanks
=
0
;
// 总的节点数量
int
nLocalRanks
=
0
;
// 本地计算节点中的节点总数
size_t
node_info_total_bytes
=
0
;
// 记录可变长度scclNodeInfo_t类型数据的实际大小
std
::
vector
<
scclRankInfo_t
>
rank_info_vec
;
}
scclRankPhysSet_t
;
// BootstrapComm 结构体定义,用于存储引导通信信息
...
...
@@ -126,7 +96,7 @@ public:
scclResult_t
init
(
BootstrapComm_t
*
bootstrap_comm
);
// 实现跨节点的AllGather通信操作
scclResult_t
bootstrapAllGather
(
const
void
*
src_data
,
void
*
dst_data
,
int
data_size
);
scclResult_t
bootstrapAllGather
(
const
void
*
src_data
,
void
*
dst_data
,
int
data_size
)
const
;
private:
// 执行根节点的聚集和广播操作
...
...
@@ -135,17 +105,12 @@ private:
// 初始化节点通信信息
scclResult_t
bootstrapCommInitNodeInfo
(
scclNet_t
*
scclNet
,
scclRankInfo_t
*
rank_info
);
// 实现rank_info信息的节点间通信的AllGather操作
scclResult_t
bootstrapCommAllGather
(
scclRankInfo_t
*
rank_info
,
scclNodeInfo_t
*
node_info
,
scclRankPhysSet_t
*
rank_phys_set
);
// 额外处理nRanks个nodes的连接关系
scclResult_t
bootstrapNodesLink
(
void
*
node_info_vec
,
int
node_info_total_bytes
);
private:
public:
int
rank
,
nRanks
;
// 初始化阶段获取MPI的值
int
localRank
,
nLocalRanks
;
// 通过bootstrapRootGatherAndBroadcast函数确定值
int
interRank
,
nInterRanks
;
// 整个节点在全部节点中的位置
private:
// TODO: 用于控制套接字终端的变量,目前不知道在哪里使用
volatile
uint32_t
*
abortFlag
;
// 中止标志,非阻塞套接字设置
...
...
src/hardware/topology/graph/graph.cpp
View file @
58d57301
This diff is collapsed.
Click to expand it.
src/hardware/topology/graph/graph.h
View file @
58d57301
...
...
@@ -3,6 +3,7 @@
#include <vector>
#include "base.h"
#include "graph_utils.h"
#include "paths.h"
namespace
sccl
{
namespace
hardware
{
...
...
@@ -11,23 +12,37 @@ namespace graph {
class
Graph
{
public:
Graph
(
int
rank
,
int
nRanks
);
Graph
(
const
Bootstrap
*
bootstrap
);
virtual
~
Graph
();
scclResult_t
establishGraph
(
const
BootstrapComm_t
*
bootstrap_comm
);
// 通信路径计算
scclResult_t
calculateCommunicationPaths
(
const
BootstrapComm_t
*
bootstrap_comm
,
scclTopoGraph_t
*
topo_graph
,
Bootstrap
*
sccl_bootstrap
);
scclResult_t
calculateCommunicationPaths
(
const
BootstrapComm_t
*
bootstrap_comm
,
scclTopoGraph_t
*
topo_graph
);
// 逻辑拓扑构建
scclResult_t
build
LogicalTopology
();
scclResult_t
search
LogicalTopology
();
// 根据无向图计算topo路径
scclResult_t
calculateTopoChannels
();
private:
// 额外处理nRanks个nodes的连接关系
scclResult_t
bootstrapNodesLink
(
void
*
node_info_vec
,
int
node_info_total_bytes
);
private:
const
Bootstrap
*
sccl_bootstrap
;
// 为了调用class Bootstrap中的函数
// 记录所有rank中node信息
std
::
vector
<
char
>
node_info_vec
;
// 实际为std::vector<scclNodeInfo_t>,vector不支持scclNodeInfo_t变长
size_t
node_info_total_bytes
=
0
;
// 记录可变长度scclNodeInfo_t类型数据的实际大小
std
::
vector
<
std
::
vector
<
int
>>
adjacencyMatrix
;
// 使用邻接矩阵表示图
// 你可以根据需要添加更多的私有成员变量和函数
// rank信息
int
rank
,
nRanks
;
int
localRank
,
nLocalRanks
;
int
interRank
,
nInterRanks
;
// 整个节点在全部节点中的位置
};
}
// namespace graph
...
...
src/hardware/topology/graph/graph_utils.cpp
0 → 100644
View file @
58d57301
#include <string.h>
#include "graph_utils.h"
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
graph
{
/**
 * @brief Builds a topology graph object for `nRanks` ranks.
 *
 * Allocates a zero-initialised buffer of nRanks * nRanks bytes and wraps it in
 * `transport_map`.
 *
 * @param nRanks Number of ranks; the transport map holds nRanks * nRanks entries.
 * @throws std::bad_alloc if the buffer cannot be allocated.
 */
scclTopoGraph::scclTopoGraph(int nRanks) : nRanks(nRanks), transport_map(nullptr, 0) {
    // Widen before multiplying: nRanks * nRanks evaluated in `int` overflows (undefined
    // behaviour) for large rank counts.
    const size_t entry_count = static_cast<size_t>(static_cast<long long>(nRanks) * nRanks);

    // Allocate the zero-filled storage for transport_map.
    uint8_t* raw_transport_map = static_cast<uint8_t*>(calloc(entry_count, sizeof(uint8_t)));
    if(raw_transport_map == nullptr) {
        // Allocation failure is reported to the caller as an exception.
        throw std::bad_alloc();
    }

    // Hand the raw storage to the ByteSpanArray view.
    transport_map = ByteSpanArray<uint8_t>(raw_transport_map, entry_count);
}
// Destructor: hands the storage behind transport_map back to the C allocator.
scclTopoGraph::~scclTopoGraph() {
    void* backing_store = transport_map.data();
    free(backing_store);
}
/**
 * @brief Prints transport_map to stdout, one output line per row.
 *
 * Each entry is printed as a decimal number followed by a space; an entry whose
 * lookup returns nullptr is printed as the text "nullptr ".
 *
 * @return scclSuccess always.
 */
scclResult_t scclTopoGraph::printTransportMap() {
    for(int row = 0; row < this->nRanks; ++row) {
        for(int col = 0; col < this->nRanks; ++col) {
            const uint8_t* cell = this->getTransportMapData(row, col);
            if(cell == nullptr) {
                printf("nullptr ");
                continue;
            }
            printf("%d ", *cell);
        }
        // Terminate the current row.
        printf("\n");
    }
    return scclSuccess;
}
/**
 * @brief Prints every entry of gpu_paths to stdout.
 *
 * For each start node and each end node recorded under it, the node descriptions are
 * printed via printNodeInfo, followed by every stored path node by node. Node ids that
 * are missing from graph_nodes are reported with a "not found" message; a missing start
 * or end node skips that entry.
 *
 * @return scclSuccess always.
 */
scclResult_t scclTopoGraph::printGPUPaths() {
    for(const auto& src_entry : gpu_paths) {
        const uint64_t src_id = src_entry.first;
        auto src_it           = graph_nodes.find(src_id);

        // Unknown start node: report it and move on to the next start node.
        if(src_it == graph_nodes.end()) {
            std::cout << "Start node ID " << src_id << " not found in graph nodes." << std::endl;
            continue;
        }
        std::cout << "Paths starting from node: ";
        src_it->second.printNodeInfo("Start Node");

        for(const auto& dst_entry : src_entry.second) {
            const uint64_t dst_id = dst_entry.first;
            auto dst_it           = graph_nodes.find(dst_id);

            // Unknown end node: report it and skip its paths.
            if(dst_it == graph_nodes.end()) {
                std::cout << " End node ID " << dst_id << " not found in graph nodes." << std::endl;
                continue;
            }
            std::cout << " to node: ";
            dst_it->second.printNodeInfo("End Node");

            std::cout << " Paths:" << std::endl;
            for(const auto& hop_list : dst_entry.second) {
                std::cout << " Path: ";
                for(const auto& hop_id : hop_list) {
                    auto hop_it = graph_nodes.find(hop_id);
                    if(hop_it == graph_nodes.end()) {
                        std::cout << " Node ID " << hop_id << " not found in graph nodes." << std::endl;
                    } else {
                        hop_it->second.printNodeInfo(" ");
                    }
                }
                std::cout << std::endl;
            }
        }
    }
    return scclSuccess;
}
}
// namespace graph
}
// namespace topology
}
// namespace hardware
}
// namespace sccl
src/hardware/topology/graph/graph_utils.h
View file @
58d57301
...
...
@@ -3,16 +3,41 @@
#include <string.h>
#include "base.h"
#include "bootstrap.h"
#include "physical_links.h"
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
graph
{
typedef
bootstrap
::
physical_links
::
scclTopoNode_t
scclTopoNode_t
;
typedef
bootstrap
::
scclNodeInfo_t
scclNodeInfo
_t
;
typedef
physical_links
::
scclTopoNode_t
scclTopoNode
_t
;
typedef
bootstrap
::
BootstrapComm_t
BootstrapComm_t
;
typedef
topology
::
bootstrap
::
Bootstrap
Bootstrap
;
// 定义结构体 scclNodeInfo,用于存储每个rank的图连接信息
// TODO: 目前每个rank需要的node_info大小为4k+,当卡数较大时占用内存较大,可以优化。或者不作为全局变量
typedef
struct
scclNodeInfo
{
scclTopoNode_t
*
nodes
;
// 指向scclTopoNode_t对象数组的指针
int
nLocalRanks
;
int
totalByteSize
;
// 表示占用的总字节数
// 带参数的构造函数,用于初始化nodes的大小
scclNodeInfo
(
int
nLocalRanks
)
:
nodes
(
nullptr
),
nLocalRanks
(
nLocalRanks
),
totalByteSize
(
sizeof
(
scclTopoNode_t
)
*
topoNodeMaxLocalNodes
/
nLocalRanks
)
{
nodes
=
reinterpret_cast
<
scclTopoNode_t
*>
(
malloc
(
totalByteSize
));
if
(
nodes
)
{
memset
(
nodes
,
0
,
totalByteSize
);
}
}
// 析构函数,用于释放申请的数组空间
virtual
~
scclNodeInfo
()
{
if
(
nodes
)
{
free
(
nodes
);
}
}
}
scclNodeInfo_t
;
//////////////////////////////////////////////////////////////////////////////////////////////////
// 定义 topoPathType_t 枚举类型,用于表示不同的路径类型。
typedef
enum
topoPathType
{
PATH_LOC
=
0
,
// 本地路径
...
...
@@ -20,7 +45,7 @@ typedef enum topoPathType {
PATH_NVB
=
2
,
// 通过中间 GPU 使用 NVLink 连接
PATH_PIX
=
3
,
// 通过最多一个 PCIe 桥连接
PATH_PXB
=
4
,
// 通过多个 PCIe 桥连接(不经过 PCIe 主桥)
PATH_PXN
=
5
,
// GPU 和 NIC 之间通过中间 GPU 连接
PATH_PXN
=
5
,
// GPU 和 NIC 之间通过中间 GPU 连接
, PXN = PCI + NVLink
PATH_PHB
=
6
,
// 通过 PCIe 以及 PCIe 主桥连接
PATH_SYS
=
7
,
// 通过 PCIe 以及 NUMA 节点之间的 SMP 互连连接
PATH_NET
=
8
,
// 通过网络连接
...
...
@@ -39,44 +64,22 @@ typedef enum LinkType : uint8_t {
typedef
struct
scclTopoGraph
{
scclTopoGraph
()
=
delete
;
// 删除默认构造函数
scclTopoGraph
(
int
nRanks
)
:
nRanks
(
nRanks
),
transport_map
(
nullptr
,
0
)
{
// 分配transport_map的内存
uint8_t
*
raw_transport_map
=
static_cast
<
uint8_t
*>
(
calloc
(
nRanks
*
nRanks
,
sizeof
(
uint8_t
)));
if
(
raw_transport_map
==
nullptr
)
{
// 处理内存分配失败的情况
throw
std
::
bad_alloc
();
}
// 使用ByteSpanArray初始化transport_map
transport_map
=
ByteSpanArray
<
uint8_t
>
(
raw_transport_map
,
nRanks
*
nRanks
);
}
virtual
~
scclTopoGraph
()
{
// 释放transport_map的内存
free
(
transport_map
.
data
());
}
scclTopoGraph
(
int
nRanks
);
virtual
~
scclTopoGraph
();
uint8_t
*
getTransportMapRowStart
(
int
row
)
{
return
transport_map
[
row
*
nRanks
];
}
uint8_t
*
getTransportMapData
(
int
row
,
int
col
)
{
return
transport_map
[
row
*
nRanks
+
col
];
}
// 打印transport_map
scclResult_t
printTransportMap
()
{
for
(
int
i
=
0
;
i
<
this
->
nRanks
;
++
i
)
{
for
(
int
j
=
0
;
j
<
this
->
nRanks
;
++
j
)
{
uint8_t
*
value
=
this
->
getTransportMapData
(
i
,
j
);
if
(
value
!=
nullptr
)
{
printf
(
"%d "
,
*
value
);
}
else
{
printf
(
"nullptr "
);
}
}
printf
(
"
\n
"
);
}
return
scclSuccess
;
}
scclResult_t
printTransportMap
();
// 打印gpu_paths信息的函数
scclResult_t
printGPUPaths
();
public:
// 使用无序映射存储图的有效节点
std
::
unordered_map
<
uint64_t
,
scclTopoNode_t
>
graph_nodes
;
// 使用无序映射存储从每个GPU节点到其他GPU节点的所有路径,[start_node_id][end_node_id] = {path1, path2}
// 使用无序映射存储从每个GPU节点到其他GPU节点的所有路径,[start_node_id][end_node_id] = {path1, path2
, ...
}
std
::
unordered_map
<
uint64_t
,
std
::
unordered_map
<
uint64_t
,
std
::
vector
<
std
::
vector
<
uint64_t
>>>>
gpu_paths
;
// 传输位图
...
...
src/hardware/topology/graph/paths.cpp
View file @
58d57301
...
...
@@ -6,15 +6,15 @@ namespace hardware {
namespace
topology
{
namespace
graph
{
PathFinder
::
PathFinder
(
const
BootstrapComm_t
*
bootstrap_comm
)
PathFinder
::
PathFinder
(
const
BootstrapComm_t
*
bootstrap_comm
,
std
::
vector
<
char
>&
node_info_vec
,
size_t
node_info_total_bytes
)
:
rank
(
bootstrap_comm
->
rank
),
nRanks
(
bootstrap_comm
->
nRanks
),
localRank
(
bootstrap_comm
->
localRank
),
nLocalRanks
(
bootstrap_comm
->
nLocalRanks
),
interRank
(
bootstrap_comm
->
interRank
),
nInterRanks
(
bootstrap_comm
->
nInterRanks
),
node_container_
(
bootstrap_comm
->
r
ank
_phys_set
->
node_info_vec
.
data
(),
bootstrap_comm
->
nRanks
*
bootstrap_comm
->
rank_phys_set
->
node_info_total_bytes
)
{
// 初始化NodeContainer对象
node_container_
(
node_info_vec
.
data
(),
bootstrap_comm
->
nR
ank
s
*
node_info_total_bytes
)
{
// 初始化NodeContainer对象
printf
(
"get PathFinder, node_container_=%zu
\n
"
,
node_container_
.
size
());
for
(
size_t
i
=
0
;
i
<
node_container_
.
size
();
++
i
)
{
scclTopoNode_t
*
node
=
node_container_
[
i
];
...
...
@@ -36,7 +36,7 @@ PathFinder::PathFinder(const BootstrapComm_t* bootstrap_comm)
const scclTopoNode_t* node = node_container_[index];
int interRank, deviceValue, terminalType, hipDev, numaId;
bootstrap::
physical_links::getIdComponents(node_id, &interRank, &deviceValue, &terminalType, &hipDev, &numaId);
physical_links::getIdComponents(node_id, &interRank, &deviceValue, &terminalType, &hipDev, &numaId);
char busIdStr[17];
int64ToBusId(node->busId, busIdStr);
printf("rank=%d, node=(InterRank:%d, V:%d, T:%d, H:%d, N:%d, type:%d, busIdStr:%s), neighbor_count=%zu",
...
...
@@ -54,7 +54,7 @@ PathFinder::PathFinder(const BootstrapComm_t* bootstrap_comm)
uint64_t neighbor_id = node->neighbors[n];
const scclTopoNode_t* neighbor_node = findNodeById(neighbor_id);
if(neighbor_node) {
bootstrap::
physical_links::getIdComponents(neighbor_id, &interRank, &deviceValue, &terminalType, &hipDev, &numaId);
physical_links::getIdComponents(neighbor_id, &interRank, &deviceValue, &terminalType, &hipDev, &numaId);
int64ToBusId(neighbor_node->busId, busIdStr);
printf(", neighbor[%d]=(InterRank:%d, V:%d, T:%d, H:%d, N:%d, type:%d, busIdStr:%s)",
...
...
@@ -75,10 +75,36 @@ PathFinder::PathFinder(const BootstrapComm_t* bootstrap_comm)
}
#endif
// 查找当前rank对应的其他GPU节点的所有路径
printf
(
"PathFinder pos 1
\n
"
);
findGpuPaths
();
printf
(
"PathFinder pos 2
\n
"
);
// 查找当前rank对应的GPU的node,并执行BFS搜索,查找到其他所有GPU node的路径
for
(
const
auto
&
pair
:
id_to_index_
)
{
uint64_t
id
=
pair
.
first
;
size_t
index
=
pair
.
second
;
// 定位到node
scclTopoNode_t
*
node
=
node_container_
[
index
];
int
nodeInterRank
,
nodeHipDev
;
physical_links
::
getIdComponents
(
node
->
id
,
&
nodeInterRank
,
nullptr
,
nullptr
,
&
nodeHipDev
,
nullptr
);
if
(
node
->
type
==
GPU
&&
nodeInterRank
==
this
->
interRank
&&
nodeHipDev
==
this
->
localRank
)
{
// printf("bfsFindGpuPaths start_node_id=%lu, running\n", node->id);
bfsFindGpuPaths
(
node
->
id
);
}
}
#if 1
if
(
rank
==
1
)
{
printGpuPaths
();
}
#endif
}
/**
 * @brief Derives a GPU rank from a topology node id.
 *
 * Decodes the interRank and hipDev components of `node_id` with
 * physical_links::getIdComponents, combines them as interRank * nLocalRanks + hipDev,
 * prints the intermediate values to stdout, and returns the result.
 *
 * @param node_id     Topology node id to decode.
 * @param nLocalRanks Multiplier applied to the decoded interRank.
 * @return interRank * nLocalRanks + hipDev.
 */
int getGpuRankFromNodeId(uint64_t node_id, int nLocalRanks) {
    int decodedInterRank, decodedHipDev;

    // Only the interRank and hipDev components are requested; the rest are skipped via nullptr.
    physical_links::getIdComponents(node_id, &decodedInterRank, nullptr, nullptr, &decodedHipDev, nullptr);

    const int rankOfGpu = decodedInterRank * nLocalRanks + decodedHipDev;
    printf("node_id=%lu, interRank=%d, hipDev=%d, gpu_rank=%d\n", node_id, decodedInterRank, decodedHipDev, rankOfGpu);
    return rankOfGpu;
}
/**
...
...
@@ -124,84 +150,68 @@ scclResult_t PathFinder::computeTopoGpuP2pMap(scclTopoGraph_t* topo_graph) {
// 记录bitmap
LinkType_t
link_type
;
int
start_gpu_rank
,
end_gpu_rank
;
{
// 根据路径中途径的节点点确定连接方式的类型
SCCLCHECK
(
determineLinkType
(
path
,
&
link_type
));
// 获取gpu的rank
int
start_gpu_rank
=
getGpuRankFromNodeId
(
start_node_id
,
nLocalRanks
);
int
end_gpu_rank
=
getGpuRankFromNodeId
(
end_node_id
,
nLocalRanks
);
int
start_interRank
,
start_hipDev
;
int
end_interRank
,
end_hipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
start_node_id
,
&
start_interRank
,
nullptr
,
nullptr
,
&
start_hipDev
,
nullptr
);
bootstrap
::
physical_links
::
getIdComponents
(
end_node_id
,
&
end_interRank
,
nullptr
,
nullptr
,
&
end_hipDev
,
nullptr
);
start_gpu_rank
=
start_interRank
*
nLocalRanks
+
start_hipDev
;
end_gpu_rank
=
end_interRank
*
nLocalRanks
+
end_hipDev
;
#if 0
printf("rank=%d, interRank=%d, localRank=%d: start_interRank=%d, start_hipDev=%d, end_interRank=%d, end_hipDev=%d, link_type=%d\n",
rank,
interRank,
localRank,
start_interRank,
start_hipDev,
end_interRank,
end_hipDev,
static_cast<int>(link_type));
#endif
}
// 查找transport_map中的起始和结束节点
uint8_t
*
transport_map_pt
=
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
);
// 将连接方式的类型存储在transport_map中
if
(
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
>
0
&&
link_type
>
0
)
{
if
(
link_type
<
static_cast
<
LinkType_t
>
(
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
))
{
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
=
link_type
;
if
(
*
transport_map_pt
>
0
&&
link_type
>
0
)
{
if
(
link_type
<
static_cast
<
LinkType_t
>
(
*
transport_map_pt
))
{
*
transport_map_pt
=
link_type
;
// 清空之前的路径
topo_graph
->
gpu_paths
[
start_node_id
][
end_node_id
].
clear
();
// 添加新的路径
topo_graph
->
gpu_paths
[
start_node_id
][
end_node_id
].
push_back
(
path
);
}
else
if
(
link_type
==
static_cast
<
LinkType_t
>
(
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
))
{
}
else
if
(
link_type
==
static_cast
<
LinkType_t
>
(
*
transport_map_pt
))
{
// 添加新的路径
topo_graph
->
gpu_paths
[
start_node_id
][
end_node_id
].
push_back
(
path
);
}
}
else
{
*
(
topo_graph
->
getTransportMapData
(
start_gpu_rank
,
end_gpu_rank
))
=
static_cast
<
uint8_t
>
(
link_type
);
*
transport_map_pt
=
static_cast
<
uint8_t
>
(
link_type
);
// 添加新的路径
topo_graph
->
gpu_paths
[
start_node_id
][
end_node_id
].
push_back
(
path
);
}
#if 0
{
char start_busIdStr[17] = ""; // 用于存储总线ID字符串
// 根据起始节点的ID查找对应的节点对象
const scclTopoNode_t* start_node = findNodeById(start_node_id);
// 如果找到了对应的节点对象,则将其总线ID转换为字符串
if(start_node) {
int64ToBusId(start_node->busId, start_busIdStr);
}
char end_busIdStr[17] = ""; // 用于存储总线ID字符串
// 根据起始节点的ID查找对应的节点对象
const scclTopoNode_t* end_node = findNodeById(end_node_id);
// 如果找到了对应的节点对象,则将其总线ID转换为字符串
if(end_node) {
int64ToBusId(end_node->busId, end_busIdStr);
}
return
scclSuccess
;
}
/////////////////////////////////////////////////////////////////////////////////////////////
/**
* @brief 查找当前rank对应的其他GPU节点的所有路径
*
* 该函数用于查找当前rank对应的GPU节点的所有路径。它遍历`id_to_index_`中的所有节点ID和索引对,
* 对于每一个节点,如果该节点是GPU类型,并且属于当前rank的进程,则调用`bfsFindGpuPaths`函数执行广度优先搜索(BFS),
* 查找到其他所有GPU节点的路径。最后,如果当前rank为1,则调用`printGpuPaths`函数打印所有GPU路径。
*/
void
PathFinder
::
findGpuPaths
()
{
// 查找当前rank对应的GPU的node,并执行BFS搜索,查找到其他所有GPU node的路径
for
(
const
auto
&
pair
:
id_to_index_
)
{
uint64_t
id
=
pair
.
first
;
size_t
index
=
pair
.
second
;
// 定位到node
scclTopoNode_t
*
node
=
node_container_
[
index
];
int
nodeInterRank
,
nodeHipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
node
->
id
,
&
nodeInterRank
,
nullptr
,
nullptr
,
&
nodeHipDev
,
nullptr
);
if
(
node
->
type
==
GPU
&&
nodeInterRank
==
this
->
interRank
&&
nodeHipDev
==
this
->
localRank
)
{
// printf("bfsFindGpuPaths start_node_id=%lu, running\n", node->id);
bfsFindGpuPaths
(
node
->
id
);
printf("nLocalRanks=%d, start_node_id=%lu, busIdStr=%s, end_node_id=%lu, busIdStr=%s\n"
"start_gpu_rank: %d, end_gpu_rank: %d, link_type: %d, paths count: %zu\n",
nLocalRanks,
start_node_id,
start_busIdStr,
end_node_id,
end_busIdStr,
start_gpu_rank,
end_gpu_rank,
*(topo_graph->getTransportMapData(start_gpu_rank, end_gpu_rank)),
topo_graph->gpu_paths[start_node_id][end_node_id].size());
}
#endif
}
#if 1
if
(
rank
==
1
)
{
printGpuPaths
();
}
#endif
return
scclSuccess
;
}
/////////////////////////////////////////////////////////////////////////////////////////////
/**
* @brief 根据节点ID查找节点
*
...
...
@@ -231,7 +241,6 @@ const scclTopoNode_t* PathFinder::findNodeById(uint64_t id) const {
*
* @param start_node_id 起始GPU节点的ID
*/
#if 1
void
PathFinder
::
bfsFindGpuPaths
(
uint64_t
start_node_id
)
{
// 使用一个队列来存储当前路径
std
::
queue
<
std
::
vector
<
uint64_t
>>
queue
;
...
...
@@ -259,14 +268,14 @@ void PathFinder::bfsFindGpuPaths(uint64_t start_node_id) {
// 如果当前节点是GPU节点且不是起始节点,则将当前路径加入结果
if
(
current_node
->
type
==
GPU
&&
nodeId
!=
start_node_id
)
{
int
hipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
current_node
->
id
,
nullptr
,
nullptr
,
nullptr
,
&
hipDev
,
nullptr
);
physical_links
::
getIdComponents
(
current_node
->
id
,
nullptr
,
nullptr
,
nullptr
,
&
hipDev
,
nullptr
);
// 仅当节点内的device id小于等于nLocalRanks时,才是有效GPU,才将路径加入结果
if
(
hipDev
<
nLocalRanks
)
{
gpu_paths_
[
start_node_id
].
push_back
(
path
);
}
}
else
{
int
nodeInterRank
;
bootstrap
::
physical_links
::
getIdComponents
(
nodeId
,
&
nodeInterRank
);
physical_links
::
getIdComponents
(
nodeId
,
&
nodeInterRank
);
// 遍历当前节点的所有邻居节点
for
(
uint64_t
neighbor_id
:
graph_node_neighbors_
.
at
(
nodeId
))
{
if
(
findNodeById
(
neighbor_id
)
==
nullptr
)
{
...
...
@@ -274,7 +283,7 @@ void PathFinder::bfsFindGpuPaths(uint64_t start_node_id) {
}
// 获取邻居节点的interRank
int
neighbor_inter_rank
;
bootstrap
::
physical_links
::
getIdComponents
(
neighbor_id
,
&
neighbor_inter_rank
);
physical_links
::
getIdComponents
(
neighbor_id
,
&
neighbor_inter_rank
);
// 检查邻居节点是否已在当前路径中访问过
bool
visited
=
std
::
find
(
path
.
begin
(),
path
.
end
(),
neighbor_id
)
!=
path
.
end
();
...
...
@@ -302,141 +311,6 @@ void PathFinder::bfsFindGpuPaths(uint64_t start_node_id) {
}
}
#else
void
PathFinder
::
bfsFindGpuPaths
(
uint64_t
start_node_id
)
{
// 使用一个队列来存储当前路径
std
::
queue
<
std
::
vector
<
uint64_t
>>
queue
;
// 使用一个unordered_map来存储每个node的最短路径
std
::
unordered_map
<
uint64_t
,
std
::
vector
<
uint64_t
>>
shortest_paths
;
// 将起始节点加入队列
queue
.
push
({
start_node_id
});
shortest_paths
[
start_node_id
]
=
{
start_node_id
};
// 当队列不为空时,继续搜索
while
(
!
queue
.
empty
())
{
// 从队列中取出一个路径
auto
path
=
queue
.
front
();
queue
.
pop
();
// 获取当前路径的最后一个节点的ID
uint64_t
nodeId
=
path
.
back
();
// 根据节点ID查找对应的节点
const
scclTopoNode_t
*
current_node
=
findNodeById
(
nodeId
);
if
(
current_node
==
nullptr
)
{
continue
;
}
// 如果当前节点是GPU节点且不是起始节点,则将当前路径加入结果
if
(
current_node
->
type
==
GPU
&&
nodeId
!=
start_node_id
)
{
int
hipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
current_node
->
id
,
nullptr
,
nullptr
,
nullptr
,
&
hipDev
,
nullptr
);
if
(
hipDev
<
nLocalRanks
)
{
gpu_paths_
[
start_node_id
].
push_back
(
path
);
}
}
else
{
int
nodeInterRank
;
bootstrap
::
physical_links
::
getIdComponents
(
nodeId
,
&
nodeInterRank
);
// 遍历当前节点的所有邻居节点
for
(
uint64_t
neighbor_id
:
graph_node_neighbors_
.
at
(
nodeId
))
{
if
(
findNodeById
(
neighbor_id
)
==
nullptr
)
{
continue
;
}
// 获取邻居节点的interRank
int
neighbor_inter_rank
;
bootstrap
::
physical_links
::
getIdComponents
(
neighbor_id
,
&
neighbor_inter_rank
);
// 检查邻居节点是否已在当前路径中访问过
bool
visited
=
std
::
find
(
path
.
begin
(),
path
.
end
(),
neighbor_id
)
!=
path
.
end
();
// 检查interRank是否已经存在(仅当interRank改变时)
bool
inter_rank_exists
=
false
;
if
(
neighbor_inter_rank
!=
nodeInterRank
)
{
for
(
uint64_t
node_id
:
path
)
{
if
(
node_id
==
neighbor_id
)
{
inter_rank_exists
=
true
;
break
;
}
}
}
// 如果邻居节点未访问过且interRank未存在,则扩展路径
if
(
!
visited
&&
!
inter_rank_exists
)
{
std
::
vector
<
uint64_t
>
new_path
=
path
;
new_path
.
push_back
(
neighbor_id
);
// 如果新路径比已有的最短路径更短,则更新最短路径
if
(
shortest_paths
.
find
(
neighbor_id
)
==
shortest_paths
.
end
()
||
shortest_paths
[
neighbor_id
].
size
()
>
new_path
.
size
())
{
shortest_paths
[
neighbor_id
]
=
new_path
;
queue
.
push
(
new_path
);
}
}
}
}
}
}
void
PathFinder
::
bfsFindGpuPaths
(
uint64_t
start_node_id
)
{
// 使用一个队列来存储当前路径
std
::
queue
<
std
::
vector
<
uint64_t
>>
queue
;
// 将起始节点加入队列
queue
.
push
({
start_node_id
});
// 当队列不为空时,继续搜索
while
(
!
queue
.
empty
())
{
// 从队列中取出一个路径
auto
path
=
queue
.
front
();
queue
.
pop
();
// 获取当前路径的最后一个节点的ID
uint64_t
nodeId
=
path
.
back
();
// 根据节点ID查找对应的节点
const
scclTopoNode_t
*
current_node
=
findNodeById
(
nodeId
);
if
(
current_node
==
nullptr
)
{
continue
;
}
// 如果当前节点是GPU节点且不是起始节点,则将当前路径加入结果
if
(
current_node
->
type
==
GPU
&&
nodeId
!=
start_node_id
)
{
int
hipDev
;
bootstrap
::
physical_links
::
getIdComponents
(
current_node
->
id
,
nullptr
,
nullptr
,
nullptr
,
&
hipDev
,
nullptr
);
if
(
hipDev
<
nLocalRanks
)
{
gpu_paths_
[
start_node_id
].
push_back
(
path
);
}
}
else
{
int
nodeInterRank
;
bootstrap
::
physical_links
::
getIdComponents
(
nodeId
,
&
nodeInterRank
);
// 遍历当前节点的所有邻居节点
for
(
uint64_t
neighbor_id
:
graph_node_neighbors_
.
at
(
nodeId
))
{
if
(
findNodeById
(
nodeId
)
==
nullptr
)
{
continue
;
}
// 获取邻居节点的interRank
int
neighbor_inter_rank
;
bootstrap
::
physical_links
::
getIdComponents
(
neighbor_id
,
&
neighbor_inter_rank
);
// 检查邻居节点是否已在当前路径中访问过
bool
visited
=
std
::
find
(
path
.
begin
(),
path
.
end
(),
neighbor_id
)
!=
path
.
end
();
// 检查interRank是否已经存在(仅当interRank改变时)
bool
inter_rank_exists
=
false
;
if
(
neighbor_inter_rank
!=
(
nodeInterRank
))
{
for
(
uint64_t
node_id
:
path
)
{
if
((
nodeInterRank
)
==
neighbor_inter_rank
)
{
inter_rank_exists
=
true
;
break
;
}
}
}
// 如果邻居节点未访问过且interRank未存在,则扩展路径
if
(
!
visited
&&
!
inter_rank_exists
)
{
std
::
vector
<
uint64_t
>
new_path
=
path
;
new_path
.
push_back
(
neighbor_id
);
queue
.
push
(
new_path
);
}
}
}
}
}
#endif
/**
* @brief 打印GPU路径信息
*
...
...
@@ -463,7 +337,7 @@ void PathFinder::printGpuPaths() {
int
interRank
,
deviceValue
,
terminalType
,
hipDev
,
numaId
;
// 根据起始节点的ID获取其interRank、deviceValue、terminalType和numaId
bootstrap
::
physical_links
::
getIdComponents
(
start_node_id
,
&
interRank
,
&
deviceValue
,
&
terminalType
,
&
hipDev
,
&
numaId
);
physical_links
::
getIdComponents
(
start_node_id
,
&
interRank
,
&
deviceValue
,
&
terminalType
,
&
hipDev
,
&
numaId
);
printf
(
"GPU node ID:%lu (InterRank:%d, V:%d, T:%d, H:%d, N:%d) (Path count: %zu)
\n
"
,
start_node_id
,
interRank
,
...
...
@@ -486,7 +360,7 @@ void PathFinder::printGpuPaths() {
const
scclTopoNode_t
*
node
=
findNodeById
(
node_id
);
if
(
node
)
{
// 根据节点的ID获取其interRank、deviceValue、terminalType和numaId
bootstrap
::
physical_links
::
getIdComponents
(
node
->
id
,
&
interRank
,
&
deviceValue
,
&
terminalType
,
&
hipDev
,
&
numaId
);
physical_links
::
getIdComponents
(
node
->
id
,
&
interRank
,
&
deviceValue
,
&
terminalType
,
&
hipDev
,
&
numaId
);
// 将节点的总线ID转换为字符串
int64ToBusId
(
node
->
busId
,
busIdStr
);
// 打印节点的信息,包括其interRank、deviceValue、terminalType、numaId、类型和总线ID字符串
...
...
src/hardware/topology/graph/paths.h
View file @
58d57301
...
...
@@ -13,21 +13,21 @@ namespace hardware {
namespace
topology
{
namespace
graph
{
// 设置Path路径直接link的 bandwidth 和 speed
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class
PathFinder
{
public:
// 构造函数
PathFinder
(
const
BootstrapComm_t
*
bootstrap_comm
);
PathFinder
(
const
BootstrapComm_t
*
bootstrap_comm
,
std
::
vector
<
char
>&
node_info_vec
,
size_t
node_info_total_bytes
);
// 计算拓扑图中GPU节点之间的点对点映射
scclResult_t
computeTopoGpuP2pMap
(
scclTopoGraph_t
*
graph
);
// 计算拓扑图中GPU节点之间的点对点映射
,结果保存在graph中
scclResult_t
computeTopoGpuP2pMap
(
scclTopoGraph_t
*
topo_
graph
);
// 打印函数
void
printGpuPaths
();
private:
// 获取所有GPU到GPU的路径函数
void
findGpuPaths
();
// 使用广度优先搜索(BFS)查找从起始GPU节点到其他GPU节点的最短路径
void
bfsFindGpuPaths
(
uint64_t
start_node_id
);
...
...
@@ -53,6 +53,9 @@ private:
int
nInterRanks
=
0
;
// 全局拥有节点的个数
};
// 根据 node_id 获取 gpu_rank
int
getGpuRankFromNodeId
(
uint64_t
node_id
,
int
nLocalRanks
);
}
// namespace graph
}
// namespace topology
}
// namespace hardware
...
...
src/hardware/topology/
bootst
rap/physical_links.cpp
→
src/hardware/topology/
g
rap
h
/physical_links.cpp
View file @
58d57301
...
...
@@ -4,7 +4,7 @@
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
bootst
rap
{
namespace
g
rap
h
{
namespace
physical_links
{
constexpr
int
numaIdStrLen
=
10
;
...
...
@@ -726,7 +726,7 @@ void printTopoNode(ByteSpanArray<scclTopoNode_t>& nodes, int nodeIndex, const ch
}
}
// namespace physical_links
}
// namespace
bootst
rap
}
// namespace
g
rap
h
}
// namespace topology
}
// namespace hardware
}
// namespace sccl
\ No newline at end of file
src/hardware/topology/
bootst
rap/physical_links.h
→
src/hardware/topology/
g
rap
h
/physical_links.h
View file @
58d57301
...
...
@@ -13,12 +13,14 @@
#include <filesystem> // 需要C++17支持
#include "container.h"
#include "bootstrap
_utils
.h"
#include "bootstrap.h"
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
bootstrap
{
namespace
graph
{
typedef
sccl
::
hardware
::
net
::
scclNet_t
scclNet_t
;
constexpr
size_t
topoNodeMaxLocalNodes
=
128
;
// 每个节点最多的node数量
constexpr
size_t
topoNodeMaxNeighbors
=
16
;
// 每个node最多neighbor数量
...
...
@@ -70,7 +72,7 @@ scclResult_t generate_topo_nodes(const char* pciPath, int interRank, int hipDev,
// 根据numaId获取pci路径
std
::
string
generate_topo_node_numa_info
(
int
numaId
);
// 输出id分解后的所有数据
// 输出
node
id分解后的所有数据
void
getIdComponents
(
uint64_t
idToDecompose
,
int
*
interRank
=
nullptr
,
int
*
deviceValue
=
nullptr
,
int
*
terminalType
=
nullptr
,
int
*
hipDev
=
nullptr
,
int
*
numaId
=
nullptr
);
...
...
@@ -82,7 +84,7 @@ char* getNetPciPath(scclNet_t* scclNet, int hipDev);
void
printTopoNode
(
ByteSpanArray
<
scclTopoNode_t
>&
nodes
,
int
nodeIndex
,
const
char
*
prefix
);
}
// namespace physical_links
}
// namespace
bootst
rap
}
// namespace
g
rap
h
}
// namespace topology
}
// namespace hardware
}
// namespace sccl
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment