Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
lishen01
Sccl
Commits
571a75b5
"vscode:/vscode.git/clone" did not exist on "9632ab1d7c27a3aa63f4c2470ecb99ad85edc70a"
Commit
571a75b5
authored
Aug 09, 2025
by
lishen
Browse files
完成全部网络的node建立,以及GPU到GPU的path物理路径搜索
parent
379c4128
Changes
44
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
965 additions
and
573 deletions
+965
-573
examples/1_connection/3_sccl_ipc_socket/1_socket_mpi_fd.cpp
examples/1_connection/3_sccl_ipc_socket/1_socket_mpi_fd.cpp
+25
-40
examples/1_connection/3_sccl_ipc_socket/3_socket_mpi_data.cpp
...ples/1_connection/3_sccl_ipc_socket/3_socket_mpi_data.cpp
+83
-82
examples/1_connection/3_sccl_ipc_socket/4_socket_mpi_data_sccl.cpp
...1_connection/3_sccl_ipc_socket/4_socket_mpi_data_sccl.cpp
+0
-76
examples/2_topo/1_demo_rocm/1_test_rocm_smi.cpp
examples/2_topo/1_demo_rocm/1_test_rocm_smi.cpp
+0
-0
examples/2_topo/1_demo_rocm/2_test_pci_info.cpp
examples/2_topo/1_demo_rocm/2_test_pci_info.cpp
+15
-0
examples/2_topo/1_demo_rocm/compile1.sh
examples/2_topo/1_demo_rocm/compile1.sh
+2
-2
examples/2_topo/1_demo_rocm/compile2.sh
examples/2_topo/1_demo_rocm/compile2.sh
+15
-0
examples/2_topo/2_bootstrap/1_mpi_init.cpp
examples/2_topo/2_bootstrap/1_mpi_init.cpp
+16
-19
examples/2_topo/2_bootstrap/2_mpi_init_mpi_init_step1_bootstrap.cpp
..._topo/2_bootstrap/2_mpi_init_mpi_init_step1_bootstrap.cpp
+92
-0
examples/2_topo/2_bootstrap/3_mpi_init_mpi_init_step2_graph.cpp
...es/2_topo/2_bootstrap/3_mpi_init_mpi_init_step2_graph.cpp
+60
-0
examples/2_topo/2_bootstrap/compile_mpi1.sh
examples/2_topo/2_bootstrap/compile_mpi1.sh
+4
-0
examples/2_topo/2_bootstrap/compile_mpi2_init_step1.sh
examples/2_topo/2_bootstrap/compile_mpi2_init_step1.sh
+59
-0
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
+61
-0
examples/2_topo/2_bootstrap/hostfile2
examples/2_topo/2_bootstrap/hostfile2
+1
-1
src/hardware/hardware.cpp
src/hardware/hardware.cpp
+17
-53
src/hardware/hardware.h
src/hardware/hardware.h
+3
-4
src/hardware/hardware_utils.h
src/hardware/hardware_utils.h
+2
-3
src/hardware/net/ipc_socket/ipc_socket.cpp
src/hardware/net/ipc_socket/ipc_socket.cpp
+493
-282
src/hardware/net/ipc_socket/ipc_socket.h
src/hardware/net/ipc_socket/ipc_socket.h
+13
-9
src/hardware/net/net_ib/net_ib.cpp
src/hardware/net/net_ib/net_ib.cpp
+4
-2
No files found.
examples/1_connection/3_sccl_ipc_socket/1_socket_mpi_fd.cpp
View file @
571a75b5
...
...
@@ -27,40 +27,6 @@ using namespace sccl;
typedef
class
sccl
::
hardware
::
net
::
ipc_socket
::
scclIpcSocket
scclIpcSocket_t
;
int
ipcSendRecvFd_nrank2
(
int
argc
,
char
*
argv
[])
{
MPI_Init
(
&
argc
,
&
argv
);
int
rank
;
MPI_Comm_rank
(
MPI_COMM_WORLD
,
&
rank
);
int
dst_hash
=
12345
;
scclIpcSocket_t
ipcsocket
(
rank
,
dst_hash
);
if
(
rank
==
0
)
{
// 进程 0: 打开文件并发送文件描述符
int
fd
=
open
(
"testfile.txt"
,
O_RDONLY
);
if
(
fd
<
0
)
{
perror
(
"Failed to open file"
);
MPI_Abort
(
MPI_COMM_WORLD
,
1
);
}
ipcsocket
.
scclIpcSocketSendFd
(
fd
,
1
,
12345
);
// 假设 dst_hash 为 12345
close
(
fd
);
}
else
if
(
rank
==
1
)
{
// 进程 1: 接收文件描述符并读取文件内容
int
fd
;
ipcsocket
.
scclIpcSocketRecvFd
(
&
fd
);
char
buffer
[
256
];
ssize_t
n
=
read
(
fd
,
buffer
,
sizeof
(
buffer
)
-
1
);
if
(
n
>
0
)
{
buffer
[
n
]
=
'\0'
;
printf
(
"Process %d received: %s
\n
"
,
rank
,
buffer
);
}
close
(
fd
);
}
MPI_Finalize
();
return
0
;
}
int
main
(
int
argc
,
char
*
argv
[])
{
MPI_Init
(
&
argc
,
&
argv
);
int
rank
,
size
;
...
...
@@ -68,39 +34,58 @@ int main(int argc, char* argv[]) {
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
size
);
int
dst_hash
=
12345
;
scclIpcSocket_t
ipcsocket
(
rank
,
dst_hash
);
scclIpcSocket_t
ipcsocket
(
rank
,
size
,
dst_hash
);
int
fd
;
if
(
rank
==
0
)
{
// 进程 0: 打开文件并发送文件描述符给所有其他进程
int
fd
=
open
(
"testfile.txt"
,
O_RDONLY
);
fd
=
open
(
"testfile.txt"
,
O_RDONLY
);
if
(
fd
<
0
)
{
perror
(
"Failed to open file"
);
MPI_Abort
(
MPI_COMM_WORLD
,
1
);
}
for
(
int
i
=
1
;
i
<
size
;
++
i
)
{
if
(
ipcsocket
.
scclIpcSocketSendFd
(
fd
,
i
,
dst_hash
)
!=
scclSuccess
)
{
if
(
ipcsocket
.
scclIpcSocketSendFd
(
fd
,
i
)
!=
scclSuccess
)
{
perror
(
"Failed to send file descriptor"
);
close
(
fd
);
MPI_Abort
(
MPI_COMM_WORLD
,
1
);
}
lseek
(
fd
,
0
,
SEEK_SET
);
}
close
(
fd
);
}
else
{
// 其他进程: 接收文件描述符并读取文件内容
int
fd
;
if
(
ipcsocket
.
scclIpcSocketRecvFd
(
&
fd
)
<
0
)
{
perror
(
"Failed to receive file descriptor"
);
MPI_Abort
(
MPI_COMM_WORLD
,
1
);
}
// lseek(fd, 0, SEEK_SET); // 重置文件偏移量到文件开头
printf
(
"11 rank %d received fd %d
\n
"
,
rank
,
fd
);
char
buffer
[
256
];
struct
pollfd
pfd
;
pfd
.
fd
=
fd
;
pfd
.
events
=
POLLIN
;
int
pollResult
=
poll
(
&
pfd
,
1
,
-
1
);
// 无限等待
printf
(
"pollResult=%d, rank=%d
\n
"
,
pollResult
,
rank
);
ssize_t
n
=
read
(
fd
,
buffer
,
sizeof
(
buffer
)
-
1
);
if
(
n
>
0
)
{
buffer
[
n
]
=
'\0'
;
printf
(
"Process %d received: %s
\n
"
,
rank
,
buffer
);
lseek
(
fd
,
0
,
SEEK_SET
);
// 重置文件偏移量到文件开头
}
close
(
fd
);
printf
(
"n=%zd, rank=%d
\n
"
,
n
,
rank
);
/////////////////////
// 注意,fd会有抢占,同一时间只能有一个进程读取
/////////////////////
}
// if(fd >= 0) {
// close(fd);
// }
MPI_Finalize
();
return
0
;
}
...
...
examples/1_connection/3_sccl_ipc_socket/3_socket_mpi_data.cpp
View file @
571a75b5
...
...
@@ -13,63 +13,35 @@ using namespace sccl;
typedef
class
sccl
::
hardware
::
net
::
ipc_socket
::
scclIpcSocket
scclIpcSocket_t
;
template
<
typename
T
>
void
send_data
(
T
*
ipcsocket
,
const
void
*
data
,
size_t
dataLen
,
int
dst_rank
)
{
if
(
ipcsocket
->
scclIpcSocketSendData
(
data
,
dataLen
,
dst_rank
)
!=
scclSuccess
)
{
perror
(
"Failed to send data"
);
MPI_Abort
(
MPI_COMM_WORLD
,
1
);
}
}
template
<
typename
T
>
void
recv_data
(
T
*
ipcsocket
,
void
*
buffer
,
size_t
bufferLen
,
size_t
*
receivedLen
)
{
if
(
ipcsocket
->
scclIpcSocketRecvData
(
buffer
,
bufferLen
,
receivedLen
)
!=
scclSuccess
)
{
perror
(
"Failed to receive data"
);
MPI_Abort
(
MPI_COMM_WORLD
,
1
);
}
}
template
<
typename
T
>
int
test_allgather_ver1
(
T
*
ipcsocket
,
int
rank
,
int
size
)
{
int
sendDataLen
=
256
;
std
::
vector
<
char
>
sendData
(
sendDataLen
);
std
::
vector
<
char
>
recvData
(
size
*
sendDataLen
);
size_t
receivedLen
;
int
test_allgather
(
T
*
ipcsocket
,
int
rank
,
int
size
,
int
dataLen
=
64
*
1024
,
int
num_iterations
=
1
)
{
std
::
vector
<
char
>
sendData
(
dataLen
);
std
::
vector
<
char
>
recvData
(
size
*
dataLen
);
// 填充发送数据
snprintf
(
sendData
.
data
(),
sendData
.
size
(),
"Data from process %d"
,
rank
);
printf
(
"test_allgather dataLen=%d, sendData.size()=%zu
\n
"
,
dataLen
,
sendData
.
size
());
auto
pthpool
=
ThreadPool
(
size
*
2
);
std
::
vector
<
double
>
elapsed_times
;
// 用于存储每次执行的耗时
// 发送数据给所有其他进程
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
if
(
i
!=
rank
)
{
auto
task_send
=
std
::
bind
(
send_data
<
scclIpcSocket_t
>
,
ipcsocket
,
sendData
.
data
(),
sendData
.
size
(),
i
);
pthpool
.
enqueue
(
task_send
);
// 开始计时
auto
start
=
std
::
chrono
::
high_resolution_clock
::
now
();
auto
task_recv
=
std
::
bind
(
recv_data
<
scclIpcSocket_t
>
,
ipcsocket
,
recvData
.
data
()
+
i
*
sendDataLen
,
sendDataLen
,
&
receivedLen
);
pthpool
.
enqueue
(
task_recv
);
}
// 调用 Allgather 函数
for
(
int
i
=
0
;
i
<
num_iterations
;
++
i
)
{
SCCLCHECK
(
ipcsocket
->
scclIpcSocketAllgather
(
sendData
.
data
(),
recvData
.
data
(),
dataLen
));
}
printf
(
"sendData.size()=%d, receivedLen=%d
\n
"
,
sendDataLen
,
int
(
receivedLen
));
// 打印接收到的数据
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
printf
(
"Process %d received from process %d: %s
\n
"
,
rank
,
i
,
recvData
.
data
()
+
i
*
256
);
}
// 结束计时
auto
end
=
std
::
chrono
::
high_resolution_clock
::
now
();
return
0
;
}
// 所有进程在此处等待,直到所有进程都到达这一点
MPI_Barrier
(
MPI_COMM_WORLD
);
template
<
typename
T
>
int
test_allgather_ver2
(
T
*
ipcsocket
,
int
rank
,
int
size
)
{
int
sendDataLen
=
256
;
std
::
vector
<
char
>
sendData
(
sendDataLen
);
std
::
vector
<
char
>
recvData
(
size
*
sendDataLen
);
// 计算并存储每个进程的计时结果
std
::
chrono
::
duration
<
double
>
elapsed
=
end
-
start
;
// 填充发送数据
snprintf
(
sendData
.
data
(),
sendData
.
size
(),
"Data from process %d"
,
rank
);
SCCLCHECK
(
ipcsocket
->
scclIpcSocketAllgatherSync
(
sendData
.
data
(),
recvData
.
data
(),
sendData
.
size
(),
/*wait*/
true
));
auto
average_time
=
elapsed
.
count
()
*
1e6
/
num_iterations
;
// 转换为微秒
printf
(
"rank %d: Average time for Allgather over %d iterations: %f us.
\n
"
,
rank
,
num_iterations
,
average_time
);
// 打印接收到的数据
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
...
...
@@ -80,67 +52,96 @@ int test_allgather_ver2(T* ipcsocket, int rank, int size) {
}
template
<
typename
T
>
int
test_allgather_ver3
(
T
*
ipcsocket
,
int
rank
,
int
size
)
{
int
sendDataLen
=
256
;
std
::
vector
<
char
>
sendData
(
sendDataLen
);
std
::
vector
<
char
>
recvData
(
size
*
sendDataLen
);
// 填充发送数据
snprintf
(
sendData
.
data
(),
sendData
.
size
(),
"Data from process %d"
,
rank
);
SCCLCHECK
(
ipcsocket
->
scclIpcSocketAllgather
(
sendData
.
data
(),
recvData
.
data
(),
sendData
.
size
()));
// 打印接收到的数据
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
printf
(
"rank %d received from process %d: %s
\n
"
,
rank
,
i
,
recvData
.
data
()
+
i
*
sendData
.
size
());
int
test_broadcast
(
T
*
ipcsocket
,
int
rank
,
int
size
,
int
dataLen
=
64
*
1024
,
int
num_iterations
=
1
)
{
std
::
vector
<
char
>
data
(
dataLen
);
int
root
=
0
;
// 假设 rank 0 是根进程
if
(
rank
==
root
)
{
// 仅根进程填充发送数据
snprintf
(
data
.
data
(),
data
.
size
(),
"Data from root process %d"
,
rank
);
}
printf
(
"rank=%d, data.size()=%zu
\n
"
,
rank
,
data
.
size
());
return
0
;
}
std
::
vector
<
double
>
elapsed_times
;
// 用于存储每次执行的耗时
template
<
typename
T
>
int
test_broadcast_ver1
(
T
*
ipcsocket
,
int
rank
,
int
size
)
{
int
sendDataLen
=
256
;
std
::
vector
<
char
>
sendData
(
sendDataLen
);
std
::
vector
<
char
>
recvData
(
sendDataLen
);
int
root
=
0
;
// 假设 rank 0 是根进程
// 开始计时
auto
start
=
std
::
chrono
::
high_resolution_clock
::
now
();
if
(
rank
==
root
)
{
// 仅根进程填充发送数据
snprintf
(
sendData
.
data
(),
sendData
.
size
(),
"Data from root process %d"
,
rank
);
for
(
int
i
=
0
;
i
<
num_iterations
;
++
i
)
{
SCCLCHECK
(
ipcsocket
->
scclIpcSocketBroadcast
(
data
.
data
(),
data
.
size
(),
root
));
}
SCCLCHECK
(
ipcsocket
->
scclIpcSocketBroadcast
(
sendData
.
data
(),
recvData
.
data
(),
sendData
.
size
(),
root
,
/*wait*/
true
));
// 结束计时
auto
end
=
std
::
chrono
::
high_resolution_clock
::
now
();
// 打印接收到的数据
printf
(
"rank %d received: %s
\n
"
,
rank
,
recvData
.
data
());
// 所有进程在此处等待,直到所有进程都到达这一点
MPI_Barrier
(
MPI_COMM_WORLD
);
// 计算并存储每个进程的计时结果
std
::
chrono
::
duration
<
double
>
elapsed
=
end
-
start
;
auto
average_time
=
elapsed
.
count
()
*
1e6
/
num_iterations
;
// 转换为微秒
printf
(
"rank %d: data=%s, Average time for scclIpcSocketBroadcast over %d iterations: %f us.
\n
"
,
rank
,
(
char
*
)(
data
.
data
()),
num_iterations
,
average_time
);
return
0
;
}
int
main
(
int
argc
,
char
*
argv
[])
{
MPI_Init
(
&
argc
,
&
argv
);
int
rank
,
size
;
MPI_Comm_rank
(
MPI_COMM_WORLD
,
&
rank
);
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
size
);
int
dst_hash
=
12345
;
int
dst_hash
=
654321
;
scclIpcSocket_t
*
ipcsocket
=
new
scclIpcSocket_t
(
rank
,
size
,
dst_hash
);
// test_allgather_ver1(ipcsocket, rank, size);
// test_allgather_ver2(ipcsocket, rank, size);
// test_allgather_ver3(ipcsocket, rank, size);
test_broadcast_ver1
(
ipcsocket
,
rank
,
size
);
// 默认参数
std
::
string
test_type
=
"allgather"
;
int
dataLen
=
64
*
1024
;
int
num_iterations
=
1
;
// 解析命令行参数
for
(
int
i
=
1
;
i
<
argc
;
++
i
)
{
std
::
istringstream
iss
(
argv
[
i
]);
std
::
string
arg
;
iss
>>
arg
;
if
(
arg
==
"--test-type"
)
{
if
(
++
i
<
argc
)
{
test_type
=
argv
[
i
];
}
}
else
if
(
arg
==
"--data-len"
)
{
if
(
++
i
<
argc
)
{
iss
.
clear
();
iss
.
str
(
argv
[
i
]);
iss
>>
dataLen
;
}
}
else
if
(
arg
==
"--num-iterations"
)
{
if
(
++
i
<
argc
)
{
iss
.
clear
();
iss
.
str
(
argv
[
i
]);
iss
>>
num_iterations
;
}
}
}
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
seconds
(
10
));
// while(!ipcsocket->getPthreadPool()->allTasksCompleted()) {}
// printf("delete ipcsocket... rank=%d\n", rank);
if
(
test_type
==
"allgather"
)
{
test_allgather
(
ipcsocket
,
rank
,
size
,
dataLen
,
num_iterations
);
}
else
if
(
test_type
==
"broadcast"
)
{
test_broadcast
(
ipcsocket
,
rank
,
size
,
dataLen
,
num_iterations
);
}
else
{
if
(
rank
==
0
)
{
std
::
cerr
<<
"Unknown test type: "
<<
test_type
<<
std
::
endl
;
}
}
delete
(
ipcsocket
)
;
delete
ipcsocket
;
MPI_Finalize
();
return
0
;
}
/*
单机执行
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root -np 8 3_socket_mpi_data
SCCL_DEBUG_LEVEL=WARN SCCL_DEBUG_SUBSYS=GRAPH mpirun --allow-run-as-root -np 8 3_socket_mpi_data
SCCL_DEBUG_LEVEL=WARN SCCL_DEBUG_SUBSYS=GRAPH mpirun --allow-run-as-root -np 4 3_socket_mpi_data
*/
examples/1_connection/3_sccl_ipc_socket/4_socket_mpi_data_sccl.cpp
deleted
100644 → 0
View file @
379c4128
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <vector> // 引入vector库
#include <thread> // 为了使用 std::this_thread::sleep_for
#include "mpi.h"
#include "net.h"
#include "ipc_socket.h"
#include "thread_pool.h"
using
namespace
sccl
;
typedef
class
sccl
::
hardware
::
net
::
ipc_socket
::
scclIpcSocket
scclIpcSocket_t
;
/**
 * Sends dataLen bytes from data to dst_rank through ipcsocket->scclIpcSocketSendData.
 * If that call does not report scclSuccess, prints a diagnostic with perror and aborts
 * the MPI job with exit code 1.
 */
template <typename T>
void send_data(T* ipcsocket, const void* data, size_t dataLen, int dst_rank, uint64_t dst_hash) {
    const bool sent = (ipcsocket->scclIpcSocketSendData(data, dataLen, dst_rank, dst_hash) == scclSuccess);
    if(sent) {
        return;
    }

    perror("Failed to send data");
    MPI_Abort(MPI_COMM_WORLD, 1);
}
/**
 * Receives up to bufferLen bytes into buffer through ipcsocket->scclIpcSocketRecvData,
 * which is also handed receivedLen. If that call does not report scclSuccess, prints a
 * diagnostic with perror and aborts the MPI job with exit code 1.
 */
template <typename T>
void recv_data(T* ipcsocket, void* buffer, size_t bufferLen, size_t* receivedLen) {
    const bool received = (ipcsocket->scclIpcSocketRecvData(buffer, bufferLen, receivedLen) == scclSuccess);
    if(received) {
        return;
    }

    perror("Failed to receive data");
    MPI_Abort(MPI_COMM_WORLD, 1);
}
/**
 * All-to-all exchange demo over the IPC socket.
 *
 * Every rank enqueues one send task and one receive task per other rank on a thread pool,
 * sleeps for two seconds, then prints what landed in its receive buffer.
 */
int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int dst_hash = 12345;
    scclIpcSocket_t ipcsocket(rank, dst_hash);

    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(size * sendDataLen);
    // One zero-initialized length slot per receive task: the tasks run concurrently, so they
    // must not share a single size_t, and an untouched slot must read as 0 rather than garbage.
    std::vector<size_t> receivedLens(size, 0);

    // Fill the outgoing message.
    snprintf(sendData.data(), sendData.size(), "Data from process %d", rank);

    auto pthpool = ThreadPool(size * 2);

    // Queue one send and one receive for every other rank.
    for(int i = 0; i < size; ++i) {
        if(i == rank) {
            continue;
        }
        auto task_send = std::bind(send_data<scclIpcSocket_t>, &ipcsocket, sendData.data(), sendData.size(), i, dst_hash);
        pthpool.enqueue(task_send);

        auto task_recv =
            std::bind(recv_data<scclIpcSocket_t>, &ipcsocket, recvData.data() + i * sendDataLen, sendDataLen, &receivedLens[i]);
        pthpool.enqueue(task_recv);
    }

    // Fixed delay kept from the original as the only wait for the queued tasks.
    // NOTE(review): no completion API of ThreadPool is visible here — replace with a real join if one exists.
    std::this_thread::sleep_for(std::chrono::seconds(2));

    // Report lengths and payloads only after the wait, so the values are not read before they are written.
    for(int i = 0; i < size; ++i) {
        if(i != rank) {
            printf("sendData.size()=%d, receivedLen=%d\n", sendDataLen, int(receivedLens[i]));
        }
        // Index with sendDataLen (not a literal 256) so the offset matches the layout used above.
        printf("Process %d received from process %d: %s\n", rank, i, recvData.data() + i * sendDataLen);
    }

    MPI_Finalize();
    return 0;
}
/*
单机执行
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root -np 8 3_socket_mpi_data
*/
examples/2_topo/1_demo_rocm/test_rocm_smi.cpp
→
examples/2_topo/1_demo_rocm/
1_
test_rocm_smi.cpp
View file @
571a75b5
File moved
examples/2_topo/1_demo_rocm/2_test_pci_info.cpp
0 → 100644
View file @
571a75b5
#include <sys/sysinfo.h>
#include <iostream>
/**
 * Prints uptime plus total and free RAM as reported by sysinfo(2).
 *
 * @return 0 on success, 1 if sysinfo() fails
 */
int main() {
    struct sysinfo info;
    if(sysinfo(&info) != 0) {
        std::cerr << "Failed to get system information." << std::endl;
        // Report the failure through the exit status as well as on stderr.
        return 1;
    }

    // totalram/freeram are expressed in units of mem_unit bytes; scale them so the output is in bytes.
    const unsigned long long memUnit = info.mem_unit;

    std::cout << "Uptime: " << info.uptime << std::endl;
    std::cout << "Total RAM: " << info.totalram * memUnit << std::endl;
    std::cout << "Free RAM: " << info.freeram * memUnit << std::endl;
    // Further fields of struct sysinfo can be printed here.

    return 0;
}
examples/2_topo/1_demo_rocm/compile
_rocm_smi
.sh
→
examples/2_topo/1_demo_rocm/compile
1
.sh
View file @
571a75b5
hipcc
/public/home/lishen/Code/rocSHMEM/SCCL_v1/examples/2_topo/1_demo_rocm/
test_rocm_smi.cpp
\
hipcc
1_
test_rocm_smi.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/rocm_smi_wrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/topo_utils.cpp
\
-o
test_
topo
\
-o
1_
test_
rocm_smi
\
-std
=
c++17
-g
-O3
-fopenmp
-D__HIP_PLATFORM_HCC__
\
-I
./
-I
/usr/include
-I
/opt/dtk/include
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include
\
...
...
examples/2_topo/1_demo_rocm/compile2.sh
0 → 100644
View file @
571a75b5
hipcc 2_test_pci_info.cpp
\
-o
2_test_pci_info
\
-std
=
c++17
-g
-O3
-fopenmp
-D__HIP_PLATFORM_HCC__
\
-I
./
-I
/usr/include
-I
/opt/dtk/include
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/topo
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/
\
-L
/usr/lib/x86_64-linux-gnu
\
-L
/usr/lib/
\
-lamdhip64
-lrocm_smi64
\ No newline at end of file
examples/2_topo/2_bootstrap/1_mpi_init.cpp
View file @
571a75b5
...
...
@@ -11,9 +11,6 @@ using namespace sccl;
int
main
(
int
argc
,
char
*
argv
[])
{
int
rank
,
nranks
;
int
tag1
,
src
,
dst
,
cnt
;
MPI_Status
status
;
MPI_Init
(
&
argc
,
&
argv
);
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
nranks
);
...
...
@@ -21,28 +18,28 @@ int main(int argc, char* argv[]) {
printf
(
"rank=%d, nranks=%d
\n
"
,
rank
,
nranks
);
// ----------------------------------------------------------------------- //
//
//
----------------------------------------------------------------------- //
INFO
(
SCCL_LOG_TOPO
,
"Bootstrap ...
\n
"
);
struct
scclRankInfo
*
rank_info
;
struct
sccl
::
hardware
::
topology
::
bootstrap
::
sccl
BootstrapComm
*
comm
;
//
INFO(SCCL_LOG_TOPO, "Bootstrap ...\n");
//
scclRankInfo
_t
* rank_info;
//
struct sccl::hardware::topology::bootstrap::BootstrapComm* comm;
SCCLCHECK
(
scclCalloc
(
&
rank_info
,
1
));
SCCLCHECK
(
scclCalloc
(
&
comm
,
1
));
//
SCCLCHECK(scclCalloc(&rank_info, 1));
//
SCCLCHECK(scclCalloc(&comm, 1));
rank_info
->
rank
=
rank
;
rank_info
->
nRanks
=
nranks
;
rank_info
->
localRanks
=
2
;
rank_info
->
hipDev
=
rank
%
rank_info
->
localRanks
;
//
rank_info->rank = rank;
//
rank_info->nRanks = nranks;
//
rank_info->localRanks = 2;
//
rank_info->hipDev = rank % rank_info->localRanks;
auto
sccl_bootstrap
=
new
sccl
::
hardware
::
topology
::
bootstrap
::
sccl
Bootstrap
(
rank_info
,
comm
);
SCCLCHECK
(
sccl_bootstrap
->
bootstrapInitCheck
());
//
auto sccl_bootstrap = new sccl::hardware::topology::bootstrap::Bootstrap(rank_info, comm);
//
SCCLCHECK(sccl_bootstrap->bootstrapInitCheck());
sccl
::
hardware
::
topology
::
bootstrap
::
printUniqueInfo
(
comm
->
unique_info
);
//
sccl::hardware::topology::bootstrap::printUniqueInfo(comm->unique_info);
int
cuda_id
;
HIPCHECK
(
hipGetDevice
(
&
cuda_id
));
printf
(
"rank=%d, cuda_id=%d
\n
"
,
rank
,
cuda_id
);
//
int cuda_id;
//
HIPCHECK(hipGetDevice(&cuda_id));
//
printf("rank=%d, cuda_id=%d\n", rank, cuda_id);
MPI_Finalize
();
}
...
...
examples/2_topo/2_bootstrap/2_mpi_init_mpi_init_step1_bootstrap.cpp
0 → 100644
View file @
571a75b5
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <mpi.h>
#include "bootstrap.h"
#include "hardware.h"
using
namespace
sccl
;
typedef
sccl
::
hardware
::
topology
::
bootstrap
::
scclUniqueId
scclUniqueId
;
typedef
sccl
::
hardware
::
topology
::
bootstrap
::
BootstrapHandle_t
BootstrapHandle_t
;
typedef
sccl
::
hardware
::
topology
::
bootstrap
::
Bootstrap
Bootstrap
;
// 全局变量
struct
sccl
::
hardware
::
topology
::
bootstrap
::
BootstrapComm
bootstrap_comm
;
/**
 * First initialization step: reinterprets unique_id as the root bootstrap handle, builds a
 * Bootstrap object from it and initializes the global bootstrap_comm through it.
 *
 * @param unique_id  unique id, reinterpreted as a BootstrapHandle_t
 * @param rank       rank of the calling process
 * @param nRanks     total number of ranks
 * @return scclSuccess when every checked step passes; EQCHECK/SCCLCHECK are defined elsewhere
 *         and presumably return early on failure — confirm against their definitions
 */
scclResult_t sccl_init_step1(const scclUniqueId* unique_id, int rank, int nRanks) {
    // Step 1: obtain rank 0's address information.
    const BootstrapHandle_t* root_handle = reinterpret_cast<const BootstrapHandle_t*>(unique_id);
    EQCHECK(root_handle->magic, 0); // check whether the handle has been updated

    // Step 2: initialize and gather the node information of all nodes.
    std::unique_ptr<Bootstrap> sccl_bootstrap = std::make_unique<Bootstrap>(root_handle, rank, nRanks);
    SCCLCHECK(sccl_bootstrap->init(&bootstrap_comm));

    return scclSuccess;
}
// Capacity of the per-node neighbor array.
constexpr int topoNodeMaxNeighbors = 16;

// One node of the topology graph. Every member has a default initializer so that a
// default-constructed node never carries indeterminate values.
typedef struct topoNode {
    uint64_t id = 0;                                         // graph-node id
    int type    = 0;                                         // graph-node type
    int numaId  = 0;                                         // node id
    char busIdStr[17] = "";                                  // bus id string, "00000000:00:00.0"
    int speed = 0;                                           // speed
    int width = 0;                                           // width (the original comment calls this bandwidth)
    char cpuAffinity[36] = "";                               // CPU affinity
    std::array<uint64_t, topoNodeMaxNeighbors> neighbors{};  // neighbor graph nodes
    size_t neighborCount = 0;                                // number of neighbor graph nodes
} topoNode_t;
/**
 * Bootstrap step-1 demo: starts MPI, prints the sizes of several bootstrap structures,
 * broadcasts rank 0's unique id to every rank, runs sccl_init_step1 and finalizes.
 */
int main(int argc, char* argv[]) {
    // -------------------------- 1. Start MPI ----------------------------------- //
    MPI_Init(&argc, &argv);
    int rank, nRanks;
    MPI_Comm_size(MPI_COMM_WORLD, &nRanks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("rank=%d, nRanks=%d\n", rank, nRanks);

    int nLocalRanks = 2;

    // sizeof yields size_t, so the matching conversion is %zu rather than %lu.
    BootstrapHandle_t uqid;
    printf("uqid size=%zu\n", sizeof(uqid));

    sccl::hardware::topology::bootstrap::scclRankInfo_t rankinfo;
    sccl::hardware::topology::bootstrap::scclNodeInfo_t nodeinfo(nLocalRanks);
    topoNode_t topo_node;
    printf("rankinfo size=%zu\n", sizeof(rankinfo));
    printf("rankinfo cpu size=%zu\n", sizeof(rankinfo.cpu));
    printf("rankinfo gpu size=%zu\n", sizeof(rankinfo.gpu));
    printf("rankinfo net size=%zu\n", sizeof(rankinfo.net));
    // NOTE(review): nodeinfo.size is printed with %d as in the original; its declared type is not visible here — confirm.
    printf("nodeinfo size=%zu, stu size=%d\n", sizeof(nodeinfo), nodeinfo.size);
    printf("topo_node size=%zu\n", sizeof(topo_node));

    // -------------------------- 2. Obtain the node unique_id (mainly the socket address) ----------------------------------- //
    scclUniqueId unique_id;
    if(rank == 0) {
        SCCLCHECK(sccl::hardware::scclGetUniqueId(&unique_id));
    }
    // Rank 0's id is distributed to every rank as raw bytes.
    MPI_Bcast(&unique_id, sizeof(scclUniqueId), MPI_BYTE, 0, MPI_COMM_WORLD);

    // -------------------------- 3. Initialize from the shared unique_id ----------------------------------- //
    // Check the result like every other SCCL call in this function instead of dropping it.
    SCCLCHECK(sccl_init_step1(&unique_id, rank, nRanks));

    int cuda_id;
    HIPCHECK(hipGetDevice(&cuda_id));
    printf("rank=%d, cuda_id=%d\n", rank, cuda_id);

    MPI_Barrier(MPI_COMM_WORLD);
    SCCLCHECK(sccl::hardware::sccl_finalize());
    MPI_Finalize();
}
/*
单机执行
SCCL_DEBUG_LEVEL=ABORT mpirun --allow-run-as-root -np 4 2_mpi_init_mpi_init_step1_bootstrap
SCCL_DEBUG_LEVEL=INFO SCCL_DEBUG_SUBSYS=ALL mpirun --allow-run-as-root -np 2 2_mpi_init_mpi_init_step1_bootstrap
跨机执行
SCCL_DEBUG_LEVEL=WARN SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root --hostfile hostfile2 -np 4 ./2_mpi_init_mpi_init_step1_bootstrap
SCCL_DEBUG_LEVEL=WARN SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root --hostfile hostfile -np 16 ./2_mpi_init_mpi_init_step1_bootstrap
*/
examples/2_topo/2_bootstrap/3_mpi_init_mpi_init_step2_graph.cpp
0 → 100644
View file @
571a75b5
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <mpi.h>
#include "bootstrap.h"
#include "hardware.h"
using
namespace
sccl
;
/**
 * Bootstrap step-2 demo: starts MPI, prints the sizes of the rank/node info structures,
 * broadcasts rank 0's unique id to every rank, runs sccl_init and finalizes.
 */
int main(int argc, char* argv[]) {
    // -------------------------- 1. Start MPI ----------------------------------- //
    MPI_Init(&argc, &argv);
    int rank, nRanks;
    MPI_Comm_size(MPI_COMM_WORLD, &nRanks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("rank=%d, nRanks=%d\n", rank, nRanks);

    sccl::hardware::topology::bootstrap::scclRankInfo_t rankinfo;
    sccl::hardware::topology::bootstrap::scclNodeInfo_t nodeinfo(/*nLocalRanks*/ 2);

    // sizeof yields size_t, so the matching conversion is %zu rather than %lu.
    printf("rankinfo size=%zu\n", sizeof(rankinfo));
    printf("rankinfo cpu size=%zu\n", sizeof(rankinfo.cpu));
    printf("rankinfo gpu size=%zu\n", sizeof(rankinfo.gpu));
    printf("rankinfo net size=%zu\n", sizeof(rankinfo.net));
    // NOTE(review): totalByteSize is printed with %d as in the original; its declared type is not visible here — confirm.
    printf("nodeinfo size=%zu, stu size=%d\n", sizeof(nodeinfo), nodeinfo.totalByteSize);

    // topoNode_t topo_node;
    // printf("topo_node size=%lu\n", sizeof(topo_node));

    // -------------------------- 2. Obtain the node unique_id (mainly the socket address) ----------------------------------- //
    typedef sccl::hardware::topology::bootstrap::scclUniqueId scclUniqueId;
    scclUniqueId unique_id;
    if(rank == 0) {
        SCCLCHECK(sccl::hardware::scclGetUniqueId(&unique_id));
    }
    // Rank 0's id is distributed to every rank as raw bytes.
    MPI_Bcast(&unique_id, sizeof(scclUniqueId), MPI_BYTE, 0, MPI_COMM_WORLD);

    // -------------------------- 3. Initialize from the shared unique_id ----------------------------------- //
    SCCLCHECK(sccl::hardware::sccl_init(&unique_id, rank, nRanks));

    // int cuda_id;
    // HIPCHECK(hipGetDevice(&cuda_id));
    // printf("rank=%d, cuda_id=%d\n", rank, cuda_id);
    // MPI_Barrier(MPI_COMM_WORLD);

    SCCLCHECK(sccl::hardware::sccl_finalize());
    MPI_Finalize();
}
/*
单机执行
SCCL_DEBUG_LEVEL=WARN mpirun --allow-run-as-root -np 4 3_mpi_init_mpi_init_step2_graph
SCCL_DEBUG_LEVEL=WARN SCCL_DEBUG_SUBSYS=ALL mpirun --allow-run-as-root -np 2 3_mpi_init_mpi_init_step2_graph
跨机执行
SCCL_DEBUG_LEVEL=WARN SCCL_DEBUG_SUBSYS=GRAPH mpirun --allow-run-as-root --hostfile hostfile2 -np 2 ./3_mpi_init_mpi_init_step2_graph
SCCL_DEBUG_LEVEL=WARN SCCL_DEBUG_SUBSYS=GRAPH mpirun --allow-run-as-root --hostfile hostfile2 -np 4 ./3_mpi_init_mpi_init_step2_graph
SCCL_DEBUG_LEVEL=WARN SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root --hostfile hostfile -np 16 ./3_mpi_init_mpi_init_step2_graph
*/
examples/2_topo/2_bootstrap/compile_mpi.sh
→
examples/2_topo/2_bootstrap/compile_mpi
1
.sh
View file @
571a75b5
...
...
@@ -10,6 +10,8 @@ hipcc ./1_mpi_init.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/rocm_wrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/mpi/mpiwrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/mpi/mpisymbols.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap_net.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/rocm_smi_wrap.cpp
\
...
...
@@ -31,6 +33,8 @@ hipcc ./1_mpi_init.cpp \
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ipc_socket/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/mpi
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/mpi
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/
\
-L
/public/home/lishen/Code/rocSHMEM/SCCL_v1
\
...
...
examples/2_topo/2_bootstrap/compile_mpi2_init_step1.sh
0 → 100644
View file @
571a75b5
hipcc ./2_mpi_init_mpi_init_step1_bootstrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/hardware_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvsymbols.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvwrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/net_ib.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/net_socket.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/socket.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ipc_socket/ipc_socket.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/rocm_wrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap_net.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/rocm_smi_wrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/physical_links.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/topo_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/archinfo.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/param.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/hardware.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/thread_pool.cpp
\
-o
2_mpi_init_mpi_init_step1_bootstrap
\
-std
=
c++17
-g
-O3
-fopenmp
-DROC_SHMEM
-D__HIP_PLATFORM_HCC__
-Wno-return-type
\
-I
./
-I
/usr/include
-I
/opt/dtk/include
\
-I
/public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/include/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ipc_socket/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/
\
-L
/public/home/lishen/Code/rocSHMEM/SCCL_v1
\
-L
/opt/dtk/lib
-lamdhip64
-lrocm-core
-lrocm_smi64
-pthread
\
-L
/usr/lib/x86_64-linux-gnu
-libverbs
-lrdmacm
\
-L
/public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/lib
-lmpi
# # \
# # -L /public/home/lishen/Code/rocSHMEM/3rd_party/install/ucx/lib -lucs -lucp -luct -lucm
# # export HSA_FORCE_FINE_GRAIN_PCIE="1"
# # export iommu=pt
# hipcc ./2_mpi_init_mpi_init_step1_bootstrap.cpp \
# -o 2_mpi_init_mpi_init_step1_bootstrap \
# -std=c++17 -g -O3 -fopenmp -DROC_SHMEM -D__HIP_PLATFORM_HCC__ -Wno-return-type \
# -I ./ -I /usr/include -I /opt/dtk/include \
# -I /public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/include/ \
# -L /usr/lib/x86_64-linux-gnu -libverbs -lrdmacm \
# -L /public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/lib -lmpi \
# -L /opt/dtk/lib -lamdhip64 -lrocm-core -lrocm_smi64 -pthread
examples/2_topo/2_bootstrap/compile_mpi3_init_step2.sh
0 → 100644
View file @
571a75b5
hipcc ./3_mpi_init_mpi_init_step2_graph.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/hardware_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvsymbols.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvwrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/net_ib.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/net_socket.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/socket.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ipc_socket/ipc_socket.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/rocm_wrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap_net.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/rocm_smi_wrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/bootstrap.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/physical_links.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/topo_utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/archinfo.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/param.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/utils.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/hardware.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/thread_pool.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/graph.cpp
\
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/paths.cpp
\
-o
3_mpi_init_mpi_init_step2_graph
\
-std
=
c++17
-g
-O3
-fopenmp
-DROC_SHMEM
-D__HIP_PLATFORM_HCC__
-Wno-return-type
\
-I
./
-I
/usr/include
-I
/opt/dtk/include
\
-I
/public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/include/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ipc_socket/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/bootstrap/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/
\
-I
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/graph/
\
-L
/public/home/lishen/Code/rocSHMEM/SCCL_v1
\
-L
/opt/dtk/lib
-lamdhip64
-lrocm-core
-lrocm_smi64
-pthread
\
-L
/usr/lib/x86_64-linux-gnu
-libverbs
-lrdmacm
\
-L
/public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/lib
-lmpi
# # \
# # -L /public/home/lishen/Code/rocSHMEM/3rd_party/install/ucx/lib -lucs -lucp -luct -lucm
# # export HSA_FORCE_FINE_GRAIN_PCIE="1"
# # export iommu=pt
# hipcc ./3_mpi_init_mpi_init_step2_graph.cpp \
# -o 3_mpi_init_mpi_init_step2_graph \
# -std=c++17 -g -O3 -fopenmp -DROC_SHMEM -D__HIP_PLATFORM_HCC__ -Wno-return-type \
# -I ./ -I /usr/include -I /opt/dtk/include \
# -I /public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/include/ \
# -L /usr/lib/x86_64-linux-gnu -libverbs -lrdmacm \
# -L /public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/lib -lmpi \
# -L /opt/dtk/lib -lamdhip64 -lrocm-core -lrocm_smi64 -pthread
examples/2_topo/2_bootstrap/hostfile2
View file @
571a75b5
node037 slots=2
node038 slots=2
\ No newline at end of file
node038 slots=2
src/hardware/hardware.cpp
View file @
571a75b5
...
...
@@ -5,75 +5,41 @@
#include "base.h"
#include "hardware_utils.h"
#include "bootstrap.h"
#include "graph.h"
#include "hardware.h"
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
bootstrap
{
// 全局变量,全部节点的信息
s
truct
BootstrapComm
bootstrap_comm
;
s
ccl
::
hardware
::
topology
::
bootstrap
::
BootstrapComm
_t
bootstrap_comm
;
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
scclResult_t
scclGetUniqueId
(
scclUniqueId
*
unique_id
)
{
auto
handle
=
reinterpret_cast
<
struct
BootstrapHandle
*>
(
unique_id
);
NEQCHECK
(
sizeof
(
struct
BootstrapHandle
),
SCCL_UNIQUE_ID_BYTES
);
SCCLCHECK
(
bootstrapGetUniqueId
(
handle
));
auto
handle
=
reinterpret_cast
<
BootstrapHandle
_t
*>
(
unique_id
);
NEQCHECK
(
sizeof
(
BootstrapHandle
_t
),
SCCL_UNIQUE_ID_BYTES
);
SCCLCHECK
(
topology
::
bootstrap
::
bootstrapGetUniqueId
(
handle
));
return
scclSuccess
;
}
scclResult_t
sccl_init
(
const
scclUniqueId
*
unique_id
,
int
rank
,
int
nRanks
)
{
// -------------------------- 1.获取0号rank的地址信息 ----------------------------------- //
auto
root_handle
=
reinterpret_cast
<
const
struct
BootstrapHandle
*>
(
unique_id
);
auto
root_handle
=
reinterpret_cast
<
const
BootstrapHandle
_t
*>
(
unique_id
);
EQCHECK
(
root_handle
->
magic
,
0
);
// 检查handle是否已经更新
// -------------------------- 2.初始化获取所有节点的node信息 ----------------------------------- //
auto
sccl_bootstrap
=
std
::
make_unique
<
Bootstrap
>
(
root_handle
,
rank
,
nRanks
);
auto
sccl_bootstrap
=
std
::
make_unique
<
topology
::
bootstrap
::
Bootstrap
>
(
root_handle
,
rank
,
nRanks
);
SCCLCHECK
(
sccl_bootstrap
->
init
(
&
bootstrap_comm
));
// // -------------------------- 3.MPI allgather设置unique_id的整合 ----------------------------------- //
// auto unique_ids_chr = reinterpret_cast<const char*>(unique_ids);
// -------------------------- 3.MPI 建图 ----------------------------------- //
auto
sccl_graph
=
std
::
make_unique
<
topology
::
graph
::
Graph
>
(
rank
,
nRanks
);
printf
(
"init pos 2
\n
"
);
// 计算通信路径
sccl_graph
->
calculateCommunicationPaths
(
&
bootstrap_comm
);
printf
(
"init pos 3
\n
"
);
// // -------------------------- 3.MPI allgather设置unique_id的整合 ----------------------------------- //
// std::vector<scclUniqueId> unique_id_vec(nRanks);
// MPI_Allgather(&unique_id, sizeof(scclUniqueId), MPI_BYTE, &unique_id_vec[0], sizeof(scclUniqueId), MPI_BYTE, MPI_COMM_WORLD);
// for(int i = 0; i < nRanks; ++i) {
// auto root_handle = reinterpret_cast<const struct BootstrapHandle*>(unique_ids_chr + i * sizeof(struct BootstrapHandle));
// printf("rank=%d, i=%d, unique_ids hosthash=%lu\n", root_handle->rank, i, root_handle->hostHash);
// }
// ByteSpan<struct BootstrapHandle> unique_ids_span(unique_ids_chr, nRanks * sizeof(struct BootstrapHandle));
// // -------------------------- 2.设置基础信息 ----------------------------------- //
// INFO(SCCL_LOG_TOPO, "Bootstrap ...\n");
// struct scclRankInfo rank_info;
// rank_info.rank = rank;
// rank_info.nRanks = nRanks;
// // 在每个进程中设置 root_handle 的值
// root_handle.rank = rank_info->rank;
// root_handle.hostHash = getHostHash();
// scclSocketAddress_t localSocketAddr = sccl_bootstrap->getLocalSocketAddr();
// memcpy(&root_handle.addr, &localSocketAddr, sizeof(scclSocketAddress_t));
// #if 1
// char line[100];
// sprintf(line, "pos 55: rank=%d", rank);
// SCCLCHECK(hardware::net::printSocketAddr(&root_handle.addr, line));
// printf("root_handle.hostHash rank=%d, hash=%lu\n", rank, root_handle.hostHash);
// #endif
// // -------------------------- 3.收集所有进程的 root_handle 信息 ----------------------------------- //
// std::vector<char> recvBuffer(nRanks * sendBuffer.size());
// SCCLCHECK(mpi::wrap_mpi_allgather(sendBuffer.data(), sendBuffer.size(), MPI_BYTE, recvBuffer.data(), sendBuffer.size(), MPI_BYTE, MPI_COMM_WORLD));
// -------------------------- 4.设置各个节点的基础信息 ----------------------------------- //
// SCCLCHECK(sccl_bootstrap->bootstrapInit(rank_info, recvBuffer.data()));
// -------------------------- 5.根据各个节点的基础信息计算topo结果 ----------------------------------- //
...
...
@@ -84,14 +50,12 @@ scclResult_t sccl_finalize() {
// 设置一些全局变量的重置和销毁
// 设置socket等硬件监听的关闭
// void BootstrapComm::destroy() {
if
(
bootstrap_comm
.
nRanks
>
0
)
{
bootstrap_comm
.
destroy
();
}
//
if(bootstrap_comm.nRanks > 0) {
//
bootstrap_comm.destroy();
//
}
return
scclSuccess
;
}
}
// namespace bootstrap
}
// namespace topology
}
// namespace hardware
}
// namespace sccl
src/hardware/hardware.h
View file @
571a75b5
...
...
@@ -6,15 +6,14 @@
namespace
sccl
{
namespace
hardware
{
namespace
topology
{
namespace
bootstrap
{
typedef
topology
::
bootstrap
::
scclUniqueId
scclUniqueId
;
typedef
topology
::
bootstrap
::
BootstrapHandle_t
BootstrapHandle_t
;
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
scclResult_t
scclGetUniqueId
(
scclUniqueId
*
unique_id
);
scclResult_t
sccl_init
(
const
scclUniqueId
*
unique_id
,
int
rank
,
int
nRanks
);
scclResult_t
sccl_finalize
();
}
// namespace bootstrap
}
// namespace topology
}
// namespace hardware
}
// namespace sccl
src/hardware/hardware_utils.h
View file @
571a75b5
...
...
@@ -6,9 +6,8 @@
namespace
sccl
{
namespace
hardware
{
namespace
ops
{
////
}
// namespace ops
// 实现类似于std::span的功能,将字节数组转换为类型数组
}
// namespace hardware
}
// namespace sccl
src/hardware/net/ipc_socket/ipc_socket.cpp
View file @
571a75b5
This diff is collapsed.
Click to expand it.
src/hardware/net/ipc_socket/ipc_socket.h
View file @
571a75b5
...
...
@@ -37,7 +37,7 @@ struct DataPackage {
};
//////////////////////////////////////////////////////////////////////////////////////////////////////
class
scclIpcSocket
{
typedef
class
scclIpcSocket
{
public:
// 构造函数和析构函数
scclIpcSocket
(
int
localRank
,
int
nlocalRanks
,
uint64_t
hash
,
volatile
uint32_t
*
abortFlag
=
nullptr
);
...
...
@@ -62,19 +62,16 @@ public:
// 通过Unix域套接字发送/接收数据到指定目标
scclResult_t
scclIpcSocketSendData
(
const
void
*
data
,
size_t
dataLen
,
int
dst_rank
);
scclResult_t
scclIpcSocketRecvData
(
void
*
buffer
,
size_t
bufferLen
,
size_t
*
receivedLen
);
scclResult_t
scclIpcSocketRecvData
(
void
*
buffer
,
size_t
bufferLen
,
size_t
*
receivedLen
,
int
*
src_rank
);
// 通过Unix域套接字发送/接收数据到指定目标,
并发送ack确保发送成功
// 通过Unix域套接字发送/接收数据到指定目标,
有ACK信息
scclResult_t
scclIpcSocketSendDataWithAck
(
const
void
*
data
,
size_t
dataLen
,
int
dst_rank
);
scclResult_t
scclIpcSocketRecvData
AndSend
Ack
(
void
*
buffer
,
size_t
bufferLen
,
size_t
*
receivedLen
,
int
src_rank
);
scclResult_t
scclIpcSocketRecvData
With
Ack
(
void
*
buffer
,
size_t
bufferLen
,
size_t
*
receivedLen
,
int
*
src_rank
);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// local rank内的allgather操作
。
保证接收顺序
// local rank内的allgather操作
,
保证接收顺序
scclResult_t
scclIpcSocketAllgather
(
const
void
*
sendData
,
void
*
recvData
,
size_t
dataLen
);
// local rank内的allgather操作。为了性能,不保证接收顺序,所以发送的信息中需要添加进程ID
scclResult_t
scclIpcSocketAllgatherSync
(
const
void
*
sendData
,
void
*
recvData
,
size_t
dataLen
);
// local rank内的broadcast操作
scclResult_t
scclIpcSocketBroadcast
(
void
*
data
,
size_t
dataLen
,
int
root
);
...
...
@@ -82,6 +79,12 @@ private:
// 初始化IPC套接字
scclResult_t
scclIpcSocketInit
(
volatile
uint32_t
*
abortFlag
);
scclResult_t
getScclIpcSocknameStr
(
int
rank
,
uint64_t
hash
,
char
*
out_str
,
int
*
out_len
);
// 通过Unix域套接字发送/接收数据到指定目标,不加锁执行
scclResult_t
scclIpcSocketSendDataBasic
(
const
void
*
data
,
size_t
dataLen
,
int
dst_rank
);
scclResult_t
scclIpcSocketRecvDataBasic
(
void
*
buffer
,
size_t
bufferLen
,
size_t
*
receivedLen
);
// 通过Unix域套接字发送/接收数据到指定目标,不加锁执行
scclResult_t
scclIpcSocketSendDataAndRank
(
const
void
*
data
,
size_t
dataLen
,
int
dst_rank
);
scclResult_t
scclIpcSocketRecvDataAndRank
(
void
*
buffer
,
size_t
bufferLen
,
size_t
*
receivedLen
,
int
*
src_rank
);
private:
// 定义并初始化一个 scclIpcSocket 结构体,用于处理 IPC 套接字连接
...
...
@@ -100,6 +103,7 @@ private:
// 线程池指针
ThreadPool
*
pthread_pool
=
nullptr
;
// 设置超时时间为无限长
int
timeoutMs
=
-
1
;
...
...
@@ -107,7 +111,7 @@ private:
static
constexpr
int
ACK_SIZE
=
8
;
// 假设 CHUNK_SIZE 是一个合适的块大小,例如 64KB
static
constexpr
size_t
CHUNK_SIZE
=
64
*
1024
;
};
}
scclIpcSocket_t
;
}
// namespace ipc_socket
}
// namespace net
...
...
src/hardware/net/net_ib/net_ib.cpp
View file @
571a75b5
...
...
@@ -1154,8 +1154,10 @@ scclResult_t scclNetIb::getProperties(int dev, scclNetProperties_t* props) {
if
(
scclIbGdrSupport
(
dev
)
==
scclSuccess
)
{
props
->
ptrSupport
|=
SCCL_PTR_CUDA
;
// GDR support via nv_peermem
}
if
(
scclIbDmaBufSupport
(
dev
)
==
scclSuccess
)
{
props
->
ptrSupport
|=
SCCL_PTR_DMABUF
;
// GDR support via DMA-BUF
if
(
getDmaBufEnable
()
!=
0
)
{
if
(
scclIbDmaBufSupport
(
dev
)
==
scclSuccess
)
{
props
->
ptrSupport
|=
SCCL_PTR_DMABUF
;
// GDR support via DMA-BUF
}
}
props
->
speed
=
scclIbDevs
[
dev
].
speed
;
props
->
latency
=
0
;
// Not set
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment