lishen01 / Sccl · Commits

Commit a4ac3320, authored Jul 07, 2025 by lishen (parent d9d23f34)

    Implement ipcsocket with a thread pool to support intra-node communication
Showing 20 changed files with 947 additions and 25 deletions:
examples/0_demos/3_thread_pool2/my_pth_pool2/main.cpp                +37   -0
examples/0_demos/3_thread_pool2/my_pth_pool2/thread_pool.cpp         +51   -0
examples/0_demos/3_thread_pool2/my_pth_pool2/thread_pool.h           +39   -0
examples/0_demos/4_sccl_thread_pool/compile.sh                       +16   -0
examples/0_demos/4_sccl_thread_pool/main.cpp                         +28   -0
examples/1_connection/1_rdma_comm/1_simple.cpp                       +5    -4
examples/1_connection/1_rdma_comm/2_mpi_get.cpp                      +4    -1
examples/1_connection/1_rdma_comm/compile_mpi.sh                     +10   -8
examples/1_connection/1_rdma_comm/compile_simple.sh                  +10   -8
examples/1_connection/2_ipc_socket_simple/1_socket_client.cpp        +74   -0
examples/1_connection/2_ipc_socket_simple/1_socket_server.cpp        +101  -0
examples/1_connection/2_ipc_socket_simple/2_socket_client.cpp        +89   -0
examples/1_connection/2_ipc_socket_simple/2_socket_server.cpp        +102  -0
examples/1_connection/2_ipc_socket_simple/compile_client1.sh         +3    -4
examples/1_connection/2_ipc_socket_simple/compile_client2.sh         +9    -0
examples/1_connection/2_ipc_socket_simple/compile_server1.sh         +9    -0
examples/1_connection/2_ipc_socket_simple/compile_server2.sh         +15   -0
examples/1_connection/3_sccl_ipc_socket/1_socket_mpi_fd.cpp          +110  -0
examples/1_connection/3_sccl_ipc_socket/2_socket_mpi_fd_pthpool.cpp  +89   -0
examples/1_connection/3_sccl_ipc_socket/3_socket_mpi_data.cpp        +146  -0
examples/0_demos/3_thread_pool2/my_pth_pool2/main.cpp (new file, mode 100644)
#include <iostream>
#include <vector>
#include <queue>
#include <pthread.h>
#include <functional>
#include <future>
#include <memory>
#include <thread> // for std::this_thread::sleep_for
#include "thread_pool.h"

// Simulate an operation that takes some time
int simulate_long_task(int id, int duration) {
    std::cout << "Task " << id << " is running..." << std::endl;
    std::this_thread::sleep_for(std::chrono::seconds(duration));
    std::cout << "Task " << id << " completed." << std::endl;
    return id + 10;
}

int main() {
    ThreadPool pool(4);
    std::future<int> task_res[10];
    // Enqueue multiple tasks into the thread pool
    for (int i = 0; i < 10; ++i) {
        task_res[i] = pool.enqueue(simulate_long_task, i, std::rand() % 3 + 1);
    }
    for (int i = 0; i < 10; ++i) {
        std::cout << "final " << i << " get:" << task_res[i].get() << "." << std::endl;
    }
    // Wait for all tasks to complete
    std::this_thread::sleep_for(std::chrono::seconds(10));
    return 0;
}
\ No newline at end of file
examples/0_demos/3_thread_pool2/my_pth_pool2/thread_pool.cpp (new file, mode 100644)
#include "thread_pool.h"
ThreadPool
::
ThreadPool
(
size_t
threads
)
:
stop
(
false
)
{
pthread_mutex_init
(
&
queue_mutex
,
nullptr
);
pthread_cond_init
(
&
condition
,
nullptr
);
for
(
size_t
i
=
0
;
i
<
threads
;
++
i
)
{
pthread_t
worker
;
pthread_create
(
&
worker
,
nullptr
,
ThreadPool
::
run
,
this
);
workers
.
push_back
(
worker
);
}
}
ThreadPool
::~
ThreadPool
()
{
{
pthread_mutex_lock
(
&
queue_mutex
);
stop
=
true
;
pthread_mutex_unlock
(
&
queue_mutex
);
pthread_cond_broadcast
(
&
condition
);
}
for
(
size_t
i
=
0
;
i
<
workers
.
size
();
++
i
)
{
pthread_join
(
workers
[
i
],
nullptr
);
}
pthread_mutex_destroy
(
&
queue_mutex
);
pthread_cond_destroy
(
&
condition
);
}
void
*
ThreadPool
::
run
(
void
*
arg
)
{
ThreadPool
*
pool
=
static_cast
<
ThreadPool
*>
(
arg
);
while
(
true
)
{
std
::
function
<
void
()
>
task
;
{
pthread_mutex_lock
(
&
pool
->
queue_mutex
);
while
(
pool
->
tasks
.
empty
()
&&
!
pool
->
stop
)
{
pthread_cond_wait
(
&
pool
->
condition
,
&
pool
->
queue_mutex
);
}
if
(
pool
->
stop
&&
pool
->
tasks
.
empty
())
{
pthread_mutex_unlock
(
&
pool
->
queue_mutex
);
return
nullptr
;
}
task
=
pool
->
tasks
.
front
();
pool
->
tasks
.
pop
();
pthread_mutex_unlock
(
&
pool
->
queue_mutex
);
}
task
();
}
}
examples/0_demos/3_thread_pool2/my_pth_pool2/thread_pool.h (new file, mode 100644)
#include <iostream>
#include <vector>
#include <queue>
#include <pthread.h>
#include <functional>
#include <future>
#include <memory>

class ThreadPool {
public:
    ThreadPool(size_t);
    ~ThreadPool();

    template <class F, class... Args>
    auto enqueue(F&& f, Args&&... args)
        -> std::future<typename std::result_of<F(Args...)>::type> {
        using return_type = typename std::result_of<F(Args...)>::type;
        auto task = std::make_shared<std::packaged_task<return_type()>>(
            std::bind(std::forward<F>(f), std::forward<Args>(args)...));
        std::future<return_type> res = task->get_future();
        {
            pthread_mutex_lock(&queue_mutex);
            tasks.push([task]() { (*task)(); });
            pthread_mutex_unlock(&queue_mutex);
            pthread_cond_signal(&condition);
        }
        return res;
    }

private:
    std::vector<pthread_t> workers;
    std::queue<std::function<void()>> tasks;
    pthread_mutex_t queue_mutex;
    pthread_cond_t condition;
    bool stop;

    static void* run(void* arg);
};
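
A minimal usage sketch of the enqueue interface declared above (illustrative only, not a file in this commit; it assumes nothing beyond the ThreadPool header shown). enqueue wraps the callable in a std::packaged_task, so the caller gets a std::future for the result:

#include <cstdio>
#include "thread_pool.h"

int main() {
    ThreadPool pool(2);  // two pthread workers
    // enqueue deduces std::future<int> from the lambda's return type
    auto fut = pool.enqueue([](int x) { return x * x; }, 7);
    std::printf("7*7 = %d\n", fut.get());  // get() blocks until a worker runs the task
    return 0;  // ~ThreadPool() joins the workers after the queue drains
}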
examples/0_demos/4_sccl_thread_pool/compile.sh (new file, mode 100644)
hipcc main.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/thread_pool.cpp \
-o main \
-std=c++17 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ -Wno-return-type \
-I ./ -I /usr/include -I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/topo \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/thread_pool \
-L /usr/lib/x86_64-linux-gnu \
-L /usr/lib/ \
-lamdhip64 -lrocm_smi64 -pthread
\ No newline at end of file
examples/0_demos/4_sccl_thread_pool/main.cpp (new file, mode 100644)
#include "thread_pool.h"
using
namespace
std
;
using
namespace
sccl
;
void
multiply
(
const
int
a
,
const
int
b
)
{
const
int
res
=
a
*
b
;
printf
(
"%d * %d = %d
\n
"
,
a
,
b
,
res
);
}
void
*
show_id
(
void
*
id
)
{
int
tid
=
*
(
int
*
)
id
;
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
printf
(
"id=%d
\n
"
,
tid
);
}
return
(
void
*
)
0
;
}
int
main
()
{
ThreadPool
thread_pool
(
30
);
for
(
int
i
=
1
;
i
<
3
;
++
i
)
{
for
(
int
j
=
1
;
j
<
10
;
++
j
)
{
thread_pool
.
enqueue
(
multiply
,
i
,
j
);
}
}
return
0
;
}
examples/1_connection/1_rdma_comm/1_simple.cpp (modified)
@@ -11,14 +11,15 @@ int main(int argc, char* argv[]) {
     // printf("device num=%d\n", n_ib);
     // ----------------------------------------------------------------------- //
-    auto scclNet = sccl::hardware::net::initNet(sccl::hardware::net::NET_IB);
-    // auto scclNet = sccl::hardware::net::initNet(sccl::hardware::net::NET_SOCKET);
-    sccl::hardware::net::scclNetProperties_t props;
+    sccl::hardware::net::scclNet_t* scclNet;
+    sccl::hardware::net::scclNetInit("IB", scclNet);
+    // sccl::hardware::net::scclNetInit("Socket", scclNet);
     scclNet->init();
     int n_ib;
     scclNet->devices(&n_ib);
     printf("device num=%d\n", n_ib);
+    sccl::hardware::net::scclNetProperties_t props;
     scclNet->getProperties(0, &props);
     printf("device name=%s\n", props.name);
examples/1_connection/1_rdma_comm/2_mpi_get.cpp (modified)
@@ -55,6 +55,8 @@ int main(int argc, char* argv[]) {
 #if 0
     {
+        sccl::hardware::net::scclNet_t* scclNet;
+        sccl::hardware::net::scclNetInit("Socket", scclNet);
         auto scclNet = sccl::hardware::net::initNet(sccl::hardware::net::NET_SOCKET);
         sccl::hardware::net::scclNetProperties_t props;
@@ -84,7 +86,8 @@ int main(int argc, char* argv[]) {
 #endif
 #if 1
     {
-        auto scclNet = sccl::hardware::net::initNet(sccl::hardware::net::NET_IB);
+        sccl::hardware::net::scclNet_t* scclNet;
+        sccl::hardware::net::scclNetInit("IB", scclNet);
         sccl::hardware::net::scclNetProperties_t props;
         int n_ib;
examples/1_connection/1_rdma_comm/compile_mpi.sh (modified)
 hipcc ./2_mpi_get.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ibvsymbols.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ibvwrap.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/net_ib.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/socket.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/net_socket.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvsymbols.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvwrap.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/net_ib.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/socket.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/net_socket.cpp \
 /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_utils.cpp \
 /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/rocm_wrap.cpp \
 /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/param.cpp \
 /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net.cpp \
 -o 2_mpi_get \
--std=c++17 -g -O3 -fopenmp -DROC_SHMEM -D__HIP_PLATFORM_HCC__ \
+-std=c++17 -g -O3 -fopenmp -DROC_SHMEM -D__HIP_PLATFORM_HCC__ -Wno-return-type \
 -I ./ -I /usr/include -I /opt/dtk/include \
 -I /public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/include/ \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/ \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include/ \
--I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ \
--I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/ \
+-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ \
+-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/ \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/ \
 -L /public/home/lishen/Code/rocSHMEM/SCCL_v1 \
 -L /usr/lib/x86_64-linux-gnu -libverbs -lrdmacm \
 -L /public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/lib -lmpi
examples/1_connection/1_rdma_comm/compile_simple.sh (modified)
 hipcc ./1_simple.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ibvsymbols.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ibvwrap.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/net_ib.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/socket.cpp \
-/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/net_socket.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvsymbols.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvwrap.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/net_ib.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/socket.cpp \
 /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_utils.cpp \
 /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/rocm_wrap.cpp \
 /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/param.cpp \
 /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net.cpp \
+/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/net_socket.cpp \
 -o 1_simple \
--std=c++17 -g -O3 -fopenmp -DROC_SHMEM -D__HIP_PLATFORM_HCC__ \
+-std=c++17 -g -O3 -fopenmp -DROC_SHMEM -D__HIP_PLATFORM_HCC__ -Wno-return-type \
 -I ./ -I /usr/include -I /opt/dtk/include \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/ \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include/ \
--I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ \
--I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/ \
+-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ \
+-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/ \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ \
 -L /public/home/lishen/Code/rocSHMEM/SCCL_v1 \
 -L /usr/lib/x86_64-linux-gnu -libverbs -lrdmacm
examples/1_connection/2_ipc_socket_simple/1_socket_client.cpp (new file, mode 100644)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>

#define SOCKET_PATH "/tmp/socket.domain"

void err_quit(const char* estr) {
    perror(estr);
    exit(-1);
}

int main(int argc, char* argv[]) {
    char buf[1024] = {0};                  // buffer for data read from or sent to the server
    int sockfd = -1;                       // socket file descriptor
    int rv = -1;                           // return value of read/write calls
    struct sockaddr_un servaddr;           // server address
    socklen_t addrlen = sizeof(servaddr);  // server address length

    // Create a UNIX domain socket
    if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
        err_quit("socket create failure");  // on failure, report the error and exit
    }
    printf("Create sockfd[%d] ok\n", sockfd);  // print the created socket descriptor

    bzero(&servaddr, sizeof(servaddr));  // zero out the server address structure
    servaddr.sun_family = AF_UNIX;       // address family: UNIX domain
    strncpy(servaddr.sun_path, SOCKET_PATH, sizeof(servaddr.sun_path) - 1);  // server socket path

    // Connect to the server
    if (connect(sockfd, (struct sockaddr*)&servaddr, addrlen) < 0)
        printf("Connect to unix domain socket server on \"%s\" failure:%s\n", SOCKET_PATH, strerror(errno));  // report connect failure
    printf("connect unix domain socket \"%s\" ok!\n", SOCKET_PATH);  // report connect success

    fgets(buf, sizeof(buf), stdin);  // read a line from standard input

    // Write the data to the server
    if ((rv = write(sockfd, buf, strlen(buf))) < 0) {
        printf("Write to server failure:%s\n", strerror(errno));  // report write failure
        close(sockfd);  // close the socket
        return -1;      // return an error code
    }
    printf("Actually write %d bytes data to server:%s\n", rv - 1, buf);  // print the byte count and data written

    bzero(&buf, sizeof(buf));  // clear the buffer
    printf("start read\n");    // announce the read

    // Read the reply from the server
    if ((rv = read(sockfd, buf, sizeof(buf))) < 0) {
        printf("Read to server failure:%s\n", strerror(errno));  // report read failure
        close(sockfd);  // close the socket
        return -1;      // return an error code
    } else if (0 == rv) {
        printf("socket connet disconnected\n");  // report the disconnect
        close(sockfd);  // close the socket
        return -3;      // return an error code
    }
    printf("Read %d bytes data from server:%s\n", rv - 1, buf);  // print the byte count and data read

    close(sockfd);  // close the socket
    return 0;       // success
}
examples/1_connection/2_ipc_socket_simple/1_socket_server.cpp (new file, mode 100644)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <ctype.h>  // for toupper (added; needed for a standalone build)
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>

#define SOCKET_PATH "/tmp/socket.domain"

void err_quit(const char* estr) {
    perror(estr);
    exit(-1);
}

int main(int argc, char* argv[]) {
    char buf[1024];  // buffer for data read from the client
    int i;
    int listen_fd = -1;  // listening socket
    int client_fd;       // client socket
    int rv = -1;         // read return value
    struct sockaddr_un servaddr;           // server address structure
    struct sockaddr_un cliaddr;            // client address structure
    socklen_t addrlen = sizeof(servaddr);  // address length

    // Create a UNIX domain socket
    listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (listen_fd < 0) {
        err_quit("socket create fail");  // socket creation failed
    }
    printf("create sockfd[%d] ok!\n", listen_fd);  // socket creation succeeded

    // Remove the socket path if it already exists
    if (!access(SOCKET_PATH, F_OK)) {
        remove(SOCKET_PATH);
    }

    // Zero out the server address structure
    bzero(&servaddr, addrlen);
    servaddr.sun_family = AF_UNIX;  // address family: UNIX domain
    strncpy(servaddr.sun_path, SOCKET_PATH, sizeof(servaddr.sun_path));  // socket path

    // Bind the socket to the address
    if (bind(listen_fd, (struct sockaddr*)&servaddr, addrlen) < 0) {
        printf("Create socket failure:%s\n", strerror(errno));  // bind failed
        unlink(SOCKET_PATH);
        return -1;
    }

    // Listen on the socket
    listen(listen_fd, 13);

    // Loop forever waiting for client connections
    while (1) {
        printf("Start waiting and accept new client connect......\n");  // wait for a client

        client_fd = accept(listen_fd, (struct sockaddr*)&cliaddr, &addrlen);  // accept a client connection
        if (client_fd < 0) {
            printf("Accept new client failure:%s\n", strerror(errno));  // accept failed
            return -2;
        }

        memset(buf, 0, sizeof(buf));  // clear the buffer

        // Read data from the client
        if ((rv = read(client_fd, buf, sizeof(buf))) < 0) {
            printf("Read from client[%d] failure:%s\n", client_fd, strerror(errno));  // read failed
            close(client_fd);
            continue;
        } else if (rv == 0) {
            printf("socket connet disconneted\n");  // client disconnected
            close(client_fd);
            continue;
        }
        printf("Read massage from client[%d]:%s\n", listen_fd, buf);  // print the message read from the client

        // Convert the received data to upper case
        for (i = 0; i < rv; i++) {
            buf[i] = toupper(buf[i]);
        }

        // Write the data back to the client
        if (write(client_fd, buf, rv) < 0) {
            printf("Write to client[%d] failure:%s\n", client_fd, strerror(errno));  // write failed
            close(client_fd);
            continue;
        }
        printf("Write %d bytes data to client[%d]\n", rv - 1, client_fd);  // print the byte count written to the client

        close(client_fd);  // close the client socket
        sleep(1);          // pause for one second
    }
    close(listen_fd);  // close the listening socket
}
examples/1_connection/2_ipc_socket_simple/2_socket_client.cpp (new file, mode 100644)
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define SOCKET_PATH "/tmp/unix_domain_socket"

void err_quit(const char* msg) {
    perror(msg);
    exit(1);
}

int main() {
    int sockfd, send_fd;
    struct sockaddr_un servaddr;
    struct iovec iov;
    struct msghdr msg;
    char buf[1024];
    char ctrl_buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr* cmsg;

    // Create a UNIX domain datagram socket
    if ((sockfd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0)
        err_quit("socket creation failed");
    printf("sockfd=%d\n", sockfd);

    // Zero out the server address structure
    memset(&servaddr, 0, sizeof(servaddr));
    servaddr.sun_family = AF_UNIX;
    strncpy(servaddr.sun_path, SOCKET_PATH, sizeof(servaddr.sun_path) - 1);

    // Prepare the message to send
    strcpy(buf, "Hello, Server!");
    iov.iov_base = buf;
    iov.iov_len = strlen(buf) + 1;
    msg.msg_name = &servaddr;
    msg.msg_namelen = sizeof(servaddr);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = ctrl_buf;
    msg.msg_controllen = sizeof(ctrl_buf);

    // Attach the file descriptor as ancillary data
    cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = CMSG_LEN(sizeof(int));
    send_fd = dup(STDOUT_FILENO);  // example: send the standard output file descriptor
    memmove(CMSG_DATA(cmsg), &send_fd, sizeof(send_fd));

    // Send the message
    if (sendmsg(sockfd, &msg, 0) < 0) {
        if (errno == ECONNREFUSED) {
            printf("sendmsg failed: Transport endpoint is not connected. Make sure the server is running.\n");
        } else {
            err_quit("sendmsg failed");
        }
    } else {
        printf("Message sent to server.\n");
    }

    /*
    printf("111\n");
    // Receive the response
    struct sockaddr_un from;
    socklen_t fromlen = sizeof(from);
    memset(buf, 0, sizeof(buf));
    iov.iov_base = buf;
    iov.iov_len = sizeof(buf);
    msg.msg_name = &from;
    msg.msg_namelen = fromlen;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    printf("222\n");
    if(recvmsg(sockfd, &msg, 0) < 0) {
        err_quit("recvmsg failed");
    }
    printf("333\n");
    printf("Received response from server: %s\n", buf);
    */

    close(sockfd);
    return 0;
}
\ No newline at end of file
examples/1_connection/2_ipc_socket_simple/2_socket_server.cpp (new file, mode 100644)
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define SOCKET_PATH "/tmp/unix_domain_socket"

void err_quit(const char* msg) {
    perror(msg);
    exit(1);
}

/**
 * @brief UNIX domain socket server example
 *
 * This program implements a simple UNIX domain datagram socket server that
 * receives messages and file descriptors. Its main steps are:
 * 1. Create and bind the UNIX domain socket
 * 2. Loop receiving client messages and file descriptors
 * 3. Handle each received file descriptor (close it)
 * 4. Clean up the socket resources
 *
 * @note The response-sending code is currently commented out.
 * @warning The program uses a goto statement for resource cleanup; mind the control flow.
 */
int main() {
    int sockfd;
    struct sockaddr_un servaddr, cliaddr;
    struct iovec iov;
    struct msghdr msg;
    char buf[1024];
    char ctrl_buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr* cmsg;
    int received_fd;

    // Create a UNIX domain datagram socket
    if ((sockfd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0)
        err_quit("socket creation failed");

    // Zero out the server address structure
    memset(&servaddr, 0, sizeof(servaddr));
    unlink(SOCKET_PATH);
    servaddr.sun_family = AF_UNIX;
    strncpy(servaddr.sun_path, SOCKET_PATH, sizeof(servaddr.sun_path) - 1);

    // Bind the socket to the address
    if (bind(sockfd, (struct sockaddr*)&servaddr, sizeof(servaddr)) < 0)
        err_quit("bind failed");
    printf("Server is waiting for connections... sockfd=%d\n", sockfd);

    while (1) {
        // Clear the buffer and message header
        memset(buf, 0, sizeof(buf));
        memset(&msg, 0, sizeof(msg));
        iov.iov_base = buf;
        iov.iov_len = sizeof(buf);
        msg.msg_name = &cliaddr;
        msg.msg_namelen = sizeof(cliaddr);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = ctrl_buf;
        msg.msg_controllen = sizeof(ctrl_buf);

        // Receive a message
        if (recvmsg(sockfd, &msg, 0) < 0)
            err_quit("recvmsg failed");
        printf("Received message: %s\n", buf);

        // Process the ancillary data
        for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
            if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
                memmove(&received_fd, CMSG_DATA(cmsg), sizeof(received_fd));
                printf("Received file descriptor: %d\n", received_fd);
                close(received_fd);  // close the received file descriptor
            }
            goto final;
        }

        /*
        printf("111\n");
        // Send a response
        strcpy(buf, "Message received");
        printf("222\n");
        if(sendmsg(sockfd, &msg, 0) < 0) {
            printf("333\n");
            err_quit("sendmsg failed");
        }
        printf("444\n");
        */
    }

final:
    close(sockfd);
    unlink(SOCKET_PATH);
    return 0;
}
\ No newline at end of file
examples/1_connection/3_socket_comm/compile.sh → examples/1_connection/2_ipc_socket_simple/compile_client1.sh (renamed, modified)
-hipcc ./test_socket_itf.cpp \
-./socket.cpp \
--o test_socket_itf \
+hipcc ./1_socket_client.cpp \
+-o 1_socket_client \
 -std=c++17 --offload-arch=gfx936 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ \
 -I ./ \
 -I /usr/include \
 -I /opt/dtk/include \
 -I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/ \
--L /usr/lib/x86_64-linux-gnu -lpthread -lrt
-\ No newline at end of file
+-L /usr/lib/x86_64-linux-gnu -lpthread -lrt
examples/1_connection/2_ipc_socket_simple/compile_client2.sh (new file, mode 100644)
hipcc ./2_socket_client.cpp \
-o 2_socket_client \
-std=c++17 --offload-arch=gfx936 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ \
-I ./ \
-I /usr/include \
-I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/ \
-L /usr/lib/x86_64-linux-gnu -lpthread -lrt
examples/1_connection/2_ipc_socket_simple/compile_server1.sh (new file, mode 100644)
hipcc ./1_socket_server.cpp \
-o 1_socket_server \
-std=c++17 --offload-arch=gfx936 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ \
-I ./ \
-I /usr/include \
-I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/ \
-L /usr/lib/x86_64-linux-gnu -lpthread -lrt
examples/1_connection/2_ipc_socket_simple/compile_server2.sh (new file, mode 100644)
hipcc ./2_socket_server.cpp \
-o 2_socket_server \
-std=c++17 --offload-arch=gfx936 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ \
-I ./ \
-I /usr/include \
-I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/ \
-L /usr/lib/x86_64-linux-gnu -lpthread -lrt

# g++ -std=c++11 -E - < /dev/null
# g++ -std=c++14 -E - < /dev/null
# g++ -std=c++17 -E - < /dev/null
# g++ -std=c++20 -E - < /dev/null
examples/1_connection/3_sccl_ipc_socket/1_socket_mpi_fd.cpp (new file, mode 100644)
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>   // for open/O_RDONLY (added; needed for a standalone build)
#include <unistd.h>  // for read/close (added; needed for a standalone build)
#include <vector>    // for std::vector
#include "mpi.h"
#include "net.h"
#include "ipc_socket.h"

/*
The interfaces for sending and receiving file descriptors over UNIX domain sockets are
typically used when files or network connections must be shared between processes. This
is common in high-performance computing (HPC) environments, server processes, and
applications that need to pass complex data structures between processes. For example,
one process may open a network connection to a remote server and then hand that
connection to another process to handle the subsequent communication.

Use cases:
- HPC and parallel computing: when using MPI (Message Passing Interface) for parallel
  computation, different processes may need access to the same file or network resource.
  Sending file descriptors avoids having multiple processes repeatedly open the same
  file or establish the same network connection, saving resources and time.
- Server architectures: in a design where a master process accepts client connections
  and hands them off to worker processes, file descriptor passing is an effective
  strategy. It lets the master process keep accepting new connections without being
  blocked by the handling of any single connection.
- Plugins or modular systems: a plugin loaded into a process may need access to files
  or network connections the host process has already opened. With file descriptor
  passing, the plugin can use those resources directly without reopening them.
*/

using namespace sccl;

typedef class sccl::hardware::net::ipc_socket::scclIpcSocket scclIpcSocket_t;

int ipcSendRecvFd_nrank2(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int dst_hash = 12345;
    scclIpcSocket_t ipcsocket(rank, dst_hash);

    if (rank == 0) {
        // Process 0: open a file and send the file descriptor
        int fd = open("testfile.txt", O_RDONLY);
        if (fd < 0) {
            perror("Failed to open file");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        ipcsocket.scclIpcSocketSendFd(fd, 1, 12345);  // assume dst_hash is 12345
        close(fd);
    } else if (rank == 1) {
        // Process 1: receive the file descriptor and read the file contents
        int fd;
        ipcsocket.scclIpcSocketRecvFd(&fd);
        char buffer[256];
        ssize_t n = read(fd, buffer, sizeof(buffer) - 1);
        if (n > 0) {
            buffer[n] = '\0';
            printf("Process %d received: %s\n", rank, buffer);
        }
        close(fd);
    }
    MPI_Finalize();
    return 0;
}

int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int dst_hash = 12345;
    scclIpcSocket_t ipcsocket(rank, dst_hash);

    if (rank == 0) {
        // Process 0: open a file and send the descriptor to all other processes
        int fd = open("testfile.txt", O_RDONLY);
        if (fd < 0) {
            perror("Failed to open file");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        for (int i = 1; i < size; ++i) {
            if (ipcsocket.scclIpcSocketSendFd(fd, i, dst_hash) != scclSuccess) {
                perror("Failed to send file descriptor");
                close(fd);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
        close(fd);
    } else {
        // Other processes: receive the descriptor and read the file contents
        int fd;
        if (ipcsocket.scclIpcSocketRecvFd(&fd) < 0) {
            perror("Failed to receive file descriptor");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        char buffer[256];
        ssize_t n = read(fd, buffer, sizeof(buffer) - 1);
        if (n > 0) {
            buffer[n] = '\0';
            printf("Process %d received: %s\n", rank, buffer);
        }
        close(fd);
    }
    MPI_Finalize();
    return 0;
}

/*
Run on a single node:
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root -np 4 1_socket_mpi_fd
*/
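
For reference alongside the comment block above: descriptor passing over UNIX domain sockets rests on sendmsg with SCM_RIGHTS ancillary data, the same primitive used in 2_socket_client.cpp/2_socket_server.cpp earlier in this commit. A minimal sketch of the send side (illustrative only; send_one_fd is a hypothetical helper, and this is not necessarily how scclIpcSocketSendFd is implemented):

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

// Hypothetical helper: send `fd` over a connected UNIX domain socket `sock`.
// Returns 0 on success, -1 on error (errno is set by sendmsg).
static int send_one_fd(int sock, int fd) {
    char payload = 'F';  // at least one byte of normal data must accompany the fd
    struct iovec iov;
    iov.iov_base = &payload;
    iov.iov_len = 1;

    char ctrl[CMSG_SPACE(sizeof(int))];  // space for one int-sized ancillary item
    struct msghdr msg;
    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = ctrl;
    msg.msg_controllen = sizeof(ctrl);

    struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;       // socket-level control message...
    cmsg->cmsg_type = SCM_RIGHTS;        // ...that transfers file descriptors
    cmsg->cmsg_len = CMSG_LEN(sizeof(int));
    memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

    return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}

The kernel installs a duplicate of the descriptor in the receiving process, so sender and receiver end up with independent descriptors referring to the same open file description.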
examples/1_connection/3_sccl_ipc_socket/2_socket_mpi_fd_pthpool.cpp (new file, mode 100644)
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>   // for open/O_RDONLY (added; needed for a standalone build)
#include <unistd.h>  // for read/close (added; needed for a standalone build)
#include <vector>    // for std::vector
#include <thread>    // for std::this_thread::sleep_for
#include "mpi.h"
#include "net.h"
#include "ipc_socket.h"
#include "thread_pool.h"

/*
The interfaces for sending and receiving file descriptors over UNIX domain sockets are
typically used when files or network connections must be shared between processes. This
is common in high-performance computing (HPC) environments, server processes, and
applications that need to pass complex data structures between processes. For example,
one process may open a network connection to a remote server and then hand that
connection to another process to handle the subsequent communication.

Use cases:
- HPC and parallel computing: when using MPI (Message Passing Interface) for parallel
  computation, different processes may need access to the same file or network resource.
  Sending file descriptors avoids having multiple processes repeatedly open the same
  file or establish the same network connection, saving resources and time.
- Server architectures: in a design where a master process accepts client connections
  and hands them off to worker processes, file descriptor passing is an effective
  strategy. It lets the master process keep accepting new connections without being
  blocked by the handling of any single connection.
- Plugins or modular systems: a plugin loaded into a process may need access to files
  or network connections the host process has already opened. With file descriptor
  passing, the plugin can use those resources directly without reopening them.
*/

using namespace sccl;

typedef class sccl::hardware::net::ipc_socket::scclIpcSocket scclIpcSocket_t;

template <typename T>
void send_fd(T* ipcsocket, int fd, int dst_rank) {
    if (ipcsocket->scclIpcSocketSendFd(fd, dst_rank) != scclSuccess) {
        perror("Failed to send file descriptor");
        close(fd);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}

int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int dst_hash = 12345;
    scclIpcSocket_t ipcsocket(rank, size, dst_hash);

    if (rank == 0) {
        // Process 0: open a file and send the descriptor to all other processes
        int fd = open("testfile.txt", O_RDONLY);
        if (fd < 0) {
            perror("Failed to open file");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        auto pthpool = ThreadPool(8);
        for (int i = 1; i < size; ++i) {
            auto task = std::bind(send_fd<scclIpcSocket_t>, &ipcsocket, fd, i);
            pthpool.enqueue(task);
        }
        std::this_thread::sleep_for(std::chrono::seconds(5));
        close(fd);
    } else {
        // Other processes: receive the descriptor and read the file contents
        int fd;
        if (ipcsocket.scclIpcSocketRecvFd(&fd) < 0) {
            perror("Failed to receive file descriptor");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        char buffer[256];
        ssize_t n = read(fd, buffer, sizeof(buffer) - 1);
        if (n > 0) {
            buffer[n] = '\0';
            printf("Process %d received: %s\n", rank, buffer);
        }
        close(fd);
    }
    std::this_thread::sleep_for(std::chrono::seconds(10));
    MPI_Finalize();
    return 0;
}

/*
Run on a single node:
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root -np 4 2_socket_mpi_fd_pthpool
*/
examples/1_connection/3_sccl_ipc_socket/3_socket_mpi_data.cpp (new file, mode 100644)
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <vector>  // for std::vector
#include <thread>  // for std::this_thread::sleep_for
#include "mpi.h"
#include "net.h"
#include "ipc_socket.h"
#include "thread_pool.h"

using namespace sccl;

typedef class sccl::hardware::net::ipc_socket::scclIpcSocket scclIpcSocket_t;

template <typename T>
void send_data(T* ipcsocket, const void* data, size_t dataLen, int dst_rank) {
    if (ipcsocket->scclIpcSocketSendData(data, dataLen, dst_rank) != scclSuccess) {
        perror("Failed to send data");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}

template <typename T>
void recv_data(T* ipcsocket, void* buffer, size_t bufferLen, size_t* receivedLen) {
    if (ipcsocket->scclIpcSocketRecvData(buffer, bufferLen, receivedLen) != scclSuccess) {
        perror("Failed to receive data");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}

template <typename T>
int test_allgather_ver1(T* ipcsocket, int rank, int size) {
    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(size * sendDataLen);
    size_t receivedLen;

    // Fill the send buffer
    snprintf(sendData.data(), sendData.size(), "Data from process %d", rank);

    auto pthpool = ThreadPool(size * 2);
    // Send data to all other processes
    for (int i = 0; i < size; ++i) {
        if (i != rank) {
            auto task_send = std::bind(send_data<scclIpcSocket_t>, ipcsocket,
                                       sendData.data(), sendData.size(), i);
            pthpool.enqueue(task_send);
            auto task_recv = std::bind(recv_data<scclIpcSocket_t>, ipcsocket,
                                       recvData.data() + i * sendDataLen, sendDataLen, &receivedLen);
            pthpool.enqueue(task_recv);
        }
    }
    printf("sendData.size()=%d, receivedLen=%d\n", sendDataLen, int(receivedLen));

    // Print the received data
    for (int i = 0; i < size; ++i) {
        printf("Process %d received from process %d: %s\n", rank, i, recvData.data() + i * 256);
    }
    return 0;
}

template <typename T>
int test_allgather_ver2(T* ipcsocket, int rank, int size) {
    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(size * sendDataLen);

    // Fill the send buffer
    snprintf(sendData.data(), sendData.size(), "Data from process %d", rank);

    SCCLCHECK(ipcsocket->scclIpcSocketAllgatherSync(sendData.data(), recvData.data(),
                                                    sendData.size(), /*wait*/ true));

    // Print the received data
    for (int i = 0; i < size; ++i) {
        printf("rank %d received from process %d: %s\n", rank, i,
               recvData.data() + i * sendData.size());
    }
    return 0;
}

template <typename T>
int test_allgather_ver3(T* ipcsocket, int rank, int size) {
    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(size * sendDataLen);

    // Fill the send buffer
    snprintf(sendData.data(), sendData.size(), "Data from process %d", rank);

    SCCLCHECK(ipcsocket->scclIpcSocketAllgather(sendData.data(), recvData.data(), sendData.size()));

    // Print the received data
    for (int i = 0; i < size; ++i) {
        printf("rank %d received from process %d: %s\n", rank, i,
               recvData.data() + i * sendData.size());
    }
    return 0;
}

template <typename T>
int test_broadcast_ver1(T* ipcsocket, int rank, int size) {
    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(sendDataLen);
    int root = 0;  // assume rank 0 is the root process

    if (rank == root) {
        // Only the root process fills the send buffer
        snprintf(sendData.data(), sendData.size(), "Data from root process %d", rank);
    }

    SCCLCHECK(ipcsocket->scclIpcSocketBroadcast(sendData.data(), recvData.data(),
                                                sendData.size(), root, /*wait*/ true));

    // Print the received data
    printf("rank %d received: %s\n", rank, recvData.data());
    return 0;
}

int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int dst_hash = 12345;
    scclIpcSocket_t* ipcsocket = new scclIpcSocket_t(rank, size, dst_hash);

    // test_allgather_ver1(ipcsocket, rank, size);
    // test_allgather_ver2(ipcsocket, rank, size);
    // test_allgather_ver3(ipcsocket, rank, size);
    test_broadcast_ver1(ipcsocket, rank, size);

    std::this_thread::sleep_for(std::chrono::seconds(10));
    // while(!ipcsocket->getPthreadPool()->allTasksCompleted()) {}
    // printf("delete ipcsocket... rank=%d\n", rank);
    delete (ipcsocket);
    MPI_Finalize();
    return 0;
}

/*
Run on a single node:
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root -np 8 3_socket_mpi_data
*/