Commit a4ac3320 authored by lishen

Implement ipcsocket via a thread pool to support intra-node communication

parent d9d23f34
#include <iostream>
#include <vector>
#include <queue>
#include <pthread.h>
#include <functional>
#include <future>
#include <memory>
#include <cstdlib> // for std::rand
#include <thread>  // for std::this_thread::sleep_for
#include "thread_pool.h"

// Simulate a task that takes some time to complete.
int simulate_long_task(int id, int duration) {
    std::cout << "Task " << id << " is running..." << std::endl;
    std::this_thread::sleep_for(std::chrono::seconds(duration));
    std::cout << "Task " << id << " completed." << std::endl;
    return id + 10;
}

int main() {
    ThreadPool pool(4);
    std::future<int> task_res[10];
    // Enqueue several tasks into the thread pool.
    for(int i = 0; i < 10; ++i) {
        task_res[i] = pool.enqueue(simulate_long_task, i, std::rand() % 3 + 1);
    }
    for(int i = 0; i < 10; ++i) {
        // get() blocks until the corresponding task has finished.
        std::cout << "final " << i << " get:" << task_res[i].get() << "." << std::endl;
    }
    // All futures were collected above, so this extra wait is only a safety margin.
    std::this_thread::sleep_for(std::chrono::seconds(10));
    return 0;
}
#include "thread_pool.h"
ThreadPool::ThreadPool(size_t threads) : stop(false) {
pthread_mutex_init(&queue_mutex, nullptr);
pthread_cond_init(&condition, nullptr);
for(size_t i = 0; i < threads; ++i) {
pthread_t worker;
pthread_create(&worker, nullptr, ThreadPool::run, this);
workers.push_back(worker);
}
}
ThreadPool::~ThreadPool() {
{
pthread_mutex_lock(&queue_mutex);
stop = true;
pthread_mutex_unlock(&queue_mutex);
pthread_cond_broadcast(&condition);
}
for(size_t i = 0; i < workers.size(); ++i) {
pthread_join(workers[i], nullptr);
}
pthread_mutex_destroy(&queue_mutex);
pthread_cond_destroy(&condition);
}
void* ThreadPool::run(void* arg) {
ThreadPool* pool = static_cast<ThreadPool*>(arg);
while(true) {
std::function<void()> task;
{
pthread_mutex_lock(&pool->queue_mutex);
while(pool->tasks.empty() && !pool->stop) {
pthread_cond_wait(&pool->condition, &pool->queue_mutex);
}
if(pool->stop && pool->tasks.empty()) {
pthread_mutex_unlock(&pool->queue_mutex);
return nullptr;
}
task = pool->tasks.front();
pool->tasks.pop();
pthread_mutex_unlock(&pool->queue_mutex);
}
task();
}
}
#pragma once

#include <iostream>
#include <vector>
#include <queue>
#include <pthread.h>
#include <functional>
#include <future>
#include <memory>

class ThreadPool {
public:
    ThreadPool(size_t);
    ~ThreadPool();

    // Wrap the callable in a packaged_task so the caller gets a future, then
    // push a type-erased void() closure onto the shared queue and wake a worker.
    // (std::result_of is deprecated since C++17 but still compiles with -std=c++17.)
    template <class F, class... Args>
    auto enqueue(F&& f, Args&&... args) -> std::future<typename std::result_of<F(Args...)>::type> {
        using return_type = typename std::result_of<F(Args...)>::type;
        auto task = std::make_shared<std::packaged_task<return_type()>>(
            std::bind(std::forward<F>(f), std::forward<Args>(args)...));
        std::future<return_type> res = task->get_future();
        {
            pthread_mutex_lock(&queue_mutex);
            tasks.push([task]() { (*task)(); });
            pthread_mutex_unlock(&queue_mutex);
            pthread_cond_signal(&condition);
        }
        return res;
    }

private:
    std::vector<pthread_t> workers;
    std::queue<std::function<void()>> tasks;
    pthread_mutex_t queue_mutex;
    pthread_cond_t condition;
    bool stop;

    static void* run(void* arg);
};
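// A minimal usage sketch for this header (hypothetical example, not part of
// the commit): enqueue() accepts any callable plus its arguments and returns
// a std::future; get() blocks until a worker thread has executed the task.
//
//     ThreadPool pool(2);
//     auto fut = pool.enqueue([](int x, int y) { return x + y; }, 2, 3);
//     std::cout << fut.get() << std::endl; // prints 5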
hipcc main.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/thread_pool.cpp \
-o main \
-std=c++17 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ -Wno-return-type \
-I ./ -I /usr/include -I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/topology/topo \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/thread_pool \
-L /usr/lib/x86_64-linux-gnu \
-L /usr/lib/ \
-lamdhip64 -lrocm_smi64 -pthread
#include "thread_pool.h"
using namespace std;
using namespace sccl;
void multiply(const int a, const int b) {
const int res = a * b;
printf("%d * %d = %d\n", a, b, res);
}
void* show_id(void* id) {
int tid = *(int*)id;
for(int i = 0; i < 1000; ++i) {
printf("id=%d\n", tid);
}
return (void*)0;
}
int main() {
ThreadPool thread_pool(30);
for(int i = 1; i < 3; ++i) {
for(int j = 1; j < 10; ++j) {
thread_pool.enqueue(multiply, i, j);
}
}
return 0;
}
@@ -11,14 +11,15 @@ int main(int argc, char* argv[]) {
     // printf("device num=%d\n", n_ib);
     // ----------------------------------------------------------------------- //
-    auto scclNet = sccl::hardware::net::initNet(sccl::hardware::net::NET_IB);
-    // auto scclNet = sccl::hardware::net::initNet(sccl::hardware::net::NET_SOCKET);
-    sccl::hardware::net::scclNetProperties_t props;
+    sccl::hardware::net::scclNet_t* scclNet;
+    sccl::hardware::net::scclNetInit("IB", scclNet);
+    // sccl::hardware::net::scclNetInit("Socket", scclNet);
     scclNet->init();
     int n_ib;
     scclNet->devices(&n_ib);
     printf("device num=%d\n", n_ib);
+    sccl::hardware::net::scclNetProperties_t props;
     scclNet->getProperties(0, &props);
     printf("device name=%s\n", props.name);
......
@@ -55,6 +55,8 @@ int main(int argc, char* argv[]) {
 #if 0
     {
+        sccl::hardware::net::scclNet_t* scclNet;
+        sccl::hardware::net::scclNetInit("Socket", scclNet);
         auto scclNet = sccl::hardware::net::initNet(sccl::hardware::net::NET_SOCKET);
         sccl::hardware::net::scclNetProperties_t props;
@@ -84,7 +86,8 @@ int main(int argc, char* argv[]) {
 #endif
 #if 1
     {
-        auto scclNet = sccl::hardware::net::initNet(sccl::hardware::net::NET_IB);
+        sccl::hardware::net::scclNet_t* scclNet;
+        sccl::hardware::net::scclNetInit("IB", scclNet);
         sccl::hardware::net::scclNetProperties_t props;
         int n_ib;
......
hipcc ./2_mpi_get.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ibvsymbols.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ibvwrap.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/net_ib.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/socket.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/net_socket.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvsymbols.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvwrap.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/net_ib.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/socket.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/net_socket.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_utils.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/rocm_wrap.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/param.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net.cpp \
-o 2_mpi_get \
-std=c++17 -g -O3 -fopenmp -DROC_SHMEM -D__HIP_PLATFORM_HCC__ -Wno-return-type \
-I ./ -I /usr/include -I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/include/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/ \
-L /public/home/lishen/Code/rocSHMEM/SCCL_v1 \
-L /usr/lib/x86_64-linux-gnu -libverbs -lrdmacm \
-L /public/home/lishen/Code/rocSHMEM/3rd_party/install/ompi/lib -lmpi
hipcc ./1_simple.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ibvsymbols.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ibvwrap.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/net_ib.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/socket.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/net_socket.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvsymbols.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ibvwrap.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/net_ib.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/socket.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_utils.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/rocm_wrap.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/param.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net.cpp \
/public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/net_socket.cpp \
-o 1_simple \
-std=c++17 -g -O3 -fopenmp -DROC_SHMEM -D__HIP_PLATFORM_HCC__ -Wno-return-type \
-I ./ -I /usr/include -I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/utils/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/include/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/device/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/host/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_ib/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/net_socket/ \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/hardware/net/ \
-L /public/home/lishen/Code/rocSHMEM/SCCL_v1 \
-L /usr/lib/x86_64-linux-gnu -libverbs -lrdmacm
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>

#define SOCKET_PATH "/tmp/socket.domain"

void err_quit(const char* estr) {
    perror(estr);
    exit(-1);
}

int main(int argc, char* argv[]) {
    char buf[1024] = {0};                 // holds data sent to / read back from the server
    int sockfd = -1;                      // socket file descriptor
    int rv = -1;                          // return value of read/write
    struct sockaddr_un servaddr;          // server address
    socklen_t addrlen = sizeof(servaddr); // server address length

    // Create a UNIX domain stream socket.
    if((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
        err_quit("socket create failure");
    }
    printf("Create sockfd[%d] ok\n", sockfd);

    bzero(&servaddr, sizeof(servaddr));
    servaddr.sun_family = AF_UNIX;
    strncpy(servaddr.sun_path, SOCKET_PATH, sizeof(servaddr.sun_path) - 1);

    // Connect to the server; there is no point in continuing if this fails.
    if(connect(sockfd, (struct sockaddr*)&servaddr, addrlen) < 0) {
        printf("Connect to unix domain socket server on \"%s\" failure:%s\n", SOCKET_PATH, strerror(errno));
        close(sockfd);
        return -1;
    }
    printf("connect unix domain socket \"%s\" ok!\n", SOCKET_PATH);

    fgets(buf, sizeof(buf), stdin); // read a line (including '\n') from stdin

    // Send the line to the server.
    if((rv = write(sockfd, buf, strlen(buf))) < 0) {
        printf("Write to server failure:%s\n", strerror(errno));
        close(sockfd);
        return -1;
    }
    printf("Actually write %d bytes data to server:%s\n", rv, buf);

    bzero(buf, sizeof(buf));
    printf("start read\n");

    // Read the server's reply.
    if((rv = read(sockfd, buf, sizeof(buf))) < 0) {
        printf("Read from server failure:%s\n", strerror(errno));
        close(sockfd);
        return -1;
    }
    else if(0 == rv) {
        printf("socket connection disconnected\n");
        close(sockfd);
        return -3;
    }
    printf("Read %d bytes data from server:%s\n", rv, buf);

    close(sockfd);
    return 0;
}
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <ctype.h> // for toupper
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>

#define SOCKET_PATH "/tmp/socket.domain"

void err_quit(const char* estr) {
    perror(estr);
    exit(-1);
}

int main(int argc, char* argv[]) {
    char buf[1024];                       // holds data read from a client
    int i;
    int listen_fd = -1;                   // listening socket
    int client_fd;                        // per-client socket
    int rv = -1;                          // return value of read
    struct sockaddr_un servaddr;          // server address
    struct sockaddr_un cliaddr;           // client address
    socklen_t addrlen = sizeof(servaddr); // address length

    // Create a UNIX domain stream socket.
    listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if(listen_fd < 0) {
        err_quit("socket create fail");
    }
    printf("create sockfd[%d] ok!\n", listen_fd);

    // Remove a stale socket file left over from a previous run.
    if(!access(SOCKET_PATH, F_OK)) {
        remove(SOCKET_PATH);
    }

    bzero(&servaddr, addrlen);
    servaddr.sun_family = AF_UNIX;
    strncpy(servaddr.sun_path, SOCKET_PATH, sizeof(servaddr.sun_path) - 1);

    // Bind the socket to the path.
    if(bind(listen_fd, (struct sockaddr*)&servaddr, addrlen) < 0) {
        printf("Bind socket failure:%s\n", strerror(errno));
        unlink(SOCKET_PATH);
        return -1;
    }
    listen(listen_fd, 13);

    // Serve clients one at a time.
    while(1) {
        printf("Start waiting and accept new client connect......\n");
        addrlen = sizeof(cliaddr); // accept() treats addrlen as value-result, so reset it each time
        client_fd = accept(listen_fd, (struct sockaddr*)&cliaddr, &addrlen);
        if(client_fd < 0) {
            printf("Accept new client failure:%s\n", strerror(errno));
            return -2;
        }
        memset(buf, 0, sizeof(buf));

        // Read the client's message.
        if((rv = read(client_fd, buf, sizeof(buf))) < 0) {
            printf("Read from client[%d] failure:%s\n", client_fd, strerror(errno));
            close(client_fd);
            continue;
        }
        else if(rv == 0) {
            printf("socket connection disconnected\n");
            close(client_fd);
            continue;
        }
        printf("Read message from client[%d]:%s\n", client_fd, buf);

        // Convert the received data to upper case.
        for(i = 0; i < rv; i++) {
            buf[i] = toupper(buf[i]);
        }

        // Echo the converted data back to the client.
        if(write(client_fd, buf, rv) < 0) {
            printf("Write to client[%d] failure:%s\n", client_fd, strerror(errno));
            close(client_fd);
            continue;
        }
        printf("Write %d bytes data to client[%d]\n", rv, client_fd);

        close(client_fd);
        sleep(1);
    }
    close(listen_fd);
}
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define SOCKET_PATH "/tmp/unix_domain_socket"

void err_quit(const char* msg) {
    perror(msg);
    exit(1);
}

int main() {
    int sockfd, send_fd;
    struct sockaddr_un servaddr;
    struct iovec iov;
    struct msghdr msg;
    char buf[1024];
    char ctrl_buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr* cmsg;

    // Create a UNIX domain datagram socket.
    if((sockfd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0)
        err_quit("socket creation failed");
    printf("sockfd=%d\n", sockfd);

    memset(&servaddr, 0, sizeof(servaddr));
    servaddr.sun_family = AF_UNIX;
    strncpy(servaddr.sun_path, SOCKET_PATH, sizeof(servaddr.sun_path) - 1);

    // Prepare the message payload; zero msg first so every field (including
    // msg_flags) starts initialized.
    strcpy(buf, "Hello, Server!");
    memset(&msg, 0, sizeof(msg));
    iov.iov_base = buf;
    iov.iov_len = strlen(buf) + 1;
    msg.msg_name = &servaddr;
    msg.msg_namelen = sizeof(servaddr);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = ctrl_buf;
    msg.msg_controllen = sizeof(ctrl_buf);

    // Attach a file descriptor as SCM_RIGHTS ancillary data.
    cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = CMSG_LEN(sizeof(int));
    send_fd = dup(STDOUT_FILENO); // example: send a duplicate of stdout
    memmove(CMSG_DATA(cmsg), &send_fd, sizeof(send_fd));

    // Send the message.
    if(sendmsg(sockfd, &msg, 0) < 0) {
        if(errno == ECONNREFUSED) {
            printf("sendmsg failed: connection refused. Make sure the server is running.\n");
        } else {
            err_quit("sendmsg failed");
        }
    } else {
        printf("Message sent to server.\n");
    }

    /*
    printf("111\n");
    // Receive the response.
    struct sockaddr_un from;
    socklen_t fromlen = sizeof(from);
    memset(buf, 0, sizeof(buf));
    iov.iov_base = buf;
    iov.iov_len = sizeof(buf);
    msg.msg_name = &from;
    msg.msg_namelen = fromlen;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    printf("222\n");
    if(recvmsg(sockfd, &msg, 0) < 0) {
        err_quit("recvmsg failed");
    }
    printf("333\n");
    printf("Received response from server: %s\n", buf);
    */

    close(sockfd);
    return 0;
}
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define SOCKET_PATH "/tmp/unix_domain_socket"

void err_quit(const char* msg) {
    perror(msg);
    exit(1);
}

/**
 * @brief Unix domain socket server example
 *
 * A simple Unix domain datagram socket server that receives messages and
 * file descriptors. It:
 * 1. creates and binds a Unix domain socket,
 * 2. loops receiving client messages and file descriptors,
 * 3. closes each received file descriptor,
 * 4. cleans up the socket resources.
 *
 * @note The response-sending path is currently commented out.
 * @warning A goto statement is used for resource cleanup; mind the control flow.
 */
int main() {
    int sockfd;
    struct sockaddr_un servaddr, cliaddr;
    struct iovec iov;
    struct msghdr msg;
    char buf[1024];
    char ctrl_buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr* cmsg;
    int received_fd;

    // Create a UNIX domain datagram socket.
    if((sockfd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0)
        err_quit("socket creation failed");

    memset(&servaddr, 0, sizeof(servaddr));
    unlink(SOCKET_PATH);
    servaddr.sun_family = AF_UNIX;
    strncpy(servaddr.sun_path, SOCKET_PATH, sizeof(servaddr.sun_path) - 1);

    // Bind the socket to the path.
    if(bind(sockfd, (struct sockaddr*)&servaddr, sizeof(servaddr)) < 0)
        err_quit("bind failed");
    printf("Server is waiting for connections... sockfd=%d\n", sockfd);

    while(1) {
        // Reset the buffer and message header for each datagram.
        memset(buf, 0, sizeof(buf));
        memset(&msg, 0, sizeof(msg));
        iov.iov_base = buf;
        iov.iov_len = sizeof(buf);
        msg.msg_name = &cliaddr;
        msg.msg_namelen = sizeof(cliaddr);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = ctrl_buf;
        msg.msg_controllen = sizeof(ctrl_buf);

        // Receive one datagram.
        if(recvmsg(sockfd, &msg, 0) < 0)
            err_quit("recvmsg failed");
        printf("Received message: %s\n", buf);

        // Walk the ancillary data looking for a passed file descriptor.
        for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
            if(cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
                memmove(&received_fd, CMSG_DATA(cmsg), sizeof(received_fd));
                printf("Received file descriptor: %d\n", received_fd);
                close(received_fd); // close the received descriptor
                goto final;         // exit after the first descriptor has been handled
            }
        }
        /*
        printf("111\n");
        // Send a response.
        strcpy(buf, "Message received");
        printf("222\n");
        if(sendmsg(sockfd, &msg, 0) < 0) {
            printf("333\n");
            err_quit("sendmsg failed");
        }
        printf("444\n");
        */
    }
final:
    close(sockfd);
    unlink(SOCKET_PATH);
    return 0;
}
hipcc ./1_socket_client.cpp \
-o 1_socket_client \
-std=c++17 --offload-arch=gfx936 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ \
-I ./ \
-I /usr/include \
-I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/ \
-L /usr/lib/x86_64-linux-gnu -lpthread -lrt
hipcc ./2_socket_client.cpp \
-o 2_socket_client \
-std=c++17 --offload-arch=gfx936 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ \
-I ./ \
-I /usr/include \
-I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/ \
-L /usr/lib/x86_64-linux-gnu -lpthread -lrt
hipcc ./1_socket_server.cpp \
-o 1_socket_server \
-std=c++17 --offload-arch=gfx936 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ \
-I ./ \
-I /usr/include \
-I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/ \
-L /usr/lib/x86_64-linux-gnu -lpthread -lrt
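# A possible way to exercise the stream-socket pair built above (assumed
# invocation, matching the binary names these scripts produce): start the
# server first, then pipe a line into the client; the server echoes it back
# converted to upper case.
#
#   ./1_socket_server &
#   echo "hello" | ./1_socket_client   # expect "HELLO" back from the server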
hipcc ./2_socket_server.cpp \
-o 2_socket_server \
-std=c++17 --offload-arch=gfx936 -g -O3 -fopenmp -D__HIP_PLATFORM_HCC__ \
-I ./ \
-I /usr/include \
-I /opt/dtk/include \
-I /public/home/lishen/Code/rocSHMEM/SCCL_v1/src/ \
-L /usr/lib/x86_64-linux-gnu -lpthread -lrt
# g++ -std=c++11 -E - < /dev/null
# g++ -std=c++14 -E - < /dev/null
# g++ -std=c++17 -E - < /dev/null
# g++ -std=c++20 -E - < /dev/null
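# As written, the commands above only verify that each -std flag is accepted.
# To see which language-standard macro a flag actually selects, dumping the
# predefined macros is more informative (assumed usage, not from the original
# notes):
# g++ -std=c++17 -dM -E -x c++ /dev/null | grep __cplusplus   # 201703L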
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>  // for open/O_RDONLY
#include <unistd.h> // for read/close
#include <vector>
#include "mpi.h"
#include "net.h"
#include "ipc_socket.h"
/*
Interfaces that send and receive file descriptors over Unix domain sockets are typically
used when files or network connections must be shared between processes. This is common in
high-performance computing (HPC) environments that need efficient communication and
resource sharing, in server processes, and in applications that pass complex data
structures between processes. For example, one process may open a network connection to a
remote server and then hand that connection to another process to take over the
subsequent communication.

Use cases:
- HPC and parallel computing: when running parallel jobs with MPI (Message Passing
  Interface), different processes may need access to the same file or network resource.
  Passing the descriptor avoids having multiple processes re-open the same file or
  re-establish the same connection, saving resources and time.
- Server architectures: in designs where a master process accepts client connections and
  hands them to worker processes, descriptor passing is an effective strategy; the master
  can keep accepting new connections without being blocked by the handling of any single
  one.
- Plugin or modular systems: a plugin loaded into a process may need access to files or
  network connections the host process has already opened. With descriptor passing the
  plugin can use those resources directly without re-opening them.
*/
using namespace sccl;
typedef class sccl::hardware::net::ipc_socket::scclIpcSocket scclIpcSocket_t;

int ipcSendRecvFd_nrank2(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int dst_hash = 12345;
    scclIpcSocket_t ipcsocket(rank, dst_hash);
    if(rank == 0) {
        // Rank 0: open the file and send its descriptor.
        int fd = open("testfile.txt", O_RDONLY);
        if(fd < 0) {
            perror("Failed to open file");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        ipcsocket.scclIpcSocketSendFd(fd, 1, 12345); // assumes dst_hash is 12345
        close(fd);
    } else if(rank == 1) {
        // Rank 1: receive the descriptor and read the file contents.
        int fd;
        ipcsocket.scclIpcSocketRecvFd(&fd);
        char buffer[256];
        ssize_t n = read(fd, buffer, sizeof(buffer) - 1);
        if(n > 0) {
            buffer[n] = '\0';
            printf("Process %d received: %s\n", rank, buffer);
        }
        close(fd);
    }
    MPI_Finalize();
    return 0;
}
int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int dst_hash = 12345;
    scclIpcSocket_t ipcsocket(rank, dst_hash);
    if(rank == 0) {
        // Rank 0: open the file and send its descriptor to every other rank.
        int fd = open("testfile.txt", O_RDONLY);
        if(fd < 0) {
            perror("Failed to open file");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        for(int i = 1; i < size; ++i) {
            if(ipcsocket.scclIpcSocketSendFd(fd, i, dst_hash) != scclSuccess) {
                perror("Failed to send file descriptor");
                close(fd);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
        close(fd);
    } else {
        // Other ranks: receive the descriptor and read the file contents.
        int fd;
        if(ipcsocket.scclIpcSocketRecvFd(&fd) < 0) {
            perror("Failed to receive file descriptor");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        char buffer[256];
        ssize_t n = read(fd, buffer, sizeof(buffer) - 1);
        if(n > 0) {
            buffer[n] = '\0';
            printf("Process %d received: %s\n", rank, buffer);
        }
        close(fd);
    }
    MPI_Finalize();
    return 0;
}
/*
Run on a single node:
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root -np 4 1_socket_mpi_fd
*/
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>  // for open/O_RDONLY
#include <unistd.h> // for read/close
#include <vector>
#include <thread>   // for std::this_thread::sleep_for
#include "mpi.h"
#include "net.h"
#include "ipc_socket.h"
#include "thread_pool.h"
/*
Interfaces that send and receive file descriptors over Unix domain sockets are typically
used when files or network connections must be shared between processes. This is common in
high-performance computing (HPC) environments that need efficient communication and
resource sharing, in server processes, and in applications that pass complex data
structures between processes. For example, one process may open a network connection to a
remote server and then hand that connection to another process to take over the
subsequent communication.

Use cases:
- HPC and parallel computing: when running parallel jobs with MPI (Message Passing
  Interface), different processes may need access to the same file or network resource.
  Passing the descriptor avoids having multiple processes re-open the same file or
  re-establish the same connection, saving resources and time.
- Server architectures: in designs where a master process accepts client connections and
  hands them to worker processes, descriptor passing is an effective strategy; the master
  can keep accepting new connections without being blocked by the handling of any single
  one.
- Plugin or modular systems: a plugin loaded into a process may need access to files or
  network connections the host process has already opened. With descriptor passing the
  plugin can use those resources directly without re-opening them.
*/
using namespace sccl;
typedef class sccl::hardware::net::ipc_socket::scclIpcSocket scclIpcSocket_t;

template <typename T>
void send_fd(T* ipcsocket, int fd, int dst_rank) {
    if(ipcsocket->scclIpcSocketSendFd(fd, dst_rank) != scclSuccess) {
        perror("Failed to send file descriptor");
        close(fd);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}
int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int dst_hash = 12345;
    scclIpcSocket_t ipcsocket(rank, size, dst_hash);
    if(rank == 0) {
        // Rank 0: open the file and send its descriptor to every other rank,
        // dispatching the sends through the thread pool.
        int fd = open("testfile.txt", O_RDONLY);
        if(fd < 0) {
            perror("Failed to open file");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        auto pthpool = ThreadPool(8);
        for(int i = 1; i < size; ++i) {
            auto task = std::bind(send_fd<scclIpcSocket_t>, &ipcsocket, fd, i);
            pthpool.enqueue(task);
        }
        // Crude synchronization: give the enqueued sends time to finish before closing fd.
        std::this_thread::sleep_for(std::chrono::seconds(5));
        close(fd);
    } else {
        // Other ranks: receive the descriptor and read the file contents.
        int fd;
        if(ipcsocket.scclIpcSocketRecvFd(&fd) < 0) {
            perror("Failed to receive file descriptor");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        char buffer[256];
        ssize_t n = read(fd, buffer, sizeof(buffer) - 1);
        if(n > 0) {
            buffer[n] = '\0';
            printf("Process %d received: %s\n", rank, buffer);
        }
        close(fd);
    }
    std::this_thread::sleep_for(std::chrono::seconds(10));
    MPI_Finalize();
    return 0;
}
/*
Run on a single node:
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root -np 4 2_socket_mpi_fd_pthpool
*/
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <vector>
#include <thread> // for std::this_thread::sleep_for
#include "mpi.h"
#include "net.h"
#include "ipc_socket.h"
#include "thread_pool.h"

using namespace sccl;
typedef class sccl::hardware::net::ipc_socket::scclIpcSocket scclIpcSocket_t;

template <typename T>
void send_data(T* ipcsocket, const void* data, size_t dataLen, int dst_rank) {
    if(ipcsocket->scclIpcSocketSendData(data, dataLen, dst_rank) != scclSuccess) {
        perror("Failed to send data");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}

template <typename T>
void recv_data(T* ipcsocket, void* buffer, size_t bufferLen, size_t* receivedLen) {
    if(ipcsocket->scclIpcSocketRecvData(buffer, bufferLen, receivedLen) != scclSuccess) {
        perror("Failed to receive data");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}
template <typename T>
int test_allgather_ver1(T* ipcsocket, int rank, int size) {
    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(size * sendDataLen);
    size_t receivedLen;
    // Fill the send buffer.
    snprintf(sendData.data(), sendData.size(), "Data from process %d", rank);
    auto pthpool = ThreadPool(size * 2);
    // Exchange data with every other rank via the thread pool.
    for(int i = 0; i < size; ++i) {
        if(i != rank) {
            auto task_send = std::bind(send_data<scclIpcSocket_t>, ipcsocket, sendData.data(), sendData.size(), i);
            pthpool.enqueue(task_send);
            auto task_recv = std::bind(recv_data<scclIpcSocket_t>, ipcsocket, recvData.data() + i * sendDataLen, sendDataLen, &receivedLen);
            pthpool.enqueue(task_recv);
        }
    }
    // Note: the enqueued tasks may still be running here; receivedLen and
    // recvData are only guaranteed valid once the pool has been drained.
    printf("sendData.size()=%d, receivedLen=%d\n", sendDataLen, int(receivedLen));
    // Print the received data.
    for(int i = 0; i < size; ++i) {
        printf("Process %d received from process %d: %s\n", rank, i, recvData.data() + i * sendDataLen);
    }
    return 0;
}
template <typename T>
int test_allgather_ver2(T* ipcsocket, int rank, int size) {
    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(size * sendDataLen);
    // Fill the send buffer.
    snprintf(sendData.data(), sendData.size(), "Data from process %d", rank);
    SCCLCHECK(ipcsocket->scclIpcSocketAllgatherSync(sendData.data(), recvData.data(), sendData.size(), /*wait*/ true));
    // Print the received data.
    for(int i = 0; i < size; ++i) {
        printf("rank %d received from process %d: %s\n", rank, i, recvData.data() + i * sendData.size());
    }
    return 0;
}

template <typename T>
int test_allgather_ver3(T* ipcsocket, int rank, int size) {
    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(size * sendDataLen);
    // Fill the send buffer.
    snprintf(sendData.data(), sendData.size(), "Data from process %d", rank);
    SCCLCHECK(ipcsocket->scclIpcSocketAllgather(sendData.data(), recvData.data(), sendData.size()));
    // Print the received data.
    for(int i = 0; i < size; ++i) {
        printf("rank %d received from process %d: %s\n", rank, i, recvData.data() + i * sendData.size());
    }
    return 0;
}

template <typename T>
int test_broadcast_ver1(T* ipcsocket, int rank, int size) {
    int sendDataLen = 256;
    std::vector<char> sendData(sendDataLen);
    std::vector<char> recvData(sendDataLen);
    int root = 0; // rank 0 acts as the root process
    if(rank == root) {
        // Only the root fills the send buffer.
        snprintf(sendData.data(), sendData.size(), "Data from root process %d", rank);
    }
    SCCLCHECK(ipcsocket->scclIpcSocketBroadcast(sendData.data(), recvData.data(), sendData.size(), root, /*wait*/ true));
    // Print the received data.
    printf("rank %d received: %s\n", rank, recvData.data());
    return 0;
}
int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int dst_hash = 12345;
    scclIpcSocket_t* ipcsocket = new scclIpcSocket_t(rank, size, dst_hash);
    // test_allgather_ver1(ipcsocket, rank, size);
    // test_allgather_ver2(ipcsocket, rank, size);
    // test_allgather_ver3(ipcsocket, rank, size);
    test_broadcast_ver1(ipcsocket, rank, size);
    std::this_thread::sleep_for(std::chrono::seconds(10));
    // while(!ipcsocket->getPthreadPool()->allTasksCompleted()) {}
    // printf("delete ipcsocket... rank=%d\n", rank);
    delete(ipcsocket);
    MPI_Finalize();
    return 0;
}
/*
Run on a single node:
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root -np 8 3_socket_mpi_data
*/