"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "c5462fb37d8cef2ee6f3d579ff3f47bfc247c7f5"
Commit 192db8b4 authored by Antoine Kaufmann
Browse files

dist/net_rdma: batch transfer of entries if possible

parent e00c5148
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
static const uint64_t kPollReportThreshold = 128; static const uint64_t kPollReportThreshold = 128;
static const uint64_t kCleanReportThreshold = 128; static const uint64_t kCleanReportThreshold = 128;
static const uint64_t kPollMax = 8;
const char *shm_path = NULL; const char *shm_path = NULL;
size_t shm_size = 256 * 1024 * 1024ULL; // 256MB size_t shm_size = 256 * 1024 * 1024ULL; // 256MB
...@@ -343,21 +344,26 @@ static int PeerEvent(struct Peer *peer, uint32_t events) { ...@@ -343,21 +344,26 @@ static int PeerEvent(struct Peer *peer, uint32_t events) {
static inline void PollPeerTransfer(struct Peer *peer, bool *report) { static inline void PollPeerTransfer(struct Peer *peer, bool *report) {
// XXX: consider batching this to forward multiple entries at once if possible // XXX: consider batching this to forward multiple entries at once if possible
void *entry = (peer->local_base + peer->local_pos * peer->local_elen); size_t n;
bool ready; for (n = 0; n < kPollMax && peer->local_pos + n < peer->local_enum; n++) {
if (peer->is_dev) { void *entry = (peer->local_base + (peer->local_pos + n) * peer->local_elen);
struct SimbricksProtoNetD2NDummy *d2n = entry; bool ready;
ready = (d2n->own_type & SIMBRICKS_PROTO_NET_D2N_OWN_MASK) == if (peer->is_dev) {
SIMBRICKS_PROTO_NET_D2N_OWN_NET; struct SimbricksProtoNetD2NDummy *d2n = entry;
} else { ready = (d2n->own_type & SIMBRICKS_PROTO_NET_D2N_OWN_MASK) ==
struct SimbricksProtoNetN2DDummy *n2d = entry; SIMBRICKS_PROTO_NET_D2N_OWN_NET;
ready = (n2d->own_type & SIMBRICKS_PROTO_NET_N2D_OWN_MASK) == } else {
SIMBRICKS_PROTO_NET_N2D_OWN_DEV; struct SimbricksProtoNetN2DDummy *n2d = entry;
ready = (n2d->own_type & SIMBRICKS_PROTO_NET_N2D_OWN_MASK) ==
SIMBRICKS_PROTO_NET_N2D_OWN_DEV;
}
if (!ready)
break;
} }
if (ready) { if (n > 0) {
RdmaPassEntry(peer); RdmaPassEntry(peer, n);
peer->local_pos += 1; peer->local_pos += n;
if (peer->local_pos >= peer->local_enum) if (peer->local_pos >= peer->local_enum)
peer->local_pos -= peer->local_enum; peer->local_pos -= peer->local_enum;
......
...@@ -106,7 +106,7 @@ int PeerReport(struct Peer *peer, uint32_t written_pos, uint32_t clean_pos); ...@@ -106,7 +106,7 @@ int PeerReport(struct Peer *peer, uint32_t written_pos, uint32_t clean_pos);
int RdmaListen(struct sockaddr_in *addr); int RdmaListen(struct sockaddr_in *addr);
int RdmaConnect(struct sockaddr_in *addr); int RdmaConnect(struct sockaddr_in *addr);
int RdmaPassIntro(struct Peer *peer); int RdmaPassIntro(struct Peer *peer);
int RdmaPassEntry(struct Peer *peer); int RdmaPassEntry(struct Peer *peer, uint32_t n);
int RdmaPassReport(); int RdmaPassReport();
int RdmaEvent(); int RdmaEvent();
......
...@@ -550,7 +550,7 @@ int RdmaPassIntro(struct Peer *peer) { ...@@ -550,7 +550,7 @@ int RdmaPassIntro(struct Peer *peer) {
return 0; return 0;
} }
int RdmaPassEntry(struct Peer *peer) { int RdmaPassEntry(struct Peer *peer, uint32_t n) {
#ifdef RDMA_DEBUG #ifdef RDMA_DEBUG
fprintf(stderr, "RdmaPassEntry(%s,%u)\n", peer->sock_path, peer->local_pos); fprintf(stderr, "RdmaPassEntry(%s,%u)\n", peer->sock_path, peer->local_pos);
fprintf(stderr, " remote_base=%lx local_base=%p\n", peer->remote_base, fprintf(stderr, " remote_base=%lx local_base=%p\n", peer->remote_base,
...@@ -560,7 +560,7 @@ int RdmaPassEntry(struct Peer *peer) { ...@@ -560,7 +560,7 @@ int RdmaPassEntry(struct Peer *peer) {
uint64_t pos = peer->local_pos * peer->local_elen; uint64_t pos = peer->local_pos * peer->local_elen;
struct ibv_sge sge; struct ibv_sge sge;
sge.addr = (uintptr_t) (peer->local_base + pos); sge.addr = (uintptr_t) (peer->local_base + pos);
sge.length = peer->local_elen; sge.length = peer->local_elen * n;
sge.lkey = peer->shm_mr->lkey; sge.lkey = peer->shm_mr->lkey;
struct ibv_send_wr send_wr = { }; struct ibv_send_wr send_wr = { };
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment