Unverified commit 42253d08, authored by Void and committed by GitHub
Browse files

Fix the address of dispatch_rdma_recv_count_buffer to avoid cleaning after...


Fix the address of dispatch_rdma_recv_count_buffer to avoid cleaning after each change in hidden_size/token_num. (#313)
Signed-off-by: Yilin Zhang <18275976+yilin-void@users.noreply.github.com>
parent bdd119f8
...@@ -159,7 +159,8 @@ struct LowLatencyLayout { ...@@ -159,7 +159,8 @@ struct LowLatencyLayout {
size_t dispatch_recv_count_buffer_bytes = num_experts * sizeof(int); size_t dispatch_recv_count_buffer_bytes = num_experts * sizeof(int);
size_t combine_recv_flag_buffer_bytes = dispatch_recv_count_buffer_bytes; size_t combine_recv_flag_buffer_bytes = dispatch_recv_count_buffer_bytes;
size_t signaling_buffer_bytes = std::max(dispatch_recv_count_buffer_bytes, combine_recv_flag_buffer_bytes); size_t signaling_buffer_bytes = std::max(dispatch_recv_count_buffer_bytes, combine_recv_flag_buffer_bytes);
total_bytes += signaling_buffer_bytes * 2; size_t signaling_buffer_bytes_aligned = align<size_t>(signaling_buffer_bytes, 128);
total_bytes += signaling_buffer_bytes_aligned * 2;
// Assign pointers // Assign pointers
// NOTES: we still leave some space for distinguishing dispatch/combine buffer, // NOTES: we still leave some space for distinguishing dispatch/combine buffer,
...@@ -167,13 +168,13 @@ struct LowLatencyLayout { ...@@ -167,13 +168,13 @@ struct LowLatencyLayout {
for (int i = 0; i < 2; ++ i) { for (int i = 0; i < 2; ++ i) {
buffers[i] = { buffers[i] = {
static_cast<int>(signaling_buffer_bytes / sizeof(int)), static_cast<int>(signaling_buffer_bytes / sizeof(int)),
advance(rdma_buffer, send_buffer_bytes * i), advance(rdma_buffer, signaling_buffer_bytes_aligned * 2 + send_buffer_bytes * i),
advance(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * i), advance(rdma_buffer, signaling_buffer_bytes_aligned * 2 + send_buffer_bytes * 2 + recv_buffer_bytes * i),
advance<int*>(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * 2 + signaling_buffer_bytes * i), advance<int*>(rdma_buffer, signaling_buffer_bytes_aligned * i),
advance(rdma_buffer, send_buffer_bytes * i), advance(rdma_buffer, signaling_buffer_bytes_aligned * 2 + send_buffer_bytes * i),
advance(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * i), advance(rdma_buffer, signaling_buffer_bytes_aligned * 2 + send_buffer_bytes * 2 + recv_buffer_bytes * i),
advance<int*>(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * 2 + signaling_buffer_bytes * i), advance<int*>(rdma_buffer, signaling_buffer_bytes_aligned * i),
advance(rdma_buffer, send_buffer_bytes * i), advance(rdma_buffer, signaling_buffer_bytes_aligned * 2 + send_buffer_bytes * i),
num_bytes_per_combine_msg num_bytes_per_combine_msg
}; };
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment