Unverified commit d72817eb, authored by Zhiyi Hu, committed by GitHub
Browse files

fix hang due to small rdma_chunk_size (#317)


Co-authored-by: zhiyi Hu <zhiyihu@U-NYQQMGK0-2250.local>
parent 5b549c85
......@@ -1851,6 +1851,7 @@ void combine(cudaDataType_t type,
EP_HOST_ASSERT(num_forwarder_warps > NUM_MAX_NVL_PEERS and num_forwarder_warps % num_rdma_ranks == 0);
EP_HOST_ASSERT(num_max_nvl_chunked_recv_tokens % num_rdma_ranks == 0);
EP_HOST_ASSERT(num_max_nvl_chunked_recv_tokens / num_rdma_ranks > std::max(num_max_rdma_chunked_send_tokens, num_max_nvl_chunked_send_tokens));
EP_HOST_ASSERT(num_max_rdma_chunked_send_tokens >= num_warps_per_forwarder);
EP_HOST_ASSERT(type == CUDA_R_16BF);
SETUP_LAUNCH_CONFIG(num_channels * 2, (num_forwarder_warps + 1) * 32, stream);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment