"vscode:/vscode.git/clone" did not exist on "f1e0c7ce4a4c4a6f48e18db935bfefb01fcd3f53"
Unverified Commit 91a066ec authored by fzyzcjy's avatar fzyzcjy Committed by GitHub
Browse files

Tiny remove comments about DeepEP on H20 (#7234)

parent c4943867
......@@ -542,38 +542,6 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase):
topk_idx: torch.Tensor,
use_fp8: bool = False,
):
"""
# On H20, a CUDA error is raised: DeepEP/csrc/kernels/internode_ll.cu:337 'too many blocks in cooperative launch'.
# Before using this path, apply the change below to dispatch / combine in DeepEP's internode_ll.cu, then reinstall DeepEP.
# For more details, see: https://github.com/deepseek-ai/DeepEP/issues/15#issuecomment-2709715782
diff --git a/csrc/kernels/internode_ll.cu b/csrc/kernels/internode_ll.cu
index 76ae2e2..8ecd08f 100644
--- a/csrc/kernels/internode_ll.cu
+++ b/csrc/kernels/internode_ll.cu
@@ -310,8 +310,8 @@ void dispatch(void* packed_recv_x, float* packed_recv_x_scales,
int num_topk, int num_experts, int rank, int num_ranks, bool use_fp8,
void* workspace, cudaStream_t stream, int phases) {
constexpr int kNumMaxTopK = 9;
- constexpr int kNumWarpsPerGroup = 10;
- constexpr int kNumWarpGroups = 3;
+ constexpr int kNumWarpsPerGroup = 8;
+ constexpr int kNumWarpGroups = 4;
EP_STATIC_ASSERT(kNumMaxTopK + 1 <= kNumWarpGroups * kNumWarpsPerGroup, "Too many top-k selections");
const auto num_warps = kNumWarpGroups * kNumWarpsPerGroup;
@@ -501,8 +501,8 @@ void combine(void* combined_x,
int num_combined_tokens, int hidden, int num_max_dispatch_tokens_per_rank,
int num_topk, int num_experts, int rank, int num_ranks,
void* workspace, cudaStream_t stream, int phases) {
- constexpr int kNumWarpsPerGroup = 10;
- constexpr int kNumWarpGroups = 3;
+ constexpr int kNumWarpsPerGroup = 8;
+ constexpr int kNumWarpGroups = 4;
constexpr int kNumMaxTopk = 9;
const auto num_warps = kNumWarpGroups * kNumWarpsPerGroup;
"""
buffer = self._get_buffer()
packed_recv_hidden, packed_recv_count, self.handle, event, hook = (
buffer.low_latency_dispatch(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment