Commit 20241efa authored by yiqa's avatar yiqa
Browse files

使用groupgemm完成高吞吐模式适配

parent 8f355853
......@@ -541,7 +541,7 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase):
num_max_dispatch_tokens_per_rank: the actual batch size in the decoding engine should be less than 256
https://github.com/deepseek-ai/DeepEP?tab=readme-ov-file#example-use-in-inference-decoding
"""
self.return_recv_hook = return_recv_hook
self.return_recv_hook = False
self.device_module = torch.get_device_module()
self.quant_config = {}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment