Commit 474b26ab authored by yiqa
Browse files

使用groupgemm完成高吞吐模式适配

parent 1693e754
......@@ -380,7 +380,7 @@ class _DeepEPDispatcherImplNormal(_DeepEPDispatcherImplBase):
num_recv_tokens_per_expert,
event,
) = self._dispatch_core(hidden_states, topk_ids, topk_weights, previous_event)
event.current_stream_wait() if self.async_finish else ()
# event.current_stream_wait() if self.async_finish else ()
if isinstance(hidden_states, tuple):
hidden_states, hidden_states_scale = hidden_states
......@@ -500,7 +500,7 @@ class _DeepEPDispatcherImplNormal(_DeepEPDispatcherImplBase):
def combine_b(self, output, previous_event):
hidden_states, event = self._combine_core(output, previous_event)
event.current_stream_wait() if self.async_finish else ()
# event.current_stream_wait() if self.async_finish else ()
self.handle = None
self.src2dst = None
return hidden_states
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment