Commit ae7f9123 authored by maxiao1

Set TBO-related variables & fix the TBO copy

parent 5533c538
...
@@ -541,7 +541,7 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase):
         num_max_dispatch_tokens_per_rank: the actual batch size in the decoding engine should be less than 256
         https://github.com/deepseek-ai/DeepEP?tab=readme-ov-file#example-use-in-inference-decoding
         """
-        self.return_recv_hook = False
+        self.return_recv_hook = return_recv_hook
         self.device_module = torch.get_device_module()
         self.quant_config = {}
...
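With this change, `return_recv_hook` is taken from the constructor argument instead of being hard-coded to `False`, so callers can opt into DeepEP's hook-based receive path. A minimal sketch of that pattern, adapted from the DeepEP README example linked above; it assumes `buffer` is an already-initialized `deep_ep.Buffer`, and `other_microbatch_compute` is a hypothetical placeholder:

    # Issue the low-latency dispatch but defer the actual receive.
    recv_hidden_states, recv_expert_count, handle, event, hook = \
        buffer.low_latency_dispatch(hidden_states, topk_idx,
                                    num_max_dispatch_tokens_per_rank, num_experts,
                                    async_finish=False, return_recv_hook=True)
    # With return_recv_hook=True the kernel only issues the RDMA requests;
    # independent work (e.g. the other TBO micro-batch) can run here and
    # overlap with the in-flight communication.
    other_microbatch_compute()  # hypothetical placeholder
    hook()  # block until the dispatched tokens have actually been received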
...
@@ -758,7 +758,7 @@ class TboForwardBatchPreparer:
         # TODO we may make padding on both sub-batches to make it slightly more balanced
         value_a = min(tbo_split_token_index, num_token_non_padded)
         value_b = max(0, num_token_non_padded - tbo_split_token_index)
-        return torch.tensor([value_a, value_b], dtype=torch.int32).to(
+        return torch.tensor([value_a, value_b], dtype=torch.int32).pin_memory().to(
             device=get_global_server_args().device, non_blocking=True
         )
...
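The second hunk inserts `pin_memory()` before the host-to-device copy. This matters because `non_blocking=True` only yields a truly asynchronous transfer when the source tensor lives in pinned (page-locked) host memory; from ordinary pageable memory, CUDA silently falls back to a synchronous copy. A standalone illustration of the difference, assuming a CUDA device is available (not part of the patch):

    import torch

    values = torch.tensor([3, 5], dtype=torch.int32)  # pageable host memory
    pinned = values.pin_memory()                      # page-locked copy of the same data

    # From pageable memory, non_blocking=True degrades to a blocking copy;
    # from pinned memory, the copy can overlap with other GPU work.
    on_gpu = pinned.to(device="cuda", non_blocking=True)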