Commit 1a73f6a3 authored by maxiao1
Browse files

Merge branch 'v0.5.4_dev_maxiao' into 'v0.5.4_dev'

设置tbo相关变量 & 修改tbo拷贝 (Set TBO-related variables & modify the TBO copy)

See merge request OpenDAS/sglang!37
parents 31653dd9 ae7f9123
......@@ -542,7 +542,7 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase):
num_max_dispatch_tokens_per_rank: the actual batch size in the decoding engine should be less than 256
https://github.com/deepseek-ai/DeepEP?tab=readme-ov-file#example-use-in-inference-decoding
"""
- self.return_recv_hook = False
+ self.return_recv_hook = return_recv_hook
self.device_module = torch.get_device_module()
self.quant_config = {}
......
......@@ -758,7 +758,7 @@ class TboForwardBatchPreparer:
# TODO we may make padding on both sub-batches to make it slightly more balanced
value_a = min(tbo_split_token_index, num_token_non_padded)
value_b = max(0, num_token_non_padded - tbo_split_token_index)
- return torch.tensor([value_a, value_b], dtype=torch.int32).to(
+ return torch.tensor([value_a, value_b], dtype=torch.int32).pin_memory().to(
device=get_global_server_args().device, non_blocking=True
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment