Commit cc4d1002 authored by 王敏
Browse files

零消耗添加epsp

parent 855cb148
...@@ -465,7 +465,7 @@ class V1ZeroModelRunner(GPUModelRunner): ...@@ -465,7 +465,7 @@ class V1ZeroModelRunner(GPUModelRunner):
num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
# make sure that the padded length is divisible by attn_tp_size because we may need reduce-scatter across attn_tp dim. # make sure that the padded length is divisible by attn_tp_size because we may need reduce-scatter across attn_tp dim.
if self.ep_sp: if self.ep_sp or self.enable_dp_attention:
num_input_tokens = round_up(num_scheduled_tokens, tp_size) num_input_tokens = round_up(num_scheduled_tokens, tp_size)
if (self.use_cuda_graph if (self.use_cuda_graph
and num_input_tokens <= self.cudagraph_batch_sizes[-1]): and num_input_tokens <= self.cudagraph_batch_sizes[-1]):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment