Commit 066cef58 authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-ds-wm-1224' into 'v0.9.2-dev-ds'

[feat]解决EP开启pd_padding大输入卡住问题

See merge request dcutoolkit/deeplearing/vllm!319
parents b91ae72f 7439b205
...@@ -223,8 +223,8 @@ class EagleProposer: ...@@ -223,8 +223,8 @@ class EagleProposer:
with set_forward_context(per_layer_attn_metadata, with set_forward_context(per_layer_attn_metadata,
self.vllm_config, self.vllm_config,
num_tokens=num_input_tokens, num_tokens=num_input_tokens,):
skip_cuda_graphs=not decoding): #skip_cuda_graphs=not decoding):
ret_hidden_states = self.model( ret_hidden_states = self.model(
self.input_ids[:num_input_tokens], self.input_ids[:num_input_tokens],
self.positions[:num_input_tokens], self.positions[:num_input_tokens],
......
...@@ -146,8 +146,8 @@ class V1ZeroEagleProposer(EagleProposer): ...@@ -146,8 +146,8 @@ class V1ZeroEagleProposer(EagleProposer):
with set_forward_context(per_layer_attn_metadata, with set_forward_context(per_layer_attn_metadata,
self.vllm_config, self.vllm_config,
num_tokens=num_input_tokens, num_tokens=num_input_tokens,):
skip_cuda_graphs=not decoding): #skip_cuda_graphs=not decoding):
ret_hidden_states = self.model( ret_hidden_states = self.model(
self.input_ids[:num_input_tokens], self.input_ids[:num_input_tokens],
self.positions[:num_input_tokens], self.positions[:num_input_tokens],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment