Commit f5f9f42f authored by zhuwenwen
Browse files

Merge branch 'v0.7.2-dev-wm' into 'v0.7.2-dev'

[fix]修复开启并行解码后,在极端测试情况下服务无响应问题

See merge request dcutoolkit/deeplearing/vllm!97
parents f5f65d24 7a23da92
...@@ -690,14 +690,16 @@ class SpecDecodeWorker(LoraNotSupportedWorkerBase): ...@@ -690,14 +690,16 @@ class SpecDecodeWorker(LoraNotSupportedWorkerBase):
hidden_states = hidden_states[ hidden_states = hidden_states[
torch.where(sampler_output.sampled_token_ids - torch.where(sampler_output.sampled_token_ids -
VLLM_INVALID_TOKEN_ID)[0]] VLLM_INVALID_TOKEN_ID)[0]]
if self.previous_hidden_states is None and len(
seq_group_meta_with_hidden): if not skip_proposer:
self.previous_hidden_states = HiddenStates( if self.previous_hidden_states is None and len(
hidden_states, seq_group_meta_with_hidden) seq_group_meta_with_hidden):
elif self.previous_hidden_states and len( self.previous_hidden_states = HiddenStates(
seq_group_meta_with_hidden): hidden_states, seq_group_meta_with_hidden)
self.previous_hidden_states.update(hidden_states, elif self.previous_hidden_states and len(
seq_group_meta_with_hidden) seq_group_meta_with_hidden):
self.previous_hidden_states.update(hidden_states,
seq_group_meta_with_hidden)
# Store logits from target model execution. # Store logits from target model execution.
if self.tree_decoding: if self.tree_decoding:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment