"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "ada4f4fadd20372b1bf349961a1e442b2d07c53d"
Commit 7e0cdd29 authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-fix-zero' into 'v0.9.2-dev'

fix: zero overhead KERNEL VMFault

See merge request dcutoolkit/deeplearing/vllm!446
parents fa3bae2e 808a9ed9
...@@ -154,10 +154,11 @@ class V1ZeroEagleProposer(EagleProposer): ...@@ -154,10 +154,11 @@ class V1ZeroEagleProposer(EagleProposer):
self.attn_metadata_cudagraph.decode.seq_lens[:attn_metadata.num_decode_tokens] = ( self.attn_metadata_cudagraph.decode.seq_lens[:attn_metadata.num_decode_tokens] = (
attn_metadata.decode.seq_lens) attn_metadata.decode.seq_lens)
use_ep = self.vllm_config.parallel_config.enable_expert_parallel
with set_forward_context(per_layer_attn_metadata, with set_forward_context(per_layer_attn_metadata,
self.vllm_config, self.vllm_config,
num_tokens=num_input_tokens,): num_tokens=num_input_tokens,
#skip_cuda_graphs=not decoding): skip_cuda_graphs= not (decoding or use_ep)):
ret_hidden_states = self.model( ret_hidden_states = self.model(
self.input_ids[:num_input_tokens], self.input_ids[:num_input_tokens],
self.positions[:num_input_tokens], self.positions[:num_input_tokens],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment