"vscode:/vscode.git/clone" did not exist on "18016a5e627d2a4b69af599272a5aa8ce71b98c8"
Commit 2ce72b9c authored by 王敏
Browse files

[feat]deepseek mtp支持pp模式

parent 55c719cb
...@@ -37,6 +37,7 @@ from .deepseek_v2 import ( ...@@ -37,6 +37,7 @@ from .deepseek_v2 import (
get_spec_layer_idx_from_weight_name, get_spec_layer_idx_from_weight_name,
) )
from .utils import maybe_prefix from .utils import maybe_prefix
from .interfaces import SupportsPP
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.model_executor.layers.quantization.blockwise_int8 import BlockInt8Config from vllm.model_executor.layers.quantization.blockwise_int8 import BlockInt8Config
import vllm.envs as envs import vllm.envs as envs
...@@ -194,7 +195,7 @@ class DeepSeekMultiTokenPredictor(nn.Module): ...@@ -194,7 +195,7 @@ class DeepSeekMultiTokenPredictor(nn.Module):
@support_torch_compile @support_torch_compile
class DeepSeekMTP(nn.Module, DeepseekV2MixtureOfExperts): class DeepSeekMTP(nn.Module, DeepseekV2MixtureOfExperts, SupportsPP):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
self.config = vllm_config.model_config.hf_config self.config = vllm_config.model_config.hf_config
......
...@@ -1902,7 +1902,7 @@ class GPUModelRunner( ...@@ -1902,7 +1902,7 @@ class GPUModelRunner(
cm.block_table_tensor = _get_block_table(kv_cache_gid) cm.block_table_tensor = _get_block_table(kv_cache_gid)
cm.slot_mapping = slot_mappings[kv_cache_gid] cm.slot_mapping = slot_mappings[kv_cache_gid]
if self.speculative_config and spec_decode_common_attn_metadata is None: if self.speculative_config and spec_decode_common_attn_metadata is None and hasattr(self, "drafter"):
if isinstance(self.drafter, EagleProposer): if isinstance(self.drafter, EagleProposer):
if self.drafter.attn_layer_names[0] in kv_cache_group.layer_names: if self.drafter.attn_layer_names[0] in kv_cache_group.layer_names:
spec_decode_common_attn_metadata = cm spec_decode_common_attn_metadata = cm
...@@ -4840,7 +4840,8 @@ class GPUModelRunner( ...@@ -4840,7 +4840,8 @@ class GPUModelRunner(
self.speculative_config.use_eagle() self.speculative_config.use_eagle()
or self.speculative_config.uses_draft_model() or self.speculative_config.uses_draft_model()
): ):
assert isinstance(self.drafter, EagleProposer | DraftModelProposer) #assert isinstance(self.drafter, EagleProposer | DraftModelProposer)
if hasattr(self, "drafter") and isinstance(self.drafter, EagleProposer | DraftModelProposer):
assert self.speculative_config is not None assert self.speculative_config is not None
# Eagle currently only supports PIECEWISE cudagraphs. # Eagle currently only supports PIECEWISE cudagraphs.
# Therefore only use cudagraphs if the main model uses PIECEWISE # Therefore only use cudagraphs if the main model uses PIECEWISE
...@@ -5544,7 +5545,7 @@ class GPUModelRunner( ...@@ -5544,7 +5545,7 @@ class GPUModelRunner(
) )
# Initialize eagle's cudagraph dispatcher if using eagle spec decode. # Initialize eagle's cudagraph dispatcher if using eagle spec decode.
if self.speculative_config and self.speculative_config.use_eagle(): if self.speculative_config and self.speculative_config.use_eagle() and hasattr(self, "drafter"):
assert isinstance(self.drafter, EagleProposer) assert isinstance(self.drafter, EagleProposer)
self.drafter.initialize_cudagraph_keys(cudagraph_mode) self.drafter.initialize_cudagraph_keys(cudagraph_mode)
...@@ -6091,7 +6092,8 @@ class GPUModelRunner( ...@@ -6091,7 +6092,8 @@ class GPUModelRunner(
self.speculative_config.use_eagle() self.speculative_config.use_eagle()
or self.speculative_config.uses_draft_model() or self.speculative_config.uses_draft_model()
): ):
assert isinstance(self.drafter, EagleProposer | DraftModelProposer) #assert isinstance(self.drafter, EagleProposer | DraftModelProposer)
if hasattr(self, "drafter") and isinstance(self.drafter, EagleProposer | DraftModelProposer):
# validate all draft model layers belong to the same kv cache # validate all draft model layers belong to the same kv cache
# group # group
self.drafter.validate_same_kv_cache_group(kv_cache_config) self.drafter.validate_same_kv_cache_group(kv_cache_config)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment