Commit 2ce72b9c authored by 王敏
Browse files

[feat] deepseek mtp支持pp模式 (DeepSeek MTP: support pipeline-parallel (PP) mode)

parent 55c719cb
......@@ -37,6 +37,7 @@ from .deepseek_v2 import (
get_spec_layer_idx_from_weight_name,
)
from .utils import maybe_prefix
from .interfaces import SupportsPP
from vllm import _custom_ops as ops
from vllm.model_executor.layers.quantization.blockwise_int8 import BlockInt8Config
import vllm.envs as envs
......@@ -194,7 +195,7 @@ class DeepSeekMultiTokenPredictor(nn.Module):
@support_torch_compile
class DeepSeekMTP(nn.Module, DeepseekV2MixtureOfExperts):
class DeepSeekMTP(nn.Module, DeepseekV2MixtureOfExperts, SupportsPP):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
self.config = vllm_config.model_config.hf_config
......
......@@ -1902,7 +1902,7 @@ class GPUModelRunner(
cm.block_table_tensor = _get_block_table(kv_cache_gid)
cm.slot_mapping = slot_mappings[kv_cache_gid]
if self.speculative_config and spec_decode_common_attn_metadata is None:
if self.speculative_config and spec_decode_common_attn_metadata is None and hasattr(self, "drafter"):
if isinstance(self.drafter, EagleProposer):
if self.drafter.attn_layer_names[0] in kv_cache_group.layer_names:
spec_decode_common_attn_metadata = cm
......@@ -4840,7 +4840,8 @@ class GPUModelRunner(
self.speculative_config.use_eagle()
or self.speculative_config.uses_draft_model()
):
assert isinstance(self.drafter, EagleProposer | DraftModelProposer)
#assert isinstance(self.drafter, EagleProposer | DraftModelProposer)
if hasattr(self, "drafter") and isinstance(self.drafter, EagleProposer | DraftModelProposer):
assert self.speculative_config is not None
# Eagle currently only supports PIECEWISE cudagraphs.
# Therefore only use cudagraphs if the main model uses PIECEWISE
......@@ -5544,7 +5545,7 @@ class GPUModelRunner(
)
# Initialize eagle's cudagraph dispatcher if using eagle spec decode.
if self.speculative_config and self.speculative_config.use_eagle():
if self.speculative_config and self.speculative_config.use_eagle() and hasattr(self, "drafter"):
assert isinstance(self.drafter, EagleProposer)
self.drafter.initialize_cudagraph_keys(cudagraph_mode)
......@@ -6091,7 +6092,8 @@ class GPUModelRunner(
self.speculative_config.use_eagle()
or self.speculative_config.uses_draft_model()
):
assert isinstance(self.drafter, EagleProposer | DraftModelProposer)
#assert isinstance(self.drafter, EagleProposer | DraftModelProposer)
if hasattr(self, "drafter") and isinstance(self.drafter, EagleProposer | DraftModelProposer):
# validate all draft model layers belong to the same kv cache
# group
self.drafter.validate_same_kv_cache_group(kv_cache_config)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment