Commit 272a4a7b authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-wm-1202' into 'v0.9.2-dev'

[fix]解决dp模式mtp卡住问题

See merge request dcutoolkit/deeplearing/vllm!280
parents 73cbc9fe 55ee9d72
...@@ -135,7 +135,9 @@ def set_forward_context( ...@@ -135,7 +135,9 @@ def set_forward_context(
if need_to_track_batchsize: if need_to_track_batchsize:
forward_start_time = time.perf_counter() forward_start_time = time.perf_counter()
dp_metadata: Optional[DPMetadata] = None dp_metadata: Optional[DPMetadata] = None
if vllm_config.parallel_config.data_parallel_size > 1 and ( dp_size = vllm_config.parallel_config.data_parallel_size
use_navie_ep = envs.VLLM_ALL2ALL_BACKEND == 'naive' and dp_size > 1 and vllm_config.parallel_config.enable_expert_parallel
if use_navie_ep and dp_size > 1 and (
attn_metadata is not None or num_tokens is not None): attn_metadata is not None or num_tokens is not None):
dp_metadata = DPMetadata.make(vllm_config.parallel_config, dp_metadata = DPMetadata.make(vllm_config.parallel_config,
attn_metadata, num_tokens or 0, attn_metadata, num_tokens or 0,
......
...@@ -1537,7 +1537,9 @@ class FusedMoE(torch.nn.Module): ...@@ -1537,7 +1537,9 @@ class FusedMoE(torch.nn.Module):
do_naive_dispatch_combine: bool = ( do_naive_dispatch_combine: bool = (
self.dp_size > 1 self.dp_size > 1
and not self.moe_parallel_config.use_deepep_ht_kernels) and self.ep_size > 1
and envs.VLLM_ALL2ALL_BACKEND == 'naive')
#and not self.moe_parallel_config.use_deepep_ht_kernels)
if do_naive_dispatch_combine: if do_naive_dispatch_combine:
hidden_states, router_logits = get_ep_group().dispatch( hidden_states, router_logits = get_ep_group().dispatch(
hidden_states, router_logits) hidden_states, router_logits)
......
...@@ -88,6 +88,9 @@ class EagleProposer: ...@@ -88,6 +88,9 @@ class EagleProposer:
1, 1,
device=device, device=device,
dtype=torch.int32) dtype=torch.int32)
self.dp_size = vllm_config.parallel_config.data_parallel_size
self.enable_expert_parallel = vllm_config.parallel_config.enable_expert_parallel
def propose( def propose(
self, self,
...@@ -529,6 +532,17 @@ class EagleProposer: ...@@ -529,6 +532,17 @@ class EagleProposer:
self.hidden_states[:num_tokens], self.hidden_states[:num_tokens],
) )
if self.dp_size > 1 and self.enable_expert_parallel and self.num_speculative_tokens > 1:
for _ in range(self.num_speculative_tokens - 1):
with set_forward_context(attn_metadata,
self.vllm_config,
num_tokens=num_tokens):
self.model(
self.input_ids[:num_tokens],
self.positions[:num_tokens],
self.hidden_states[:num_tokens],
)
def validate_same_kv_cache_group(self, def validate_same_kv_cache_group(self,
kv_cache_config: KVCacheConfig) -> None: kv_cache_config: KVCacheConfig) -> None:
""" """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment