Fix DeepSeek PP + MTP issue (pipeline parallelism combined with multi-token prediction)

bfd0c5b8 · zhuwenwen · af7b564d · bfd0c5b8
Commit bfd0c5b8 authored Sep 03, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 4 deletions

vllm/v1/worker/gpu_model_runner.py +6 -4

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2450,8 +2450,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                hidden_states = outputs

            if self.speculative_config and self.speculative_config.use_eagle() and not is_profile:
-                assert isinstance(self.drafter, EagleProposer)
-                self.drafter.dummy_run(num_tokens, attn_metadata)
+                # assert isinstance(self.drafter, EagleProposer)
+                if hasattr(self, 'drafter') and isinstance(self.drafter, EagleProposer):
+                    self.drafter.dummy_run(num_tokens, attn_metadata)

        # This is necessary to avoid blocking DP.
        # For dummy runs, we typically skip EPLB since we don't have any real
@@ -3291,10 +3292,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
        kv_caches = self.initialize_kv_cache_tensors(kv_cache_config)

        if self.speculative_config and self.speculative_config.use_eagle():
-            assert isinstance(self.drafter, EagleProposer)
+            # assert isinstance(self.drafter, EagleProposer)
            # validate all draft model layers belong to the same kv cache
            # group
-            self.drafter.validate_same_kv_cache_group(kv_cache_config)
+            if hasattr(self, 'drafter') and isinstance(self.drafter, EagleProposer):
+                self.drafter.validate_same_kv_cache_group(kv_cache_config)

        if has_kv_transfer_group():
            get_kv_transfer_group().register_kv_caches(kv_caches)