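Fix DeepSeek PP + MTP issue: replace the hard `assert isinstance(self.drafter, EagleProposer)` checks with `hasattr`/`isinstance` guards so drafter calls are skipped when no EagleProposer drafter is present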

bfffd59a · lizhigong · 4a62a3eb · bfffd59a
Commit bfffd59a authored Sep 02, 2025 by lizhigong
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 4 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +6 -4

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2088,7 +2088,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                hidden_states = outputs

            if self.speculative_config and self.speculative_config.use_eagle() and not is_profile:
-                assert isinstance(self.drafter, EagleProposer)
+                #assert isinstance(self.drafter, EagleProposer)
+                if hasattr(self, 'drafter') and isinstance(self.drafter, EagleProposer):
                    self.drafter.dummy_run(num_tokens, attn_metadata)

        # This is necessary to avoid blocking DP.
@@ -2677,9 +2678,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
        kv_caches = self.initialize_kv_cache_tensors(kv_cache_config)

        if self.speculative_config and self.speculative_config.use_eagle():
-            assert isinstance(self.drafter, EagleProposer)
+            #assert isinstance(self.drafter, EagleProposer)
            # validate all draft model layers belong to the same kv cache
            # group
+            if hasattr(self, 'drafter') and isinstance(self.drafter, EagleProposer):
                self.drafter.validate_same_kv_cache_group(kv_cache_config)

        if has_kv_transfer_group():