Merge branch 'v0.9.2-dev' of http://10.16.6.30/dcutoolkit/deeplearing/vllm into v0.9.2-dev

99963991 · zhuwenwen · a7668e46 · 6cc81877 · 99963991
Commit 99963991 authored Sep 03, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 4 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +6 -4

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2088,8 +2088,9 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                hidden_states = outputs
            if self.speculative_config and self.speculative_config.use_eagle() and not is_profile:
-                assert isinstance(self.drafter, EagleProposer)
+                #assert isinstance(self.drafter, EagleProposer)
-                self.drafter.dummy_run(num_tokens, attn_metadata)
+                if hasattr(self, 'drafter') and isinstance(self.drafter, EagleProposer):
+                    self.drafter.dummy_run(num_tokens, attn_metadata)
        # This is necessary to avoid blocking DP.
        # For dummy runs, we typically skip EPLB since we don't have any real
@@ -2677,10 +2678,11 @@ class GPUModelRunner(LoRAModelRunnerMixin):
        kv_caches = self.initialize_kv_cache_tensors(kv_cache_config)
        if self.speculative_config and self.speculative_config.use_eagle():
-            assert isinstance(self.drafter, EagleProposer)
+            #assert isinstance(self.drafter, EagleProposer)
            # validate all draft model layers belong to the same kv cache
            # group
-            self.drafter.validate_same_kv_cache_group(kv_cache_config)
+            if hasattr(self, 'drafter') and isinstance(self.drafter, EagleProposer):
+                self.drafter.validate_same_kv_cache_group(kv_cache_config)
        if has_kv_transfer_group():
            get_kv_transfer_group().register_kv_caches(kv_caches)