[Model Runner V2][Minor] Remove redundant `do_spec_decode` field (#35039)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk@inferact.ai>

[Model Runner V2][Minor] Remove redundant `do_spec_decode` field (#35039)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk@inferact.ai>
944ffb59 · Nick Hill · GitHub · 2bcf71b9 · 944ffb59
Unverified commit 944ffb59, authored Feb 22, 2026 by Nick Hill; committed by GitHub on Feb 22, 2026.
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 8 deletions

vllm/v1/worker/gpu/model_runner.py vllm/v1/worker/gpu/model_runner.py +3 -8

No files found.
--- a/vllm/v1/worker/gpu/model_runner.py
+++ b/vllm/v1/worker/gpu/model_runner.py
@@ -153,9 +153,9 @@ class GPUModelRunner(LoRAModelRunnerMixin):
        self.cp_interleave = self.parallel_config.cp_kv_cache_interleave_size

        self.speculator = None
+        self.num_speculative_steps = 0
        self.use_aux_hidden_state_outputs = False
        if self.speculative_config is not None:
-            self.do_spec_decode = True
            self.num_speculative_steps = self.speculative_config.num_speculative_tokens
            if self.is_last_pp_rank:
                self.speculator = init_speculator(self.vllm_config, self.device)
@@ -165,9 +165,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                self.use_aux_hidden_state_outputs = True
                if self.pp_size > 1:
                    raise ValueError("EAGLE3 with pipeline parallel is not supported.")
-        else:
-            self.do_spec_decode = False
-            self.num_speculative_steps = 0

        # Draft tokens propagation - for spec-dec + struct outputs.
        self.draft_tokens_handler = DraftTokensHandler(self.device)
@@ -251,10 +248,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
        )

        prepare_communication_buffer_for_model(self.model)
-        if self.do_spec_decode:
-            speculator_model = getattr(self.speculator, "model", None)
-            if speculator_model is not None:
-                prepare_communication_buffer_for_model(speculator_model)
+        if self.speculator is not None:
+            prepare_communication_buffer_for_model(self.speculator)

    def get_model(self) -> nn.Module:
        return self.model