Unverified commit 944ffb59, authored by Nick Hill and committed by GitHub
Browse files

[Model Runner V2][Minor] Remove redundant `do_spec_decode` field (#35039)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk@inferact.ai>
parent 2bcf71b9
......@@ -153,9 +153,9 @@ class GPUModelRunner(LoRAModelRunnerMixin):
self.cp_interleave = self.parallel_config.cp_kv_cache_interleave_size
self.speculator = None
self.num_speculative_steps = 0
self.use_aux_hidden_state_outputs = False
if self.speculative_config is not None:
self.do_spec_decode = True
self.num_speculative_steps = self.speculative_config.num_speculative_tokens
if self.is_last_pp_rank:
self.speculator = init_speculator(self.vllm_config, self.device)
......@@ -165,9 +165,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
self.use_aux_hidden_state_outputs = True
if self.pp_size > 1:
raise ValueError("EAGLE3 with pipeline parallel is not supported.")
else:
self.do_spec_decode = False
self.num_speculative_steps = 0
# Draft tokens propagation - for spec-dec + struct outputs.
self.draft_tokens_handler = DraftTokensHandler(self.device)
......@@ -251,10 +248,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
)
prepare_communication_buffer_for_model(self.model)
if self.do_spec_decode:
speculator_model = getattr(self.speculator, "model", None)
if speculator_model is not None:
prepare_communication_buffer_for_model(speculator_model)
if self.speculator is not None:
prepare_communication_buffer_for_model(self.speculator)
def get_model(self) -> nn.Module:
    """Return the model object stored in ``self.model``."""
    model = self.model
    return model
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment