Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9e5bd307
Unverified
Commit
9e5bd307
authored
Oct 31, 2025
by
Nick Hill
Committed by
GitHub
Oct 31, 2025
Browse files
[Cleanup] Remove no-longer-used `SpeculativeConfig.enable_chunked_prefill` (#27826)
Signed-off-by: Nick Hill <nhill@redhat.com>
parent
fc16f1c4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1 addition
and
16 deletions
+1
-16
vllm/config/speculative.py
vllm/config/speculative.py
+0
-10
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+0
-6
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+1
-0
No files found.
vllm/config/speculative.py
View file @
9e5bd307
...
...
@@ -78,10 +78,6 @@ class SpeculativeConfig:
draft_tensor_parallel_size
:
int
|
None
=
Field
(
default
=
None
,
ge
=
1
)
"""The degree of the tensor parallelism for the draft model. Can only be 1
or the same as the target model's tensor parallel size."""
disable_logprobs
:
bool
=
True
"""If set to True, token log probabilities are not returned during
speculative decoding. If set to False, token log probabilities are returned
according to the log probability settings in SamplingParams."""
# Draft model configuration
quantization
:
me_quant
.
QuantizationMethods
|
None
=
None
...
...
@@ -126,12 +122,6 @@ class SpeculativeConfig:
"""The configuration of the target model."""
target_parallel_config
:
SkipValidation
[
ParallelConfig
]
=
None
# type: ignore
"""The parallel configuration for the target model."""
enable_chunked_prefill
:
SkipValidation
[
bool
]
=
None
# type: ignore
"""Whether vLLM is configured to use chunked prefill or not. Used for
raising an error since it's not yet compatible with speculative decode."""
disable_log_stats
:
SkipValidation
[
bool
]
=
None
# type: ignore
"""Whether to disable the periodic printing of stage times in speculative
decoding."""
# params generated in the post-init stage
draft_model_config
:
SkipValidation
[
ModelConfig
]
=
None
# type: ignore
...
...
vllm/engine/arg_utils.py
View file @
9e5bd307
...
...
@@ -1246,8 +1246,6 @@ class EngineArgs:
self
,
target_model_config
:
ModelConfig
,
target_parallel_config
:
ParallelConfig
,
enable_chunked_prefill
:
bool
,
disable_log_stats
:
bool
,
)
->
SpeculativeConfig
|
None
:
"""Initializes and returns a SpeculativeConfig object based on
`speculative_config`.
...
...
@@ -1267,8 +1265,6 @@ class EngineArgs:
{
"target_model_config"
:
target_model_config
,
"target_parallel_config"
:
target_parallel_config
,
"enable_chunked_prefill"
:
enable_chunked_prefill
,
"disable_log_stats"
:
disable_log_stats
,
}
)
return
SpeculativeConfig
(
**
self
.
speculative_config
)
...
...
@@ -1561,8 +1557,6 @@ class EngineArgs:
speculative_config
=
self
.
create_speculative_config
(
target_model_config
=
model_config
,
target_parallel_config
=
parallel_config
,
enable_chunked_prefill
=
self
.
enable_chunked_prefill
,
disable_log_stats
=
self
.
disable_log_stats
,
)
# make sure num_lookahead_slots is set appropriately depending on
...
...
vllm/entrypoints/openai/api_server.py
View file @
9e5bd307
...
...
@@ -241,6 +241,7 @@ async def build_async_engine_client_from_engine_args(
)
# Don't keep the dummy data in memory
assert
async_llm
is
not
None
await
async_llm
.
reset_mm_cache
()
yield
async_llm
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment