Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7f570f1c
Unverified
Commit
7f570f1c
authored
Sep 25, 2025
by
wang.yuqi
Committed by
GitHub
Sep 25, 2025
Browse files
[V0 deprecation] Remove unreachable model_config.supported_tasks (#25642)
Signed-off-by:
wang.yuqi
<
noooop@126.com
>
parent
eaeca3cd
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
7 additions
and
76 deletions
+7
-76
tests/test_config.py
tests/test_config.py
+0
-3
vllm/config/model.py
vllm/config/model.py
+0
-61
vllm/engine/protocol.py
vllm/engine/protocol.py
+5
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+1
-5
vllm/entrypoints/openai/run_batch.py
vllm/entrypoints/openai/run_batch.py
+1
-7
No files found.
tests/test_config.py
View file @
7f570f1c
...
...
@@ -97,7 +97,6 @@ def test_auto_task(model_id, expected_runner_type, expected_convert_type,
assert
config
.
runner_type
==
expected_runner_type
assert
config
.
convert_type
==
expected_convert_type
assert
expected_task
in
config
.
supported_tasks
# Can remove once --task option is fully deprecated
...
...
@@ -120,7 +119,6 @@ def test_score_task(model_id, expected_runner_type, expected_convert_type,
assert
config
.
runner_type
==
expected_runner_type
assert
config
.
convert_type
==
expected_convert_type
assert
expected_task
in
config
.
supported_tasks
# Can remove once --task option is fully deprecated
...
...
@@ -137,7 +135,6 @@ def test_transcription_task(model_id, expected_runner_type,
assert
config
.
runner_type
==
expected_runner_type
assert
config
.
convert_type
==
expected_convert_type
assert
expected_task
in
config
.
supported_tasks
@
pytest
.
mark
.
parametrize
(
...
...
vllm/config/model.py
View file @
7f570f1c
...
...
@@ -14,7 +14,6 @@ from pydantic import (ConfigDict, SkipValidation, field_validator,
model_validator
)
from
pydantic.dataclasses
import
dataclass
from
safetensors.torch
import
_TYPES
as
_SAFETENSORS_TO_TORCH_DTYPE
from
typing_extensions
import
assert_never
import
vllm.envs
as
envs
from
vllm.config.multimodal
import
(
MMCacheType
,
MMEncoderTPMode
,
...
...
@@ -534,9 +533,6 @@ class ModelConfig:
f
"You can pass `--convert
{
convert_option
}
to adapt "
"it into a pooling model."
)
self
.
supported_tasks
=
self
.
_get_supported_tasks
(
architectures
,
self
.
runner_type
,
self
.
convert_type
)
# Note: Initialize these attributes early because transformers fallback
# may fail to load dynamic modules in child processes
model_info
,
arch
=
registry
.
inspect_model_cls
(
architectures
,
self
)
...
...
@@ -834,27 +830,6 @@ class ModelConfig:
return
convert_type
def
_get_supported_generation_tasks
(
self
,
architectures
:
list
[
str
],
convert_type
:
ConvertType
,
)
->
list
[
_ResolvedTask
]:
registry
=
self
.
registry
if
registry
.
is_transcription_only_model
(
architectures
,
self
):
return
[
"transcription"
]
# TODO: Use get_supported_generation_tasks once V0 is removed
supported_tasks
=
list
[
_ResolvedTask
]()
if
(
registry
.
is_text_generation_model
(
architectures
,
self
)
or
convert_type
in
_RUNNER_CONVERTS
[
"generate"
]):
supported_tasks
.
append
(
"generate"
)
if
registry
.
is_transcription_model
(
architectures
,
self
):
supported_tasks
.
append
(
"transcription"
)
return
supported_tasks
def
_get_default_pooling_task
(
self
,
architectures
:
list
[
str
],
...
...
@@ -872,42 +847,6 @@ class ModelConfig:
return
"embed"
def
_get_supported_pooling_tasks
(
self
,
architectures
:
list
[
str
],
convert_type
:
ConvertType
,
)
->
list
[
_ResolvedTask
]:
registry
=
self
.
registry
# TODO: Use get_supported_pooling_tasks once V0 is removed
supported_tasks
=
list
[
_ResolvedTask
]()
if
(
registry
.
is_pooling_model
(
architectures
,
self
)
or
convert_type
in
_RUNNER_CONVERTS
[
"pooling"
]):
supported_tasks
.
append
(
"encode"
)
extra_task
=
(
self
.
_get_default_pooling_task
(
architectures
)
if
convert_type
==
"none"
else
convert_type
)
supported_tasks
.
append
(
extra_task
)
return
supported_tasks
def
_get_supported_tasks
(
self
,
architectures
:
list
[
str
],
runner_type
:
RunnerType
,
convert_type
:
ConvertType
,
)
->
list
[
_ResolvedTask
]:
if
runner_type
==
"generate"
:
return
self
.
_get_supported_generation_tasks
(
architectures
,
convert_type
)
if
runner_type
==
"pooling"
:
return
self
.
_get_supported_pooling_tasks
(
architectures
,
convert_type
)
if
runner_type
==
"draft"
:
return
[
"draft"
]
assert_never
(
runner_type
)
def
_parse_quant_hf_config
(
self
,
hf_config
:
PretrainedConfig
):
quant_cfg
=
getattr
(
hf_config
,
"quantization_config"
,
None
)
if
quant_cfg
is
None
:
...
...
vllm/engine/protocol.py
View file @
7f570f1c
...
...
@@ -16,6 +16,7 @@ from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput
from
vllm.plugins.io_processors.interface
import
IOProcessor
from
vllm.pooling_params
import
PoolingParams
from
vllm.sampling_params
import
BeamSearchParams
,
SamplingParams
from
vllm.tasks
import
SupportedTask
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.utils
import
Device
,
collect_from_async_generator
,
random_uuid
...
...
@@ -326,3 +327,7 @@ class EngineClient(ABC):
kwargs
:
Optional
[
dict
]
=
None
):
"""Perform a collective RPC call to the given path."""
raise
NotImplementedError
async
def
get_supported_tasks
(
self
)
->
tuple
[
SupportedTask
,
...]:
"""Get supported tasks"""
raise
NotImplementedError
vllm/entrypoints/openai/api_server.py
View file @
7f570f1c
...
...
@@ -1609,11 +1609,7 @@ async def init_app_state(
state
.
vllm_config
=
vllm_config
model_config
=
vllm_config
.
model_config
if
envs
.
VLLM_USE_V1
:
supported_tasks
=
await
engine_client
\
.
get_supported_tasks
()
# type: ignore
else
:
supported_tasks
=
model_config
.
supported_tasks
supported_tasks
=
await
engine_client
.
get_supported_tasks
()
logger
.
info
(
"Supported_tasks: %s"
,
supported_tasks
)
...
...
vllm/entrypoints/openai/run_batch.py
View file @
7f570f1c
...
...
@@ -14,7 +14,6 @@ import torch
from
prometheus_client
import
start_http_server
from
tqdm
import
tqdm
import
vllm.envs
as
envs
from
vllm.config
import
VllmConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
optional_type
from
vllm.engine.protocol
import
EngineClient
...
...
@@ -334,12 +333,7 @@ async def run_batch(
model_config
=
vllm_config
.
model_config
if
envs
.
VLLM_USE_V1
:
supported_tasks
=
await
engine_client
\
.
get_supported_tasks
()
# type: ignore
else
:
supported_tasks
=
model_config
.
supported_tasks
supported_tasks
=
await
engine_client
.
get_supported_tasks
()
logger
.
info
(
"Supported_tasks: %s"
,
supported_tasks
)
# Create the openai serving objects.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment