Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6fa78d8f
Unverified
Commit
6fa78d8f
authored
Sep 23, 2025
by
Isotr0py
Committed by
GitHub
Sep 23, 2025
Browse files
[V0 deprecation] Remove platform v1 controlling interface (#25410)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
9949aa2e
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
0 additions
and
81 deletions
+0
-81
tests/v1/test_async_llm_dp.py
tests/v1/test_async_llm_dp.py
+0
-5
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+0
-27
vllm/platforms/cpu.py
vllm/platforms/cpu.py
+0
-17
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+0
-4
vllm/platforms/interface.py
vllm/platforms/interface.py
+0
-14
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+0
-5
vllm/platforms/tpu.py
vllm/platforms/tpu.py
+0
-5
vllm/platforms/xpu.py
vllm/platforms/xpu.py
+0
-4
No files found.
tests/v1/test_async_llm_dp.py
View file @
6fa78d8f
...
@@ -13,7 +13,6 @@ from vllm import SamplingParams
...
@@ -13,7 +13,6 @@ from vllm import SamplingParams
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.inputs
import
PromptType
from
vllm.inputs
import
PromptType
from
vllm.platforms
import
current_platform
from
vllm.sampling_params
import
RequestOutputKind
from
vllm.sampling_params
import
RequestOutputKind
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.core_client
import
DPAsyncMPClient
from
vllm.v1.engine.core_client
import
DPAsyncMPClient
...
@@ -29,10 +28,6 @@ engine_args = AsyncEngineArgs(
...
@@ -29,10 +28,6 @@ engine_args = AsyncEngineArgs(
data_parallel_size
=
DP_SIZE
,
data_parallel_size
=
DP_SIZE
,
)
)
if
not
current_platform
.
supports_v1
(
engine_args
.
create_model_config
()):
pytest
.
skip
(
reason
=
"Requires V1-supporting platform."
,
allow_module_level
=
True
)
async
def
generate
(
async
def
generate
(
engine
:
AsyncLLM
,
engine
:
AsyncLLM
,
...
...
vllm/engine/arg_utils.py
View file @
6fa78d8f
...
@@ -1502,12 +1502,6 @@ class EngineArgs:
...
@@ -1502,12 +1502,6 @@ class EngineArgs:
_raise_or_fallback
(
feature_name
=
name
,
recommend_to_remove
=
True
)
_raise_or_fallback
(
feature_name
=
name
,
recommend_to_remove
=
True
)
return
False
return
False
# Platforms must decide if they can support v1 for this model
if
not
current_platform
.
supports_v1
(
model_config
=
model_config
):
_raise_or_fallback
(
feature_name
=
f
"device type=
{
current_platform
.
device_type
}
"
,
recommend_to_remove
=
False
)
return
False
#############################################################
#############################################################
# Experimental Features - allow users to opt in.
# Experimental Features - allow users to opt in.
...
@@ -1524,12 +1518,6 @@ class EngineArgs:
...
@@ -1524,12 +1518,6 @@ class EngineArgs:
recommend_to_remove
=
False
)
recommend_to_remove
=
False
)
return
False
return
False
# The platform may be supported on V1, but off by default for now.
if
not
current_platform
.
default_v1
(
# noqa: SIM103
model_config
=
model_config
)
and
_warn_or_fallback
(
current_platform
.
device_name
):
return
False
if
(
current_platform
.
is_cpu
()
if
(
current_platform
.
is_cpu
()
and
model_config
.
get_sliding_window
()
is
not
None
):
and
model_config
.
get_sliding_window
()
is
not
None
):
_raise_or_fallback
(
feature_name
=
"sliding window (CPU backend)"
,
_raise_or_fallback
(
feature_name
=
"sliding window (CPU backend)"
,
...
@@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
...
@@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
logger
.
warning
(
msg
)
logger
.
warning
(
msg
)
def
_warn_or_fallback
(
feature_name
:
str
)
->
bool
:
if
envs
.
is_set
(
"VLLM_USE_V1"
)
and
envs
.
VLLM_USE_V1
:
logger
.
warning
(
"Detected VLLM_USE_V1=1 with %s. Usage should "
"be considered experimental. Please report any "
"issues on Github."
,
feature_name
)
should_exit
=
False
else
:
logger
.
info
(
"%s is experimental on VLLM_USE_V1=1. "
"Falling back to V0 Engine."
,
feature_name
)
should_exit
=
True
return
should_exit
def
human_readable_int
(
value
):
def
human_readable_int
(
value
):
"""Parse human-readable integers like '1k', '2M', etc.
"""Parse human-readable integers like '1k', '2M', etc.
Including decimal values with decimal multipliers.
Including decimal values with decimal multipliers.
...
...
vllm/platforms/cpu.py
View file @
6fa78d8f
...
@@ -328,23 +328,6 @@ class CpuPlatform(Platform):
...
@@ -328,23 +328,6 @@ class CpuPlatform(Platform):
def
supports_structured_output
(
cls
)
->
bool
:
def
supports_structured_output
(
cls
)
->
bool
:
return
True
return
True
@
classmethod
def
supports_v1
(
cls
,
model_config
)
->
bool
:
"""Returns whether the current platform can support v1 for the supplied
model configuration.
"""
return
True
@
classmethod
def
default_v1
(
cls
,
model_config
)
->
bool
:
"""Returns whether the current platform can use v1 by default for the
supplied model configuration.
"""
arch
=
cls
.
get_cpu_architecture
()
return
(
cls
.
supports_v1
(
model_config
)
and
arch
in
(
CpuArchEnum
.
X86
,
CpuArchEnum
.
POWERPC
,
CpuArchEnum
.
ARM
,
CpuArchEnum
.
S390X
))
@
classmethod
@
classmethod
def
opaque_attention_op
(
cls
)
->
bool
:
def
opaque_attention_op
(
cls
)
->
bool
:
return
True
return
True
...
...
vllm/platforms/cuda.py
View file @
6fa78d8f
...
@@ -384,10 +384,6 @@ class CudaPlatformBase(Platform):
...
@@ -384,10 +384,6 @@ class CudaPlatformBase(Platform):
def
supports_fp8
(
cls
)
->
bool
:
def
supports_fp8
(
cls
)
->
bool
:
return
cls
.
has_device_capability
(
89
)
return
cls
.
has_device_capability
(
89
)
@
classmethod
def
supports_v1
(
cls
,
model_config
:
"ModelConfig"
)
->
bool
:
return
True
@
classmethod
@
classmethod
def
use_custom_allreduce
(
cls
)
->
bool
:
def
use_custom_allreduce
(
cls
)
->
bool
:
return
True
return
True
...
...
vllm/platforms/interface.py
View file @
6fa78d8f
...
@@ -482,20 +482,6 @@ class Platform:
...
@@ -482,20 +482,6 @@ class Platform:
or
parallel_config
.
distributed_executor_backend
or
parallel_config
.
distributed_executor_backend
==
"external_launcher"
)
==
"external_launcher"
)
@
classmethod
def
supports_v1
(
cls
,
model_config
:
ModelConfig
)
->
bool
:
"""Returns whether the current platform can support v1 for the supplied
model configuration.
"""
return
False
@
classmethod
def
default_v1
(
cls
,
model_config
:
ModelConfig
)
->
bool
:
"""
Returns whether the current platform supports v1 by default.
"""
return
cls
.
supports_v1
(
model_config
)
@
classmethod
@
classmethod
def
use_custom_allreduce
(
cls
)
->
bool
:
def
use_custom_allreduce
(
cls
)
->
bool
:
"""
"""
...
...
vllm/platforms/rocm.py
View file @
6fa78d8f
...
@@ -396,11 +396,6 @@ class RocmPlatform(Platform):
...
@@ -396,11 +396,6 @@ class RocmPlatform(Platform):
else
:
else
:
return
torch
.
float8_e4m3fn
return
torch
.
float8_e4m3fn
@
classmethod
def
supports_v1
(
cls
,
model_config
:
"ModelConfig"
)
->
bool
:
# V1 support on AMD gpus is experimental
return
True
@
classmethod
@
classmethod
def
use_custom_allreduce
(
cls
)
->
bool
:
def
use_custom_allreduce
(
cls
)
->
bool
:
# We only enable custom allreduce for MI300 series
# We only enable custom allreduce for MI300 series
...
...
vllm/platforms/tpu.py
View file @
6fa78d8f
...
@@ -174,11 +174,6 @@ class TpuPlatform(Platform):
...
@@ -174,11 +174,6 @@ class TpuPlatform(Platform):
def
use_all_gather
(
cls
)
->
bool
:
def
use_all_gather
(
cls
)
->
bool
:
return
True
return
True
@
classmethod
def
supports_v1
(
cls
,
model_config
:
ModelConfig
)
->
bool
:
# V1 support on TPU is experimental
return
True
@
classmethod
@
classmethod
def
validate_request
(
def
validate_request
(
cls
,
cls
,
...
...
vllm/platforms/xpu.py
View file @
6fa78d8f
...
@@ -194,10 +194,6 @@ class XPUPlatform(Platform):
...
@@ -194,10 +194,6 @@ class XPUPlatform(Platform):
def
get_device_communicator_cls
(
cls
)
->
str
:
def
get_device_communicator_cls
(
cls
)
->
str
:
return
"vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"
# noqa
return
"vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"
# noqa
@
classmethod
def
supports_v1
(
cls
,
model_config
:
ModelConfig
)
->
bool
:
return
True
@
classmethod
@
classmethod
def
device_count
(
cls
)
->
int
:
def
device_count
(
cls
)
->
int
:
return
torch
.
xpu
.
device_count
()
return
torch
.
xpu
.
device_count
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment