Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
10138c92
Unverified
Commit
10138c92
authored
Nov 12, 2025
by
wangxiyuan
Committed by
GitHub
Nov 12, 2025
Browse files
[V0 deprecation] Deprecate use_v1 parameter (#28112)
Signed-off-by:
wangxiyuan
<
wangxiyuan1007@gmail.com
>
parent
a9d18b51
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
31 additions
and
35 deletions
+31
-35
tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py
..._dummy_platform/vllm_add_dummy_platform/dummy_platform.py
+0
-1
vllm/attention/selector.py
vllm/attention/selector.py
+30
-11
vllm/platforms/cpu.py
vllm/platforms/cpu.py
+0
-3
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+0
-7
vllm/platforms/interface.py
vllm/platforms/interface.py
+0
-1
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+0
-7
vllm/platforms/tpu.py
vllm/platforms/tpu.py
+0
-3
vllm/platforms/xpu.py
vllm/platforms/xpu.py
+1
-2
No files found.
tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py
View file @
10138c92
...
...
@@ -27,7 +27,6 @@ class DummyPlatform(Platform):
dtype
,
kv_cache_dtype
,
block_size
,
use_v1
,
use_mla
,
has_sink
,
use_sparse
,
...
...
vllm/attention/selector.py
View file @
10138c92
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
inspect
import
os
from
collections.abc
import
Generator
from
contextlib
import
contextmanager
...
...
@@ -141,13 +142,31 @@ def _cached_get_attn_backend(
# get device-specific attn_backend
from
vllm.platforms
import
current_platform
sig
=
inspect
.
signature
(
current_platform
.
get_attn_backend_cls
)
if
"use_v1"
in
sig
.
parameters
:
logger
.
warning_once
(
"use_v1 parameter for get_attn_backend_cls is deprecated and will "
"be removed in v0.13.0 or v1.0.0, whichever is soonest. Please "
"remove it from your plugin code."
)
attention_cls
=
current_platform
.
get_attn_backend_cls
(
selected_backend
,
head_size
,
dtype
,
kv_cache_dtype
,
block_size
,
True
,
# use_v1
use_mla
,
has_sink
,
use_sparse
,
)
else
:
attention_cls
=
current_platform
.
get_attn_backend_cls
(
selected_backend
,
head_size
,
dtype
,
kv_cache_dtype
,
block_size
,
True
,
use_mla
,
has_sink
,
use_sparse
,
...
...
vllm/platforms/cpu.py
View file @
10138c92
...
...
@@ -131,7 +131,6 @@ class CpuPlatform(Platform):
dtype
:
torch
.
dtype
,
kv_cache_dtype
:
str
|
None
,
block_size
:
int
,
use_v1
:
bool
,
use_mla
:
bool
,
has_sink
:
bool
,
use_sparse
:
bool
,
...
...
@@ -144,8 +143,6 @@ class CpuPlatform(Platform):
raise
NotImplementedError
(
"MLA is not supported on CPU."
)
if
use_sparse
:
raise
NotImplementedError
(
"Sparse Attention is not supported on CPU."
)
if
not
use_v1
:
raise
ValueError
(
"CPU backend only supports V1."
)
return
AttentionBackendEnum
.
CPU_ATTN
.
get_path
()
@
classmethod
...
...
vllm/platforms/cuda.py
View file @
10138c92
...
...
@@ -336,17 +336,10 @@ class CudaPlatformBase(Platform):
dtype
:
torch
.
dtype
,
kv_cache_dtype
:
"CacheDType | None"
,
block_size
:
int
|
None
,
use_v1
:
bool
,
use_mla
:
bool
,
has_sink
:
bool
,
use_sparse
:
bool
,
)
->
str
:
if
not
use_v1
:
raise
RuntimeError
(
"V0 attention backends have been removed. Set VLLM_USE_V1=1 "
"to select a supported backend."
)
device_capability
=
cls
.
get_device_capability
()
assert
device_capability
is
not
None
...
...
vllm/platforms/interface.py
View file @
10138c92
...
...
@@ -215,7 +215,6 @@ class Platform:
dtype
:
torch
.
dtype
,
kv_cache_dtype
:
"CacheDType | None"
,
block_size
:
int
,
use_v1
:
bool
,
use_mla
:
bool
,
has_sink
:
bool
,
use_sparse
:
bool
,
...
...
vllm/platforms/rocm.py
View file @
10138c92
...
...
@@ -213,7 +213,6 @@ class RocmPlatform(Platform):
dtype
,
kv_cache_dtype
,
block_size
,
use_v1
,
use_mla
,
has_sink
,
use_sparse
,
...
...
@@ -224,12 +223,6 @@ class RocmPlatform(Platform):
if
use_sparse
:
raise
NotImplementedError
(
"Sparse Attention is not supported on ROCm."
)
if
not
use_v1
:
raise
RuntimeError
(
"V0 attention backends have been removed. Set VLLM_USE_V1=1 "
"to select a supported backend."
)
if
use_mla
:
if
selected_backend
is
None
:
selected_backend
=
(
...
...
vllm/platforms/tpu.py
View file @
10138c92
...
...
@@ -58,7 +58,6 @@ class TpuPlatform(Platform):
dtype
:
torch
.
dtype
,
kv_cache_dtype
:
str
|
None
,
block_size
:
int
,
use_v1
:
bool
,
use_mla
:
bool
,
has_sink
,
use_sparse
,
...
...
@@ -70,8 +69,6 @@ class TpuPlatform(Platform):
if
selected_backend
!=
AttentionBackendEnum
.
PALLAS
:
logger
.
info
(
"Cannot use %s backend on TPU."
,
selected_backend
)
if
not
use_v1
:
raise
ValueError
(
"TPU backend only supports V1."
)
logger
.
info
(
"Using Pallas V1 backend."
)
return
AttentionBackendEnum
.
PALLAS
.
get_path
()
...
...
vllm/platforms/xpu.py
View file @
10138c92
...
...
@@ -48,7 +48,6 @@ class XPUPlatform(Platform):
dtype
:
torch
.
dtype
,
kv_cache_dtype
:
str
|
None
,
block_size
:
int
,
use_v1
:
bool
,
use_mla
:
bool
,
has_sink
:
bool
,
use_sparse
,
...
...
@@ -76,7 +75,7 @@ class XPUPlatform(Platform):
elif
selected_backend
:
raise
ValueError
(
f
"Invalid attention backend for
{
cls
.
device_name
}
, "
f
"with
use_v1:
{
use_v1
}
use_mla:
{
use_mla
}
"
f
"with use_mla:
{
use_mla
}
"
)
logger
.
info
(
"Using Flash Attention backend."
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment