Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
dd6ac1c2
Unverified
Commit
dd6ac1c2
authored
Nov 14, 2025
by
Zhuohan Li
Committed by
GitHub
Nov 14, 2025
Browse files
[RL] [V1] Remove unused device argument from reset_prefix_cache (#28766)
Signed-off-by:
Zhuohan Li
<
zhuohan123@gmail.com
>
parent
98b4d389
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
9 additions
and
17 deletions
+9
-17
vllm/engine/protocol.py
vllm/engine/protocol.py
+1
-1
vllm/entrypoints/llm.py
vllm/entrypoints/llm.py
+2
-3
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+3
-7
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+2
-4
vllm/v1/engine/llm_engine.py
vllm/v1/engine/llm_engine.py
+1
-2
No files found.
vllm/engine/protocol.py
View file @
dd6ac1c2
...
...
@@ -125,7 +125,7 @@ class EngineClient(ABC):
...
@
abstractmethod
async
def
reset_prefix_cache
(
self
,
device
:
Device
|
None
=
None
)
->
None
:
async
def
reset_prefix_cache
(
self
)
->
None
:
"""Reset the prefix cache"""
...
...
...
vllm/entrypoints/llm.py
View file @
dd6ac1c2
...
...
@@ -32,7 +32,6 @@ from vllm.config.model import (
TokenizerMode
,
)
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.protocol
import
Device
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ChatTemplateContentFormatOption
,
...
...
@@ -1499,8 +1498,8 @@ class LLM:
def
stop_profile
(
self
)
->
None
:
self
.
llm_engine
.
stop_profile
()
def
reset_prefix_cache
(
self
,
device
:
Device
|
None
=
None
)
->
None
:
self
.
llm_engine
.
reset_prefix_cache
(
device
)
def
reset_prefix_cache
(
self
)
->
None
:
self
.
llm_engine
.
reset_prefix_cache
()
def
sleep
(
self
,
level
:
int
=
1
):
"""
...
...
vllm/entrypoints/openai/api_server.py
View file @
dd6ac1c2
...
...
@@ -39,7 +39,7 @@ from typing_extensions import assert_never
import
vllm.envs
as
envs
from
vllm.config
import
VllmConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.protocol
import
Device
,
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.anthropic.protocol
import
(
AnthropicError
,
AnthropicErrorResponse
,
...
...
@@ -1069,12 +1069,8 @@ if envs.VLLM_SERVER_DEV_MODE:
Reset the prefix cache. Note that we currently do not check if the
prefix cache is successfully reset in the API server.
"""
device
=
None
device_str
=
raw_request
.
query_params
.
get
(
"device"
)
if
device_str
is
not
None
:
device
=
Device
[
device_str
.
upper
()]
logger
.
info
(
"Resetting prefix cache with specific %s..."
,
str
(
device
))
await
engine_client
(
raw_request
).
reset_prefix_cache
(
device
)
logger
.
info
(
"Resetting prefix cache..."
)
await
engine_client
(
raw_request
).
reset_prefix_cache
()
return
Response
(
status_code
=
200
)
@
router
.
post
(
"/reset_mm_cache"
)
...
...
vllm/v1/engine/async_llm.py
View file @
dd6ac1c2
...
...
@@ -14,7 +14,7 @@ import torch
import
vllm.envs
as
envs
from
vllm.config
import
VllmConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.protocol
import
Device
,
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.utils
import
_validate_truncation_size
from
vllm.inputs
import
PromptType
from
vllm.logger
import
init_logger
...
...
@@ -672,9 +672,7 @@ class AsyncLLM(EngineClient):
self
.
processor
.
clear_mm_cache
()
await
self
.
engine_core
.
reset_mm_cache_async
()
async
def
reset_prefix_cache
(
self
,
device
:
Device
|
None
=
None
)
->
None
:
if
device
==
Device
.
CPU
:
raise
ValueError
(
"Not supported on CPU."
)
async
def
reset_prefix_cache
(
self
)
->
None
:
await
self
.
engine_core
.
reset_prefix_cache_async
()
async
def
sleep
(
self
,
level
:
int
=
1
)
->
None
:
...
...
vllm/v1/engine/llm_engine.py
View file @
dd6ac1c2
...
...
@@ -14,7 +14,6 @@ from vllm.config import ParallelConfig, VllmConfig
from
vllm.distributed
import
stateless_destroy_torch_distributed_process_group
from
vllm.distributed.parallel_state
import
get_dp_group
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.protocol
import
Device
from
vllm.inputs
import
PromptType
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
...
...
@@ -321,7 +320,7 @@ class LLMEngine:
self
.
processor
.
clear_mm_cache
()
self
.
engine_core
.
reset_mm_cache
()
def
reset_prefix_cache
(
self
,
device
:
Device
|
None
=
None
):
def
reset_prefix_cache
(
self
):
self
.
engine_core
.
reset_prefix_cache
()
def
sleep
(
self
,
level
:
int
=
1
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment