Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bd1e64d6
Commit
bd1e64d6
authored
Jun 17, 2025
by
lizhigong
Browse files
fix zero overhead bug when kvcache oom and schedule
parent
aa906d98
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
7 deletions
+10
-7
vllm/sequence.py
vllm/sequence.py
+10
-7
No files found.
vllm/sequence.py
View file @
bd1e64d6
...
...
@@ -14,6 +14,7 @@ from typing import Any, Callable, Optional, Union
import
msgspec
import
torch
from
vllm
import
envs
from
vllm.inputs
import
SingletonInputs
from
vllm.lora.request
import
LoRARequest
from
vllm.multimodal
import
MultiModalKwargs
,
MultiModalPlaceholderDict
...
...
@@ -809,18 +810,20 @@ class SequenceGroup:
def
set_last_token_time
(
self
,
now
:
float
)
->
None
:
"""Sets the last token time for Request level timings."""
# If still in prefill phase, assertion fails.
assert
not
self
.
is_prefill
(),
(
"seq_group.set_last_token_time() should not be called "
"if the seq_group is in prefill phase."
)
if
not
envs
.
VLLM_ZERO_OVERHEAD
:
# If still in prefill phase, assertion fails.
assert
not
self
.
is_prefill
(),
(
"seq_group.set_last_token_time() should not be called "
"if the seq_group is in prefill phase."
)
self
.
last_token_latency
=
now
-
self
.
metrics
.
last_token_time
self
.
metrics
.
last_token_time
=
now
def
get_last_token_latency
(
self
)
->
float
:
"""Returns the latency of the last token."""
assert
not
self
.
is_prefill
(),
(
"seq_group.get_last_token_latency() should not be called "
"if the seq_group is in prefill phase."
)
if
not
envs
.
VLLM_ZERO_OVERHEAD
:
assert
not
self
.
is_prefill
(),
(
"seq_group.get_last_token_latency() should not be called "
"if the seq_group is in prefill phase."
)
return
self
.
last_token_latency
def
maybe_set_first_token_time
(
self
,
time
:
float
)
->
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment