Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
68210201
Unverified
Commit
68210201
authored
Sep 12, 2024
by
Alexander Matveev
Committed by
GitHub
Sep 12, 2024
Browse files
[Bugfix] Fix async log stats (#8417)
parent
84275504
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
4 deletions
+17
-4
tests/basic_correctness/test_preemption.py
tests/basic_correctness/test_preemption.py
+1
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+16
-4
No files found.
tests/basic_correctness/test_preemption.py
View file @
68210201
...
...
@@ -64,6 +64,7 @@ def test_chunked_prefill_recompute(
enable_chunked_prefill
=
enable_chunked_prefill
,
max_num_seqs
=
max_num_seqs
,
worker_use_ray
=
worker_use_ray
,
disable_log_stats
=
False
,
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_greedy
(
example_prompts
,
max_tokens
)
assert
(
vllm_model
.
model
.
llm_engine
.
scheduler
[
0
].
artificial_preempt_cnt
...
...
vllm/engine/llm_engine.py
View file @
68210201
...
...
@@ -1056,7 +1056,8 @@ class LLMEngine:
# LLMEngine/AsyncLLMEngine directly
if
is_async
:
# Log stats.
self
.
do_log_stats
(
scheduler_outputs
,
outputs
,
finished_before
)
self
.
do_log_stats
(
scheduler_outputs
,
outputs
,
finished_before
,
skip
)
# Tracing
self
.
do_tracing
(
scheduler_outputs
)
...
...
@@ -1363,18 +1364,20 @@ class LLMEngine:
def
do_log_stats
(
self
,
scheduler_outputs
:
Optional
[
SchedulerOutputs
]
=
None
,
model_output
:
Optional
[
List
[
SamplerOutput
]]
=
None
,
finished_before
:
Optional
[
List
[
int
]]
=
None
)
->
None
:
finished_before
:
Optional
[
List
[
int
]]
=
None
,
skip
:
Optional
[
List
[
int
]]
=
None
)
->
None
:
"""Forced log when no requests active."""
if
self
.
log_stats
:
stats
=
self
.
_get_stats
(
scheduler_outputs
,
model_output
,
finished_before
)
finished_before
,
skip
)
for
logger
in
self
.
stat_loggers
.
values
():
logger
.
log
(
stats
)
def
_get_stats
(
self
,
scheduler_outputs
:
Optional
[
SchedulerOutputs
],
model_output
:
Optional
[
List
[
SamplerOutput
]]
=
None
,
finished_before
:
Optional
[
List
[
int
]]
=
None
)
->
Stats
:
finished_before
:
Optional
[
List
[
int
]]
=
None
,
skip
:
Optional
[
List
[
int
]]
=
None
)
->
Stats
:
"""Get Stats to be Logged to Prometheus.
Args:
...
...
@@ -1382,6 +1385,10 @@ class LLMEngine:
the scheduled batch,
model_output: Optional, used to emit speculative decoding metrics
which are created by the workers.
finished_before: Optional, indices of sequences that were finished
before. These sequences will be ignored.
skip: Optional, indices of sequences that were preempted. These
sequences will be ignored.
"""
now
=
time
.
time
()
...
...
@@ -1456,6 +1463,11 @@ class LLMEngine:
actual_num_batched_tokens
-=
1
continue
# Currently, skip == preempted sequences, so we need to skip
# their log stats
if
skip
and
idx
in
skip
:
continue
group_was_prefill
=
idx
<
scheduler_outputs
.
num_prefill_groups
seq_group
=
scheduled_seq_group
.
seq_group
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment