Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3e70e3d4
Unverified
Commit
3e70e3d4
authored
Oct 03, 2025
by
HUIJONG JEONG
Committed by
GitHub
Oct 03, 2025
Browse files
add(v1): RequestStatesStats to RequestOutput (#24947)
Signed-off-by:
huijjj
<
huijong.jeong@squeezebits.com
>
parent
eb0fa438
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
24 additions
and
11 deletions
+24
-11
tests/entrypoints/llm/test_generate.py
tests/entrypoints/llm/test_generate.py
+13
-0
vllm/outputs.py
vllm/outputs.py
+2
-1
vllm/v1/engine/output_processor.py
vllm/v1/engine/output_processor.py
+9
-10
No files found.
tests/entrypoints/llm/test_generate.py
View file @
3e70e3d4
...
...
@@ -86,3 +86,16 @@ def test_max_model_len():
# It can be less if generation finishes due to other reasons (e.g., EOS)
# before reaching the absolute model length limit.
assert
num_total_tokens
<=
max_model_len
def test_log_stats():
    """Verify that generated outputs carry metrics when stats logging is on.

    Builds an ``LLM`` with ``disable_log_stats=False``, runs generation over
    ``PROMPTS`` with the default sampling parameters, and checks that the
    ``metrics`` attribute of every returned output is populated.
    """
    engine_kwargs = dict(
        model=MODEL_NAME,
        disable_log_stats=False,
        gpu_memory_utilization=0.10,
        enforce_eager=True,  # reduce test time
    )
    llm = LLM(**engine_kwargs)

    outputs = llm.generate(PROMPTS, sampling_params=None)

    # disable_log_stats is False, every output should have metrics
    for output in outputs:
        assert output.metrics is not None
vllm/outputs.py
View file @
3e70e3d4
...
...
@@ -14,6 +14,7 @@ from vllm.logprobs import PromptLogprobs, SampleLogprobs
from
vllm.lora.request
import
LoRARequest
from
vllm.multimodal.inputs
import
MultiModalPlaceholderDict
from
vllm.sequence
import
RequestMetrics
from
vllm.v1.metrics.stats
import
RequestStateStats
logger
=
init_logger
(
__name__
)
...
...
@@ -108,7 +109,7 @@ class RequestOutput:
prompt_logprobs
:
Optional
[
PromptLogprobs
],
outputs
:
list
[
CompletionOutput
],
finished
:
bool
,
metrics
:
Optional
[
RequestMetrics
]
=
None
,
metrics
:
Optional
[
Union
[
RequestMetrics
,
RequestStateStats
]
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
encoder_prompt
:
Optional
[
str
]
=
None
,
encoder_prompt_token_ids
:
Optional
[
list
[
int
]]
=
None
,
...
...
vllm/v1/engine/output_processor.py
View file @
3e70e3d4
...
...
@@ -248,8 +248,7 @@ class RequestState:
if
prompt_token_ids
is
None
and
self
.
prompt_embeds
is
not
None
:
prompt_token_ids
=
[
0
]
*
len
(
self
.
prompt_embeds
)
return
RequestOutput
(
request_id
=
request_id
,
return
RequestOutput
(
request_id
=
request_id
,
prompt
=
self
.
prompt
,
prompt_token_ids
=
prompt_token_ids
,
prompt_logprobs
=
prompt_logprobs
,
...
...
@@ -257,7 +256,7 @@ class RequestState:
finished
=
finished
,
kv_transfer_params
=
kv_transfer_params
,
num_cached_tokens
=
self
.
num_cached_tokens
,
)
metrics
=
self
.
stats
)
def
_new_completion_output
(
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment