Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3e70e3d4
Unverified
Commit
3e70e3d4
authored
Oct 03, 2025
by
HUIJONG JEONG
Committed by
GitHub
Oct 03, 2025
Browse files
add(v1): RequestStatesStats to RequestOutput (#24947)
Signed-off-by:
huijjj
<
huijong.jeong@squeezebits.com
>
parent
eb0fa438
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
24 additions
and
11 deletions
+24
-11
tests/entrypoints/llm/test_generate.py
tests/entrypoints/llm/test_generate.py
+13
-0
vllm/outputs.py
vllm/outputs.py
+2
-1
vllm/v1/engine/output_processor.py
vllm/v1/engine/output_processor.py
+9
-10
No files found.
tests/entrypoints/llm/test_generate.py
View file @
3e70e3d4
...
@@ -86,3 +86,16 @@ def test_max_model_len():
...
@@ -86,3 +86,16 @@ def test_max_model_len():
# It can be less if generation finishes due to other reasons (e.g., EOS)
# It can be less if generation finishes due to other reasons (e.g., EOS)
# before reaching the absolute model length limit.
# before reaching the absolute model length limit.
assert
num_total_tokens
<=
max_model_len
assert
num_total_tokens
<=
max_model_len
def
test_log_stats
():
llm
=
LLM
(
model
=
MODEL_NAME
,
disable_log_stats
=
False
,
gpu_memory_utilization
=
0.10
,
enforce_eager
=
True
,
# reduce test time
)
outputs
=
llm
.
generate
(
PROMPTS
,
sampling_params
=
None
)
# disable_log_stats is False, every output should have metrics
assert
all
(
output
.
metrics
is
not
None
for
output
in
outputs
)
vllm/outputs.py
View file @
3e70e3d4
...
@@ -14,6 +14,7 @@ from vllm.logprobs import PromptLogprobs, SampleLogprobs
...
@@ -14,6 +14,7 @@ from vllm.logprobs import PromptLogprobs, SampleLogprobs
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.multimodal.inputs
import
MultiModalPlaceholderDict
from
vllm.multimodal.inputs
import
MultiModalPlaceholderDict
from
vllm.sequence
import
RequestMetrics
from
vllm.sequence
import
RequestMetrics
from
vllm.v1.metrics.stats
import
RequestStateStats
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -108,7 +109,7 @@ class RequestOutput:
...
@@ -108,7 +109,7 @@ class RequestOutput:
prompt_logprobs
:
Optional
[
PromptLogprobs
],
prompt_logprobs
:
Optional
[
PromptLogprobs
],
outputs
:
list
[
CompletionOutput
],
outputs
:
list
[
CompletionOutput
],
finished
:
bool
,
finished
:
bool
,
metrics
:
Optional
[
RequestMetrics
]
=
None
,
metrics
:
Optional
[
Union
[
RequestMetrics
,
RequestStateStats
]
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
encoder_prompt
:
Optional
[
str
]
=
None
,
encoder_prompt
:
Optional
[
str
]
=
None
,
encoder_prompt_token_ids
:
Optional
[
list
[
int
]]
=
None
,
encoder_prompt_token_ids
:
Optional
[
list
[
int
]]
=
None
,
...
...
vllm/v1/engine/output_processor.py
View file @
3e70e3d4
...
@@ -248,16 +248,15 @@ class RequestState:
...
@@ -248,16 +248,15 @@ class RequestState:
if
prompt_token_ids
is
None
and
self
.
prompt_embeds
is
not
None
:
if
prompt_token_ids
is
None
and
self
.
prompt_embeds
is
not
None
:
prompt_token_ids
=
[
0
]
*
len
(
self
.
prompt_embeds
)
prompt_token_ids
=
[
0
]
*
len
(
self
.
prompt_embeds
)
return
RequestOutput
(
return
RequestOutput
(
request_id
=
request_id
,
request_id
=
request_id
,
prompt
=
self
.
prompt
,
prompt
=
self
.
prompt
,
prompt_token_ids
=
prompt_token_ids
,
prompt_token_ids
=
prompt_token_ids
,
prompt_logprobs
=
prompt_logprobs
,
prompt_logprobs
=
prompt_logprobs
,
outputs
=
cast
(
list
[
CompletionOutput
],
outputs
),
outputs
=
cast
(
list
[
CompletionOutput
],
outputs
),
finished
=
finished
,
finished
=
finished
,
kv_transfer_params
=
kv_transfer_params
,
kv_transfer_params
=
kv_transfer_params
,
num_cached_tokens
=
self
.
num_cached_tokens
,
num_cached_tokens
=
self
.
num_cached_tokens
,
metrics
=
self
.
stats
)
)
def
_new_completion_output
(
def
_new_completion_output
(
self
,
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment