Unverified commit 3e70e3d4, authored by HUIJONG JEONG and committed by GitHub
Browse files

add(v1): RequestStateStats to RequestOutput (#24947)


Signed-off-by: huijjj <huijong.jeong@squeezebits.com>
parent eb0fa438
...@@ -86,3 +86,16 @@ def test_max_model_len(): ...@@ -86,3 +86,16 @@ def test_max_model_len():
# It can be less if generation finishes due to other reasons (e.g., EOS) # It can be less if generation finishes due to other reasons (e.g., EOS)
# before reaching the absolute model length limit. # before reaching the absolute model length limit.
assert num_total_tokens <= max_model_len assert num_total_tokens <= max_model_len
def test_log_stats():
    """Check that generated outputs carry metrics when stats logging is on.

    Builds an ``LLM`` with ``disable_log_stats=False``, calls ``generate``
    on ``PROMPTS`` with ``sampling_params=None``, and asserts that every
    returned output has a non-``None`` ``metrics`` attribute.
    """
    llm = LLM(
        model=MODEL_NAME,
        disable_log_stats=False,
        gpu_memory_utilization=0.10,
        enforce_eager=True,  # reduce test time
    )
    outputs = llm.generate(PROMPTS, sampling_params=None)

    # Guard against a vacuous pass: checking "every output" over an empty
    # result would succeed without verifying anything.
    assert outputs, "generate() returned no outputs"

    # disable_log_stats is False, every output should have metrics
    for idx, output in enumerate(outputs):
        assert output.metrics is not None, f"output {idx} has no metrics"
...@@ -14,6 +14,7 @@ from vllm.logprobs import PromptLogprobs, SampleLogprobs ...@@ -14,6 +14,7 @@ from vllm.logprobs import PromptLogprobs, SampleLogprobs
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.multimodal.inputs import MultiModalPlaceholderDict from vllm.multimodal.inputs import MultiModalPlaceholderDict
from vllm.sequence import RequestMetrics from vllm.sequence import RequestMetrics
from vllm.v1.metrics.stats import RequestStateStats
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -108,7 +109,7 @@ class RequestOutput: ...@@ -108,7 +109,7 @@ class RequestOutput:
prompt_logprobs: Optional[PromptLogprobs], prompt_logprobs: Optional[PromptLogprobs],
outputs: list[CompletionOutput], outputs: list[CompletionOutput],
finished: bool, finished: bool,
metrics: Optional[RequestMetrics] = None, metrics: Optional[Union[RequestMetrics, RequestStateStats]] = None,
lora_request: Optional[LoRARequest] = None, lora_request: Optional[LoRARequest] = None,
encoder_prompt: Optional[str] = None, encoder_prompt: Optional[str] = None,
encoder_prompt_token_ids: Optional[list[int]] = None, encoder_prompt_token_ids: Optional[list[int]] = None,
......
...@@ -248,16 +248,15 @@ class RequestState: ...@@ -248,16 +248,15 @@ class RequestState:
if prompt_token_ids is None and self.prompt_embeds is not None: if prompt_token_ids is None and self.prompt_embeds is not None:
prompt_token_ids = [0] * len(self.prompt_embeds) prompt_token_ids = [0] * len(self.prompt_embeds)
return RequestOutput( return RequestOutput(request_id=request_id,
request_id=request_id, prompt=self.prompt,
prompt=self.prompt, prompt_token_ids=prompt_token_ids,
prompt_token_ids=prompt_token_ids, prompt_logprobs=prompt_logprobs,
prompt_logprobs=prompt_logprobs, outputs=cast(list[CompletionOutput], outputs),
outputs=cast(list[CompletionOutput], outputs), finished=finished,
finished=finished, kv_transfer_params=kv_transfer_params,
kv_transfer_params=kv_transfer_params, num_cached_tokens=self.num_cached_tokens,
num_cached_tokens=self.num_cached_tokens, metrics=self.stats)
)
def _new_completion_output( def _new_completion_output(
self, self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment