Unverified commit 97ca4c3b, authored by Cyrus Leung, committed by GitHub
Browse files

[Chore] Remove more V0 dead code from `sequence.py` (#31783)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent ee2e69d6
...@@ -13,7 +13,6 @@ from vllm.logger import init_logger ...@@ -13,7 +13,6 @@ from vllm.logger import init_logger
from vllm.logprobs import PromptLogprobs, SampleLogprobs from vllm.logprobs import PromptLogprobs, SampleLogprobs
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.multimodal.inputs import MultiModalPlaceholderDict from vllm.multimodal.inputs import MultiModalPlaceholderDict
from vllm.sequence import RequestMetrics
from vllm.v1.metrics.stats import RequestStateStats from vllm.v1.metrics.stats import RequestStateStats
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -113,7 +112,7 @@ class RequestOutput: ...@@ -113,7 +112,7 @@ class RequestOutput:
prompt_logprobs: PromptLogprobs | None, prompt_logprobs: PromptLogprobs | None,
outputs: list[CompletionOutput], outputs: list[CompletionOutput],
finished: bool, finished: bool,
metrics: RequestMetrics | RequestStateStats | None = None, metrics: RequestStateStats | None = None,
lora_request: LoRARequest | None = None, lora_request: LoRARequest | None = None,
encoder_prompt: str | None = None, encoder_prompt: str | None = None,
encoder_prompt_token_ids: list[int] | None = None, encoder_prompt_token_ids: list[int] | None = None,
......
...@@ -12,40 +12,6 @@ if TYPE_CHECKING: ...@@ -12,40 +12,6 @@ if TYPE_CHECKING:
else: else:
KVConnectorOutput = Any KVConnectorOutput = Any
# NOTE(review): presumably an `array.array` typecode for token-id buffers
# ("l" is the signed-long typecode) — confirm against the users of this name.
VLLM_TOKEN_ID_ARRAY_TYPE = "l"
# NOTE(review): presumably a sentinel marking a token id as invalid — confirm
# against the users of this name.
VLLM_INVALID_TOKEN_ID = -1
@dataclass
class RequestMetrics:
"""Metrics associated with a request.
Attributes:
arrival_time: The time when the request arrived.
first_scheduled_time: The time when the request was first scheduled.
first_token_time: The time when the first token was generated.
time_in_queue: The time the request spent in the queue.
finished_time: The time when the request was finished.
scheduler_time: The time spent in the scheduler when this request was
being considered by the scheduler.
model_forward_time: The time spent in the model forward pass when this
request was in the batch.
model_execute_time: The time spent in the model execute function. This
will include model forward, block/sync across
workers, cpu-gpu sync time and sampling time.
"""
arrival_time: float
last_token_time: float
first_scheduled_time: float | None
first_token_time: float | None
time_in_queue: float | None
finished_time: float | None = None
scheduler_time: float | None = None
model_forward_time: float | None = None
model_execute_time: float | None = None
# cannot use msgspec.Struct here because Dynamo does not support it # cannot use msgspec.Struct here because Dynamo does not support it
@dataclass @dataclass
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment