[Chore] Remove more V0 dead code from `sequence.py` (#31783)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Chore] Remove more V0 dead code from `sequence.py` (#31783)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
97ca4c3b · Cyrus Leung · GitHub · ee2e69d6 · 97ca4c3b · 97ca4c3b
Unverified commit 97ca4c3b, authored Jan 06, 2026 by Cyrus Leung; committed by GitHub on Jan 06, 2026
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 1 addition and 36 deletions

vllm/outputs.py +1 -2

vllm/sequence.py +0 -34

No files found.
--- a/vllm/outputs.py
+++ b/vllm/outputs.py
@@ -13,7 +13,6 @@ from vllm.logger import init_logger
 from vllm.logprobs import PromptLogprobs, SampleLogprobs
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.inputs import MultiModalPlaceholderDict
-from vllm.sequence import RequestMetrics
 from vllm.v1.metrics.stats import RequestStateStats

 logger = init_logger(__name__)
@@ -113,7 +112,7 @@ class RequestOutput:
        prompt_logprobs: PromptLogprobs | None,
        outputs: list[CompletionOutput],
        finished: bool,
-        metrics: RequestMetrics | RequestStateStats | None = None,
+        metrics: RequestStateStats | None = None,
        lora_request: LoRARequest | None = None,
        encoder_prompt: str | None = None,
        encoder_prompt_token_ids: list[int] | None = None,

--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -12,40 +12,6 @@ if TYPE_CHECKING:
 else:
    KVConnectorOutput = Any

-VLLM_TOKEN_ID_ARRAY_TYPE = "l"
-
-VLLM_INVALID_TOKEN_ID = -1
-
-
-@dataclass
-class RequestMetrics:
-    """Metrics associated with a request.
-
-    Attributes:
-        arrival_time: The time when the request arrived.
-        first_scheduled_time: The time when the request was first scheduled.
-        first_token_time: The time when the first token was generated.
-        time_in_queue: The time the request spent in the queue.
-        finished_time: The time when the request was finished.
-        scheduler_time: The time spent in the scheduler when this request was
-                        being considered by the scheduler.
-        model_forward_time: The time spent in the model forward pass when this
-                            request was in the batch.
-        model_execute_time: The time spent in the model execute function. This
-                            will include model forward, block/sync across
-                            workers, cpu-gpu sync time and sampling time.
-    """
-
-    arrival_time: float
-    last_token_time: float
-    first_scheduled_time: float | None
-    first_token_time: float | None
-    time_in_queue: float | None
-    finished_time: float | None = None
-    scheduler_time: float | None = None
-    model_forward_time: float | None = None
-    model_execute_time: float | None = None
-

 # cannot use msgspec.Struct here because Dynamo does not support it
 @dataclass