# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import logging
from collections.abc import Sequence

import torch

from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import BeamSearchParams, SamplingParams

# Module-level logger shared by everything in this file.
logger = init_logger(__name__)


class RequestLogger:
    """Log incoming requests and generated responses via the module logger.

    Logged prompt/output text and token-id lists are optionally truncated
    to ``max_log_len`` items before being written.
    """

    def __init__(self, *, max_log_len: int | None) -> None:
        """Store the truncation limit.

        Args:
            max_log_len: Maximum number of characters (for text) or token
                ids (for token lists) to include in a log record.
                ``None`` disables truncation.
        """
        self.max_log_len = max_log_len

    def log_inputs(
        self,
        request_id: str,
        prompt: str | None,
        prompt_token_ids: list[int] | None,
        prompt_embeds: torch.Tensor | None,
        params: SamplingParams | PoolingParams | BeamSearchParams | None,
        lora_request: LoRARequest | None,
    ) -> None:
        """Log the inputs of a request.

        The prompt details (text, token ids, embeds shape) are emitted only
        when DEBUG logging is enabled; the request id, ``params`` and
        ``lora_request`` are always emitted at INFO level.

        Args:
            request_id: Identifier included in every log record.
            prompt: Prompt text, or ``None``.
            prompt_token_ids: Prompt token ids, or ``None``.
            prompt_embeds: Prompt embeddings; only their shape is logged.
            params: Request parameters, logged as-is.
            lora_request: LoRA request, logged as-is.
        """
        # Skip the slicing work entirely unless the DEBUG record will
        # actually be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            max_log_len = self.max_log_len
            if max_log_len is not None:
                # Slicing rebinds the local names only; the caller's
                # objects are left untouched.
                if prompt is not None:
                    prompt = prompt[:max_log_len]

                if prompt_token_ids is not None:
                    prompt_token_ids = prompt_token_ids[:max_log_len]

            logger.debug(
                "Request %s details: prompt: %r, "
                "prompt_token_ids: %s, "
                "prompt_embeds shape: %s.",
                request_id,
                prompt,
                prompt_token_ids,
                prompt_embeds.shape if prompt_embeds is not None else None,
            )

        logger.info(
            "Received request %s: params: %s, lora_request: %s.",
            request_id,
            params,
            lora_request,
        )

    def log_outputs(
        self,
        request_id: str,
        outputs: str,
        output_token_ids: Sequence[int] | None,
        finish_reason: str | None = None,
        is_streaming: bool = False,
        delta: bool = False,
    ) -> None:
        """Log the generated output of a request at INFO level.

        Args:
            request_id: Identifier included in the log record.
            outputs: Generated text; truncated to ``max_log_len`` characters
                when a limit is set.
            output_token_ids: Generated token ids, or ``None``; truncated to
                ``max_log_len`` entries when a limit is set.
            finish_reason: Finish reason, logged as-is.
            is_streaming: Whether to tag the record as a streaming response.
            delta: With ``is_streaming``, selects the "(streaming delta)"
                tag instead of "(streaming complete)".
        """
        max_log_len = self.max_log_len
        if max_log_len is not None:
            # ``outputs`` is annotated as ``str`` but is still guarded
            # against ``None`` defensively.
            if outputs is not None:
                outputs = outputs[:max_log_len]

            if output_token_ids is not None:
                # Convert to list and apply truncation
                output_token_ids = list(output_token_ids)[:max_log_len]

        stream_info = ""
        if is_streaming:
            stream_info = " (streaming delta)" if delta else " (streaming complete)"

        logger.info(
            "Generated response %s%s: output: %r, "
            "output_token_ids: %s, finish_reason: %s",
            request_id,
            stream_info,
            outputs,
            output_token_ids,
            finish_reason,
        )