"vscode:/vscode.git/clone" did not exist on "1d4cfe2be1907408d610489bdca7bc8f8d2345b1"
__init__.py 2.66 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

Zhuohan Li's avatar
Zhuohan Li committed
4
import uuid
5
import warnings
6
from typing import Any
Zhuohan Li's avatar
Zhuohan Li committed
7
8

import torch
9

10
from vllm.logger import init_logger
11

12
13
14
# Maps each deprecated ``vllm.utils`` attribute name to the name of the
# ``vllm.utils`` submodule that now provides it. The module-level
# ``__getattr__`` and ``__dir__`` hooks below read this table.
_DEPRECATED_MAPPINGS: dict[str, str] = {
    "cprofile": "profiling",
    "cprofile_context": "profiling",
    # Used by lm-eval
    "get_open_port": "network_utils",
}
18
19
20


def __getattr__(name: str) -> Any:  # noqa: D401 - short deprecation docstring
    """Module-level getattr to handle deprecated utilities.

    Python invokes this hook only after normal attribute lookup on the
    module has failed (PEP 562). Names listed in ``_DEPRECATED_MAPPINGS``
    are resolved from their new submodule after emitting a
    ``DeprecationWarning``; every other name is reported as missing.

    Args:
        name: The attribute that was requested on this module.

    Returns:
        The attribute called ``name`` fetched from
        ``vllm.utils.<submodule>``.

    Raises:
        AttributeError: If ``name`` is not a known deprecated utility.
    """
    # Local import keeps the block self-contained; the submodule itself is
    # only imported when a deprecated name is actually requested.
    import importlib

    if name not in _DEPRECATED_MAPPINGS:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    submodule_name = _DEPRECATED_MAPPINGS[name]
    warnings.warn(
        f"vllm.utils.{name} is deprecated and will be removed in a future version. "
        f"Use vllm.utils.{submodule_name}.{name} instead.",
        DeprecationWarning,
        # stacklevel=2 attributes the warning to the code that accessed the
        # deprecated name rather than to this hook.
        stacklevel=2,
    )
    module = importlib.import_module(f"vllm.utils.{submodule_name}")
    return getattr(module, name)


def __dir__() -> list[str]:
    """Return the sorted attribute names reported by ``dir()`` on this module.

    The result combines the names currently in the module globals with the
    keys of ``_DEPRECATED_MAPPINGS``.
    """
    # expose deprecated names in dir() for better UX/tab-completion
    return sorted([*globals(), *_DEPRECATED_MAPPINGS])
38
39


40
41
# Module-level logger, obtained from the project's init_logger helper using
# this module's dotted name.
logger = init_logger(__name__)

42
43
44
45
46
47
48
49
50
51
52
53
54
# Constants related to forcing the attention backend selection

# Name of the setting that may be set in order to force the attention
# backend chosen by the Attention wrapper's auto-selection.
# NOTE(review): the constant's name suggests this is an environment
# variable; the code that reads it is not visible here -- confirm.
STR_BACKEND_ENV_VAR: str = "VLLM_ATTENTION_BACKEND"

# Possible string values of the STR_BACKEND_ENV_VAR setting, each
# corresponding to one possible backend.
# NOTE(review): STR_INVALID_VAL looks like a sentinel for an unsupported
# selection rather than a real backend -- confirm against its users.
STR_FLASHINFER_ATTN_VAL: str = "FLASHINFER"
STR_FLASH_ATTN_VAL: str = "FLASH_ATTN"
STR_INVALID_VAL: str = "INVALID"

55

Cyrus Leung's avatar
Cyrus Leung committed
56
57
def random_uuid() -> str:
    """Return a freshly generated random (version 4) UUID as a hex string.

    The value is the 32-character lowercase hexadecimal form, without
    dashes.
    """
    fresh_id = uuid.uuid4()
    # ``UUID.hex`` is already a ``str``, so no further conversion is needed.
    return fresh_id.hex
58
59


60
def length_from_prompt_token_ids_or_embeds(
61
62
    prompt_token_ids: list[int] | None,
    prompt_embeds: torch.Tensor | None,
63
) -> int:
64
    """Calculate the request length (in number of tokens) give either
65
66
    prompt_token_ids or prompt_embeds.
    """
67
68
    prompt_token_len = None if prompt_token_ids is None else len(prompt_token_ids)
    prompt_embeds_len = None if prompt_embeds is None else len(prompt_embeds)
69
70
71

    if prompt_token_len is None:
        if prompt_embeds_len is None:
72
            raise ValueError("Neither prompt_token_ids nor prompt_embeds were defined.")
73
74
        return prompt_embeds_len
    else:
75
        if prompt_embeds_len is not None and prompt_embeds_len != prompt_token_len:
76
77
78
            raise ValueError(
                "Prompt token ids and prompt embeds had different lengths"
                f" prompt_token_ids={prompt_token_len}"
79
80
                f" prompt_embeds={prompt_embeds_len}"
            )
81
        return prompt_token_len