# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

# version.py should be an independent library, and we always import it
# first. This assumption is critical for some customizations.
from .version import __version__, __version_tuple__  # isort:skip

9
10
import typing

# The environment-variable overrides must be imported before any other
# module, so that the variables are already set when those modules are
# imported.
import vllm.env_override  # noqa: F401

# Lazily exported public names. Each key is an attribute exposed on this
# package; each value has the form "<relative module>:<attribute>" and is
# split on ":" and imported relative to this package by the module-level
# ``__getattr__`` when the name is looked up.
MODULE_ATTRS = {
    # Backward-compatibility linter markers.
    "bc_linter_skip": "._bc_linter:bc_linter_skip",
    "bc_linter_include": "._bc_linter:bc_linter_include",
    # Engine and entrypoint classes.
    "AsyncEngineArgs": ".engine.arg_utils:AsyncEngineArgs",
    "EngineArgs": ".engine.arg_utils:EngineArgs",
    "AsyncLLMEngine": ".engine.async_llm_engine:AsyncLLMEngine",
    "LLMEngine": ".engine.llm_engine:LLMEngine",
    "LLM": ".entrypoints.llm:LLM",
    "initialize_ray_cluster": ".v1.executor.ray_utils:initialize_ray_cluster",
    # Inputs, registry and request parameters.
    "PromptType": ".inputs:PromptType",
    "TextPrompt": ".inputs:TextPrompt",
    "TokensPrompt": ".inputs:TokensPrompt",
    "ModelRegistry": ".model_executor.models:ModelRegistry",
    "SamplingParams": ".sampling_params:SamplingParams",
    "PoolingParams": ".pooling_params:PoolingParams",
    # Output types.
    "ClassificationOutput": ".outputs:ClassificationOutput",
    "ClassificationRequestOutput": ".outputs:ClassificationRequestOutput",
    "CompletionOutput": ".outputs:CompletionOutput",
    "EmbeddingOutput": ".outputs:EmbeddingOutput",
    "EmbeddingRequestOutput": ".outputs:EmbeddingRequestOutput",
    "PoolingOutput": ".outputs:PoolingOutput",
    "PoolingRequestOutput": ".outputs:PoolingRequestOutput",
    "RequestOutput": ".outputs:RequestOutput",
    "ScoringOutput": ".outputs:ScoringOutput",
    "ScoringRequestOutput": ".outputs:ScoringRequestOutput",
}

if typing.TYPE_CHECKING:
    # Seen by static type checkers only. At runtime this branch is skipped
    # and the ``else`` branch defines ``__getattr__`` to resolve names from
    # MODULE_ATTRS instead.
    from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
    from vllm.engine.async_llm_engine import AsyncLLMEngine
    from vllm.engine.llm_engine import LLMEngine
    from vllm.entrypoints.llm import LLM
    from vllm.inputs import PromptType, TextPrompt, TokensPrompt
    from vllm.model_executor.models import ModelRegistry
    from vllm.outputs import (
        ClassificationOutput,
        ClassificationRequestOutput,
        CompletionOutput,
        EmbeddingOutput,
        EmbeddingRequestOutput,
        PoolingOutput,
        PoolingRequestOutput,
        RequestOutput,
        ScoringOutput,
        ScoringRequestOutput,
    )
    from vllm.pooling_params import PoolingParams
    from vllm.sampling_params import SamplingParams
    from vllm.v1.executor.ray_utils import initialize_ray_cluster

    from ._bc_linter import bc_linter_include, bc_linter_skip
else:

    def __getattr__(name: str) -> typing.Any:
        """Resolve a lazily exported attribute of this package.

        Python calls a module-level ``__getattr__`` only when normal
        attribute lookup on the module fails (PEP 562).

        Args:
            name: The attribute being looked up on the package.

        Returns:
            The object named by the ``MODULE_ATTRS`` entry for ``name``,
            fetched from the module that entry points at.

        Raises:
            AttributeError: If ``name`` is not a key of ``MODULE_ATTRS``.
        """
        from importlib import import_module

        if name not in MODULE_ATTRS:
            raise AttributeError(f"module {__package__} has no attribute {name}")

        # Entries look like ".pkg.module:attribute"; the leading dot makes
        # the import relative to this package.
        module_name, attr_name = MODULE_ATTRS[name].split(":")
        module = import_module(module_name, __package__)
        return getattr(module, attr_name)

# Public API of the package. Apart from the two version names, every entry
# is a key of MODULE_ATTRS.
__all__ = [
    "__version__",
    "bc_linter_skip",
    "bc_linter_include",
    "__version_tuple__",
    "LLM",
    "ModelRegistry",
    "PromptType",
    "TextPrompt",
    "TokensPrompt",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "PoolingOutput",
    "PoolingRequestOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "ClassificationOutput",
    "ClassificationRequestOutput",
    "ScoringOutput",
    "ScoringRequestOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_ray_cluster",
    "PoolingParams",
]