__init__.py 1.94 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""
3
4
5
6
# version.py should be an independent library, and we always import the
# version library first.  This assumption is critical for some customizations.
from .version import __version__, __version_tuple__  # isort:skip

7
8
9
10
# The environment variable overrides must be imported before any other
# module, so that the environment variables are already set by the time
# those modules are imported.
import vllm.env_override  # isort:skip  # noqa: F401
11

Woosuk Kwon's avatar
Woosuk Kwon committed
12
13
14
15
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
16
from vllm.executor.ray_utils import initialize_ray_cluster
17
from vllm.inputs import PromptType, TextPrompt, TokensPrompt
18
from vllm.model_executor.models import ModelRegistry
19
20
21
22
23
from vllm.outputs import (ClassificationOutput, ClassificationRequestOutput,
                          CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, PoolingOutput,
                          PoolingRequestOutput, RequestOutput, ScoringOutput,
                          ScoringRequestOutput)
24
from vllm.pooling_params import PoolingParams
Woosuk Kwon's avatar
Woosuk Kwon committed
25
from vllm.sampling_params import SamplingParams
zhuwenwen's avatar
zhuwenwen committed
26
from vllm.version import __version__, __version_tuple__, __hcu_version__
Woosuk Kwon's avatar
Woosuk Kwon committed
27
28
29


# Public API of the package: the names bound by `from vllm import *`.
# Every entry must match a name imported at the top of this module.
__all__ = [
    # vllm.version
    "__version__",
    "__version_tuple__",
    # Imported from vllm.version together with the two names above; listed
    # here so that it is exported consistently with them instead of being an
    # imported-but-unexported name.
    "__hcu_version__",
    # vllm.entrypoints.llm
    "LLM",
    # vllm.model_executor.models
    "ModelRegistry",
    # vllm.inputs
    "PromptType",
    "TextPrompt",
    "TokensPrompt",
    # vllm.sampling_params
    "SamplingParams",
    # vllm.outputs
    "RequestOutput",
    "CompletionOutput",
    "PoolingOutput",
    "PoolingRequestOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "ClassificationOutput",
    "ClassificationRequestOutput",
    "ScoringOutput",
    "ScoringRequestOutput",
    # vllm.engine.llm_engine / arg_utils / async_llm_engine
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    # vllm.executor.ray_utils
    "initialize_ray_cluster",
    # vllm.pooling_params
    "PoolingParams",
]