__init__.py 1.19 KB
Newer Older
1
2
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

# Adapted from https://github.com/ray-project/ray/blob/f92928c9cfcbbf80c3a8534ca4911de1b44069c0/python/ray/__init__.py#L11
def _configure_system():
    """Prepend the bundled ``thirdparty_files`` directory to ``sys.path``.

    The directory is located next to this file. Placing it at index 0 means
    it is searched before every other ``sys.path`` entry when imports are
    resolved (needed for importing flash-attn).
    """
    import os
    import sys

    package_dir = os.path.abspath(os.path.dirname(__file__))
    bundled_dir = os.path.join(package_dir, "thirdparty_files")
    sys.path.insert(0, bundled_dir)


# Run the path setup once at import time, then remove the helper so it does
# not remain in the package namespace.
_configure_system()
del _configure_system

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs  # noqa: E402
from vllm.engine.async_llm_engine import AsyncLLMEngine  # noqa: E402
from vllm.engine.llm_engine import LLMEngine  # noqa: E402
from vllm.engine.ray_utils import initialize_cluster  # noqa: E402
from vllm.entrypoints.llm import LLM  # noqa: E402
from vllm.outputs import CompletionOutput, RequestOutput  # noqa: E402
from vllm.sampling_params import SamplingParams  # noqa: E402
Woosuk Kwon's avatar
Woosuk Kwon committed
26

Woosuk Kwon's avatar
Woosuk Kwon committed
27
# Version string of the vLLM package.
__version__ = "0.3.3"
Woosuk Kwon's avatar
Woosuk Kwon committed
28
29
30
31
32
33
34
35
36
37
38
39

# Public names exported by a wildcard import of this package
# (``from vllm import *``).
__all__ = [
    "LLM",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_cluster",
]