"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

# Re-export the package's public entry points at the top level so callers can
# write `from vllm import LLM, SamplingParams` instead of reaching into the
# individual submodules.
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.engine.ray_utils import initialize_ray_cluster
from vllm.entrypoints.llm import LLM
from vllm.outputs import CompletionOutput, RequestOutput
from vllm.sampling_params import SamplingParams

__version__ = "0.4.0.post1"

# Names exported by `from vllm import *`; every entry is bound by one of the
# imports above.
__all__ = [
    "LLM",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_ray_cluster",
]