"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.model_executor.models import ModelRegistry
from vllm.outputs import (CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, RequestOutput)
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams

# Version string of this package.
__version__ = "0.4.2"

# Names re-exported at package level; this is also the set of names bound by
# a star-import of this package. Every entry corresponds to one of the
# imports above.
__all__ = [
    "LLM",
    "ModelRegistry",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_ray_cluster",
    "PoolingParams",
]