__init__.py 1.04 KB
Newer Older
1
2
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

3
4
5
6
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
7
from vllm.executor.ray_utils import initialize_ray_cluster
8
from vllm.inputs import PromptStrictInputs, TextPrompt, TokensPrompt
9
from vllm.model_executor.models import ModelRegistry
10
11
12
from vllm.outputs import (CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, RequestOutput)
from vllm.pooling_params import PoolingParams
13
from vllm.sampling_params import SamplingParams
Woosuk Kwon's avatar
Woosuk Kwon committed
14

Simon Mo's avatar
Simon Mo committed
15
# Version string exposed at package level as `__version__`.
__version__ = "0.4.2"
Woosuk Kwon's avatar
Woosuk Kwon committed
16
17
18

# Names exported by `from <package> import *`. Every entry is bound by one of
# the imports at the top of this module; keep the two in sync when adding or
# removing a public name.
__all__ = [
    "LLM",
    "ModelRegistry",
    "PromptStrictInputs",
    "TextPrompt",
    "TokensPrompt",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_ray_cluster",
    "PoolingParams",
]