__init__.py 1.47 KB
Newer Older
1
2
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

Woosuk Kwon's avatar
Woosuk Kwon committed
3
4
5
6
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
7
from vllm.executor.ray_utils import initialize_ray_cluster
8
from vllm.inputs import PromptType, TextPrompt, TokensPrompt
9
from vllm.model_executor.models import ModelRegistry
10
11
12
13
14
from vllm.outputs import (ClassificationOutput, ClassificationRequestOutput,
                          CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, PoolingOutput,
                          PoolingRequestOutput, RequestOutput, ScoringOutput,
                          ScoringRequestOutput)
15
from vllm.pooling_params import PoolingParams
Woosuk Kwon's avatar
Woosuk Kwon committed
16
from vllm.sampling_params import SamplingParams
zhuwenwen's avatar
zhuwenwen committed
17
from vllm.version import __version__, __version_tuple__, __hcu_version__
Woosuk Kwon's avatar
Woosuk Kwon committed
18
19
20


# Public API of the package: the names exported by `from vllm import *`.
# Every entry must be bound by one of the imports at the top of this module.
__all__ = [
    # Version metadata (imported from vllm.version).
    "__version__",
    "__version_tuple__",
    # Imported alongside the two names above, so it is exported with them
    # rather than left as an unused import.
    "__hcu_version__",
    # Names from vllm.entrypoints.llm, vllm.model_executor.models and
    # vllm.inputs.
    "LLM",
    "ModelRegistry",
    "PromptType",
    "TextPrompt",
    "TokensPrompt",
    # Sampling parameters and the output types from vllm.outputs.
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "PoolingOutput",
    "PoolingRequestOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "ClassificationOutput",
    "ClassificationRequestOutput",
    "ScoringOutput",
    "ScoringRequestOutput",
    # Names from vllm.engine.
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    # Names from vllm.executor.ray_utils and vllm.pooling_params.
    "initialize_ray_cluster",
    "PoolingParams",
]