__init__.py 2.24 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""
3
4
5
6
# version.py should be an independent library, and we always import the
# version library first. This assumption is critical for some customizations.
from .version import __version__, __version_tuple__  # isort:skip

7
8
9
import os

import torch
10

Woosuk Kwon's avatar
Woosuk Kwon committed
11
12
13
14
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
15
from vllm.executor.ray_utils import initialize_ray_cluster
16
from vllm.inputs import PromptType, TextPrompt, TokensPrompt
17
from vllm.model_executor.models import ModelRegistry
18
19
20
21
22
from vllm.outputs import (ClassificationOutput, ClassificationRequestOutput,
                          CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, PoolingOutput,
                          PoolingRequestOutput, RequestOutput, ScoringOutput,
                          ScoringRequestOutput)
23
from vllm.pooling_params import PoolingParams
Woosuk Kwon's avatar
Woosuk Kwon committed
24
from vllm.sampling_params import SamplingParams
zhuwenwen's avatar
zhuwenwen committed
25
from vllm.version import __version__, __version_tuple__, __hcu_version__
Woosuk Kwon's avatar
Woosuk Kwon committed
26
27


28
29
30
31
# Set some common config/environment variables that should be set
# for all processes created by vllm and all processes
# that interact with vllm workers.
# They are applied whenever `import vllm` is executed.
32

33
34
# Force NCCL_CUMEM_ENABLE to "0" for this process, replacing any value that
# is already present in the environment.
# Background: https://github.com/NVIDIA/nccl/issues/1234
os.environ["NCCL_CUMEM_ENABLE"] = "0"
35

36
37
38
39
# Pin TorchInductor's compile thread count to 1 in two ways: through the
# environment variable (overwriting any value already set) and by assigning
# the attribute on the inductor config object of the imported torch.
# NOTE(review): presumably the environment variable covers config that is
# read later (e.g. in child processes) while the attribute assignment covers
# config that is already loaded -- confirm against the linked issues.
# see https://github.com/vllm-project/vllm/issues/10480
os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
# see https://github.com/vllm-project/vllm/issues/10619
torch._inductor.config.compile_threads = 1
40

Woosuk Kwon's avatar
Woosuk Kwon committed
41
# Public API of the package: the names exported by `from vllm import *`.
# Every entry must be bound at module level by the imports in this file.
__all__ = [
    # Version metadata.
    "__version__",
    "__version_tuple__",
    # Offline inference entry point, model registry and prompt input types.
    "LLM",
    "ModelRegistry",
    "PromptType",
    "TextPrompt",
    "TokensPrompt",
    # Sampling parameters and generation outputs.
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    # Pooling-style outputs (pooling, embedding, classification, scoring).
    "PoolingOutput",
    "PoolingRequestOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "ClassificationOutput",
    "ClassificationRequestOutput",
    "ScoringOutput",
    "ScoringRequestOutput",
    # Engines and their argument containers.
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    # Ray cluster helper and pooling parameters.
    "initialize_ray_cluster",
    "PoolingParams",
]