Unverified commit 03dccc88, authored by Cyrus Leung and committed by GitHub
Browse files

[Misc] Add vLLM version getter to utils (#5098)

parent a65634d3
...@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str: ...@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:
def get_vllm_version() -> str: def get_vllm_version() -> str:
version = find_version(get_path("vllm", "__init__.py")) version = find_version(get_path("vllm", "version.py"))
if _is_cuda(): if _is_cuda():
cuda_version = str(get_nvcc_cuda_version()) cuda_version = str(get_nvcc_cuda_version())
......
...@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput, ...@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput,
from vllm.pooling_params import PoolingParams from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
__version__ = "0.5.0" from .version import __version__
__all__ = [ __all__ = [
"__version__",
"LLM", "LLM",
"ModelRegistry", "ModelRegistry",
"PromptStrictInputs", "PromptStrictInputs",
......
...@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union ...@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union
from transformers import GenerationConfig, PreTrainedTokenizer from transformers import GenerationConfig, PreTrainedTokenizer
import vllm
from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, LoadConfig, from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, LoadConfig,
LoRAConfig, ModelConfig, ParallelConfig, LoRAConfig, ModelConfig, ParallelConfig,
SchedulerConfig, SpeculativeConfig, SchedulerConfig, SpeculativeConfig,
...@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup, ...@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled, from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
usage_message) usage_message)
from vllm.utils import Counter from vllm.utils import Counter
from vllm.version import __version__ as VLLM_VERSION
logger = init_logger(__name__) logger = init_logger(__name__)
_LOCAL_LOGGING_INTERVAL_SEC = 5 _LOCAL_LOGGING_INTERVAL_SEC = 5
...@@ -169,7 +169,7 @@ class LLMEngine: ...@@ -169,7 +169,7 @@ class LLMEngine:
"enforce_eager=%s, kv_cache_dtype=%s, " "enforce_eager=%s, kv_cache_dtype=%s, "
"quantization_param_path=%s, device_config=%s, " "quantization_param_path=%s, device_config=%s, "
"decoding_config=%r, seed=%d, served_model_name=%s)", "decoding_config=%r, seed=%d, served_model_name=%s)",
vllm.__version__, VLLM_VERSION,
model_config.model, model_config.model,
speculative_config, speculative_config,
model_config.tokenizer, model_config.tokenizer,
......
...@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse ...@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
from prometheus_client import make_asgi_app from prometheus_client import make_asgi_app
from starlette.routing import Mount from starlette.routing import Mount
import vllm
import vllm.envs as envs import vllm.envs as envs
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
...@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion ...@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.version import __version__ as VLLM_VERSION
TIMEOUT_KEEP_ALIVE = 5 # seconds TIMEOUT_KEEP_ALIVE = 5 # seconds
...@@ -93,7 +93,7 @@ async def show_available_models(): ...@@ -93,7 +93,7 @@ async def show_available_models():
@app.get("/version") @app.get("/version")
async def show_version(): async def show_version():
ver = {"version": vllm.__version__} ver = {"version": VLLM_VERSION}
return JSONResponse(content=ver) return JSONResponse(content=ver)
...@@ -174,7 +174,7 @@ if __name__ == "__main__": ...@@ -174,7 +174,7 @@ if __name__ == "__main__":
raise ValueError(f"Invalid middleware {middleware}. " raise ValueError(f"Invalid middleware {middleware}. "
f"Must be a function or a class.") f"Must be a function or a class.")
logger.info("vLLM API server version %s", vllm.__version__) logger.info("vLLM API server version %s", VLLM_VERSION)
logger.info("args: %s", args) logger.info("args: %s", args)
if args.served_model_name is not None: if args.served_model_name is not None:
......
...@@ -5,7 +5,6 @@ from io import StringIO ...@@ -5,7 +5,6 @@ from io import StringIO
import aiohttp import aiohttp
import vllm
from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import (BatchRequestInput, from vllm.entrypoints.openai.protocol import (BatchRequestInput,
...@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat ...@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import random_uuid from vllm.utils import random_uuid
from vllm.version import __version__ as VLLM_VERSION
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -135,7 +135,7 @@ async def main(args): ...@@ -135,7 +135,7 @@ async def main(args):
if __name__ == "__main__": if __name__ == "__main__":
args = parse_args() args = parse_args()
logger.info("vLLM API server version %s", vllm.__version__) logger.info("vLLM API server version %s", VLLM_VERSION)
logger.info("args: %s", args) logger.info("args: %s", args)
asyncio.run(main(args)) asyncio.run(main(args))
...@@ -16,6 +16,7 @@ import requests ...@@ -16,6 +16,7 @@ import requests
import torch import torch
import vllm.envs as envs import vllm.envs as envs
from vllm.version import __version__ as VLLM_VERSION
_config_home = envs.VLLM_CONFIG_ROOT _config_home = envs.VLLM_CONFIG_ROOT
_USAGE_STATS_JSON_PATH = os.path.join(_config_home, "vllm/usage_stats.json") _USAGE_STATS_JSON_PATH = os.path.join(_config_home, "vllm/usage_stats.json")
...@@ -163,9 +164,8 @@ class UsageMessage: ...@@ -163,9 +164,8 @@ class UsageMessage:
]) ])
# vLLM information # vLLM information
import vllm # delayed import to prevent circular import
self.context = usage_context.value self.context = usage_context.value
self.vllm_version = vllm.__version__ self.vllm_version = VLLM_VERSION
self.model_architecture = model_architecture self.model_architecture = model_architecture
# Metadata # Metadata
......
__version__ = "0.5.0"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment