Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
03dccc88
Unverified
Commit
03dccc88
authored
Jun 14, 2024
by
Cyrus Leung
Committed by
GitHub
Jun 13, 2024
Browse files
[Misc] Add vLLM version getter to utils (#5098)
parent
a65634d3
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
13 additions
and
11 deletions
+13
-11
setup.py
setup.py
+1
-1
vllm/__init__.py
vllm/__init__.py
+2
-1
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+2
-2
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+3
-3
vllm/entrypoints/openai/run_batch.py
vllm/entrypoints/openai/run_batch.py
+2
-2
vllm/usage/usage_lib.py
vllm/usage/usage_lib.py
+2
-2
vllm/version.py
vllm/version.py
+1
-0
No files found.
setup.py
View file @
03dccc88
...
@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:
...
@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:
def
get_vllm_version
()
->
str
:
def
get_vllm_version
()
->
str
:
version
=
find_version
(
get_path
(
"vllm"
,
"
__init__
.py"
))
version
=
find_version
(
get_path
(
"vllm"
,
"
version
.py"
))
if
_is_cuda
():
if
_is_cuda
():
cuda_version
=
str
(
get_nvcc_cuda_version
())
cuda_version
=
str
(
get_nvcc_cuda_version
())
...
...
vllm/__init__.py
View file @
03dccc88
...
@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput,
...
@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput,
from
vllm.pooling_params
import
PoolingParams
from
vllm.pooling_params
import
PoolingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
__version__
=
"0.5.0"
from
.version
import
__version__
__all__
=
[
__all__
=
[
"__version__"
,
"LLM"
,
"LLM"
,
"ModelRegistry"
,
"ModelRegistry"
,
"PromptStrictInputs"
,
"PromptStrictInputs"
,
...
...
vllm/engine/llm_engine.py
View file @
03dccc88
...
@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union
...
@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union
from
transformers
import
GenerationConfig
,
PreTrainedTokenizer
from
transformers
import
GenerationConfig
,
PreTrainedTokenizer
import
vllm
from
vllm.config
import
(
CacheConfig
,
DecodingConfig
,
DeviceConfig
,
LoadConfig
,
from
vllm.config
import
(
CacheConfig
,
DecodingConfig
,
DeviceConfig
,
LoadConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
SpeculativeConfig
,
SchedulerConfig
,
SpeculativeConfig
,
...
@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
...
@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
from
vllm.usage.usage_lib
import
(
UsageContext
,
is_usage_stats_enabled
,
from
vllm.usage.usage_lib
import
(
UsageContext
,
is_usage_stats_enabled
,
usage_message
)
usage_message
)
from
vllm.utils
import
Counter
from
vllm.utils
import
Counter
from
vllm.version
import
__version__
as
VLLM_VERSION
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
_LOCAL_LOGGING_INTERVAL_SEC
=
5
_LOCAL_LOGGING_INTERVAL_SEC
=
5
...
@@ -169,7 +169,7 @@ class LLMEngine:
...
@@ -169,7 +169,7 @@ class LLMEngine:
"enforce_eager=%s, kv_cache_dtype=%s, "
"enforce_eager=%s, kv_cache_dtype=%s, "
"quantization_param_path=%s, device_config=%s, "
"quantization_param_path=%s, device_config=%s, "
"decoding_config=%r, seed=%d, served_model_name=%s)"
,
"decoding_config=%r, seed=%d, served_model_name=%s)"
,
vllm
.
__version__
,
VLLM_VERSION
,
model_config
.
model
,
model_config
.
model
,
speculative_config
,
speculative_config
,
model_config
.
tokenizer
,
model_config
.
tokenizer
,
...
...
vllm/entrypoints/openai/api_server.py
View file @
03dccc88
...
@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
...
@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
from
prometheus_client
import
make_asgi_app
from
prometheus_client
import
make_asgi_app
from
starlette.routing
import
Mount
from
starlette.routing
import
Mount
import
vllm
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
...
@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
...
@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from
vllm.entrypoints.openai.serving_embedding
import
OpenAIServingEmbedding
from
vllm.entrypoints.openai.serving_embedding
import
OpenAIServingEmbedding
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.version
import
__version__
as
VLLM_VERSION
TIMEOUT_KEEP_ALIVE
=
5
# seconds
TIMEOUT_KEEP_ALIVE
=
5
# seconds
...
@@ -93,7 +93,7 @@ async def show_available_models():
...
@@ -93,7 +93,7 @@ async def show_available_models():
@
app
.
get
(
"/version"
)
@
app
.
get
(
"/version"
)
async
def
show_version
():
async
def
show_version
():
ver
=
{
"version"
:
vllm
.
__version__
}
ver
=
{
"version"
:
VLLM_VERSION
}
return
JSONResponse
(
content
=
ver
)
return
JSONResponse
(
content
=
ver
)
...
@@ -174,7 +174,7 @@ if __name__ == "__main__":
...
@@ -174,7 +174,7 @@ if __name__ == "__main__":
raise
ValueError
(
f
"Invalid middleware
{
middleware
}
. "
raise
ValueError
(
f
"Invalid middleware
{
middleware
}
. "
f
"Must be a function or a class."
)
f
"Must be a function or a class."
)
logger
.
info
(
"vLLM API server version %s"
,
vllm
.
__version__
)
logger
.
info
(
"vLLM API server version %s"
,
VLLM_VERSION
)
logger
.
info
(
"args: %s"
,
args
)
logger
.
info
(
"args: %s"
,
args
)
if
args
.
served_model_name
is
not
None
:
if
args
.
served_model_name
is
not
None
:
...
...
vllm/entrypoints/openai/run_batch.py
View file @
03dccc88
...
@@ -5,7 +5,6 @@ from io import StringIO
...
@@ -5,7 +5,6 @@ from io import StringIO
import
aiohttp
import
aiohttp
import
vllm
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
nullable_str
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
nullable_str
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.entrypoints.openai.protocol
import
(
BatchRequestInput
,
from
vllm.entrypoints.openai.protocol
import
(
BatchRequestInput
,
...
@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
...
@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
random_uuid
from
vllm.utils
import
random_uuid
from
vllm.version
import
__version__
as
VLLM_VERSION
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -135,7 +135,7 @@ async def main(args):
...
@@ -135,7 +135,7 @@ async def main(args):
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
args
=
parse_args
()
args
=
parse_args
()
logger
.
info
(
"vLLM API server version %s"
,
vllm
.
__version__
)
logger
.
info
(
"vLLM API server version %s"
,
VLLM_VERSION
)
logger
.
info
(
"args: %s"
,
args
)
logger
.
info
(
"args: %s"
,
args
)
asyncio
.
run
(
main
(
args
))
asyncio
.
run
(
main
(
args
))
vllm/usage/usage_lib.py
View file @
03dccc88
...
@@ -16,6 +16,7 @@ import requests
...
@@ -16,6 +16,7 @@ import requests
import
torch
import
torch
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.version
import
__version__
as
VLLM_VERSION
_config_home
=
envs
.
VLLM_CONFIG_ROOT
_config_home
=
envs
.
VLLM_CONFIG_ROOT
_USAGE_STATS_JSON_PATH
=
os
.
path
.
join
(
_config_home
,
"vllm/usage_stats.json"
)
_USAGE_STATS_JSON_PATH
=
os
.
path
.
join
(
_config_home
,
"vllm/usage_stats.json"
)
...
@@ -163,9 +164,8 @@ class UsageMessage:
...
@@ -163,9 +164,8 @@ class UsageMessage:
])
])
# vLLM information
# vLLM information
import
vllm
# delayed import to prevent circular import
self
.
context
=
usage_context
.
value
self
.
context
=
usage_context
.
value
self
.
vllm_version
=
vllm
.
__version__
self
.
vllm_version
=
VLLM_VERSION
self
.
model_architecture
=
model_architecture
self
.
model_architecture
=
model_architecture
# Metadata
# Metadata
...
...
vllm/version.py
0 → 100644
View file @
03dccc88
__version__
=
"0.5.0"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment