Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
xdb4_94051
vllm
Commits
d0215a58
Unverified
Commit
d0215a58
authored
Jan 05, 2024
by
Iskren Ivov Chernev
Committed by
GitHub
Jan 05, 2024
Browse files
Ensure metrics are logged regardless of requests (#2347)
parent
937e7b7d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
1 deletion
+28
-1
vllm/engine/async_llm_engine.py
vllm/engine/async_llm_engine.py
+6
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+3
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+19
-1
No files found.
vllm/engine/async_llm_engine.py
View file @
d0215a58
...
...
@@ -506,3 +506,9 @@ class AsyncLLMEngine:
max_log_len
=
engine_args
.
max_log_len
,
start_engine_loop
=
start_engine_loop
)
return
engine
async def do_log_stats(self) -> None:
    """Ask the wrapped engine to log its stats once.

    When ``self.engine_use_ray`` is truthy, the call is issued through
    ``self.engine.do_log_stats.remote()`` and the returned handle is
    awaited. Otherwise ``self.engine.do_log_stats()`` is called directly.
    """
    if not self.engine_use_ray:
        # Direct call on the engine object; nothing to await.
        self.engine.do_log_stats()
        return
    # Remote-style call: await whatever ``.remote()`` hands back.
    pending = self.engine.do_log_stats.remote()
    await pending
vllm/engine/llm_engine.py
View file @
d0215a58
...
...
@@ -641,6 +641,9 @@ class LLMEngine:
return
self
.
_process_model_outputs
(
output
,
scheduler_outputs
)
def do_log_stats(self) -> None:
    """Emit the engine's system stats without running a model step.

    Delegates to ``self._log_system_stats`` with ``prompt_run=False`` and
    ``0`` as the second positional argument.
    """
    prompt_run = False
    # NOTE(review): the second parameter's name is not visible from here;
    # presumably a per-step token count, hence 0 for an idle call - confirm.
    self._log_system_stats(prompt_run, 0)
def
_log_system_stats
(
self
,
prompt_run
:
bool
,
...
...
vllm/entrypoints/openai/api_server.py
View file @
d0215a58
...
...
@@ -6,6 +6,7 @@ import asyncio
import
codecs
import
json
import
time
from
contextlib
import
asynccontextmanager
from
http
import
HTTPStatus
from
typing
import
AsyncGenerator
,
Dict
,
List
,
Optional
,
Tuple
,
Union
...
...
@@ -38,11 +39,28 @@ TIMEOUT_KEEP_ALIVE = 5 # seconds
# Module-level logger for this entrypoint.
logger = init_logger(__name__)

# Module-level state, initialised to None at import time.
# NOTE(review): presumably assigned during server startup elsewhere in this
# file - confirm against the code that builds the engine.
served_model = None
engine_args = None  # ``lifespan`` reads ``engine_args.disable_log_stats``
engine = None  # ``lifespan`` calls ``engine.do_log_stats()``
response_role = None

# ``app`` is deliberately not created here: it is constructed once, after
# ``lifespan`` is defined, so the hook can be passed to the constructor.
# Building a plain ``fastapi.FastAPI()`` at this point would only create an
# instance that is immediately discarded when ``app`` is rebound.
@asynccontextmanager
async def lifespan(app: fastapi.FastAPI):
    """Application lifespan hook that logs engine stats periodically.

    On startup, unless ``engine_args.disable_log_stats`` is set, a background
    task is started that calls ``engine.do_log_stats()`` every 10 seconds, so
    stats are logged even when no requests are being served. The task is
    cancelled and awaited when the application shuts down.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """

    async def _force_log():
        # Periodic loop: sleep first, then ask the engine to log its stats.
        while True:
            await asyncio.sleep(10)
            await engine.do_log_stats()

    # Keep a strong reference to the task: the event loop only holds weak
    # references, so an unreferenced task may be garbage-collected mid-run.
    log_task = None
    if not engine_args.disable_log_stats:
        log_task = asyncio.create_task(_force_log())

    try:
        yield
    finally:
        if log_task is not None:
            log_task.cancel()
            # Wait for the task to wind down. ``return_exceptions=True``
            # absorbs the cancellation (and any earlier failure of the
            # logging loop) so shutdown itself is not interrupted.
            await asyncio.gather(log_task, return_exceptions=True)
# Build the FastAPI application with ``lifespan`` registered as its
# startup/shutdown hook.
app = fastapi.FastAPI(
    lifespan=lifespan,
)
def
parse_args
():
parser
=
argparse
.
ArgumentParser
(
description
=
"vLLM OpenAI-Compatible RESTful API server."
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment