Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8da48e4d
Unverified
Commit
8da48e4d
authored
Aug 23, 2024
by
Pooya Davoodi
Committed by
GitHub
Aug 23, 2024
Browse files
[Frontend] Publish Prometheus metrics in run_batch API (#7641)
parent
6885fde3
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
76 additions
and
0 deletions
+76
-0
tests/entrypoints/openai/test_metrics.py
tests/entrypoints/openai/test_metrics.py
+49
-0
vllm/entrypoints/openai/run_batch.py
vllm/entrypoints/openai/run_batch.py
+27
-0
No files found.
tests/entrypoints/openai/test_metrics.py
View file @
8da48e4d
import
subprocess
import
sys
import
tempfile
import
time
from
http
import
HTTPStatus
from
http
import
HTTPStatus
import
openai
import
openai
...
@@ -177,3 +181,48 @@ async def test_metrics_exist(client: openai.AsyncOpenAI):
...
@@ -177,3 +181,48 @@ async def test_metrics_exist(client: openai.AsyncOpenAI):
for
metric
in
EXPECTED_METRICS
:
for
metric
in
EXPECTED_METRICS
:
assert
metric
in
response
.
text
assert
metric
in
response
.
text
def test_metrics_exist_run_batch():
    """Check that ``run_batch`` serves Prometheus metrics with --enable-metrics.

    The test writes a single embeddings request to a temporary input file,
    launches ``vllm.entrypoints.openai.run_batch`` as a subprocess with
    ``--enable-metrics``, polls the metrics server until it answers, and then
    asserts that ``GET /metrics`` returns HTTP 200.

    The polling loop is bounded by a deadline and aborts early if the
    subprocess exits, so a failed startup fails the test instead of hanging
    it. The subprocess is always cleaned up, even when an assertion fails.
    """
    input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}"""  # noqa: E501

    base_url = "0.0.0.0"
    port = "8001"
    server_url = f"http://{base_url}:{port}"

    # Upper bound on how long we wait for the metrics server to come up
    # (model loading can be slow), and a per-request timeout so a single
    # stuck HTTP call cannot block the test indefinitely.
    startup_timeout_s = 600
    request_timeout_s = 10

    with tempfile.NamedTemporaryFile(
            "w") as input_file, tempfile.NamedTemporaryFile(
                "r") as output_file:
        input_file.write(input_batch)
        # Flush so the subprocess sees the request when it opens the file.
        input_file.flush()
        proc = subprocess.Popen([
            sys.executable,
            "-m",
            "vllm.entrypoints.openai.run_batch",
            "-i",
            input_file.name,
            "-o",
            output_file.name,
            "--model",
            "intfloat/e5-mistral-7b-instruct",
            "--enable-metrics",
            "--url",
            base_url,
            "--port",
            port,
        ], )

        def is_server_up(url):
            """Return True once ``url`` answers with HTTP 200."""
            try:
                response = requests.get(url, timeout=request_timeout_s)
                return response.status_code == 200
            except (requests.ConnectionError, requests.Timeout):
                return False

        try:
            deadline = time.monotonic() + startup_timeout_s
            while not is_server_up(server_url):
                # If the batch process already exited, the server will never
                # come up; fail fast instead of spinning forever.
                assert proc.poll() is None, (
                    "run_batch exited with code "
                    f"{proc.returncode} before the metrics server was up")
                assert time.monotonic() < deadline, (
                    "metrics server did not come up within "
                    f"{startup_timeout_s} seconds")
                time.sleep(1)

            response = requests.get(server_url + "/metrics",
                                    timeout=request_timeout_s)
            assert response.status_code == HTTPStatus.OK

            # Let the batch job run to completion, as the original test did.
            proc.wait()
        finally:
            # Never leave the subprocess behind (it holds the port and the
            # model) when the test fails part-way through.
            if proc.poll() is None:
                proc.terminate()
                try:
                    proc.wait(timeout=30)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait()
vllm/entrypoints/openai/run_batch.py
View file @
8da48e4d
...
@@ -3,6 +3,7 @@ from io import StringIO
...
@@ -3,6 +3,7 @@ from io import StringIO
from
typing
import
Awaitable
,
Callable
,
List
from
typing
import
Awaitable
,
Callable
,
List
import
aiohttp
import
aiohttp
from
prometheus_client
import
start_http_server
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
nullable_str
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
nullable_str
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
...
@@ -56,6 +57,24 @@ def parse_args():
...
@@ -56,6 +57,24 @@ def parse_args():
'ID numbers being printed in log.'
'ID numbers being printed in log.'
'
\n\n
Default: Unlimited'
)
'
\n\n
Default: Unlimited'
)
parser
.
add_argument
(
"--enable-metrics"
,
action
=
"store_true"
,
help
=
"Enable Prometheus metrics"
)
parser
.
add_argument
(
"--url"
,
type
=
str
,
default
=
"0.0.0.0"
,
help
=
"URL to the Prometheus metrics server "
"(only needed if enable-metrics is set)."
,
)
parser
.
add_argument
(
"--port"
,
type
=
int
,
default
=
8000
,
help
=
"Port number for the Prometheus metrics server "
"(only needed if enable-metrics is set)."
,
)
return
parser
.
parse_args
()
return
parser
.
parse_args
()
...
@@ -184,4 +203,12 @@ if __name__ == "__main__":
...
@@ -184,4 +203,12 @@ if __name__ == "__main__":
logger
.
info
(
"vLLM batch processing API version %s"
,
VLLM_VERSION
)
logger
.
info
(
"vLLM batch processing API version %s"
,
VLLM_VERSION
)
logger
.
info
(
"args: %s"
,
args
)
logger
.
info
(
"args: %s"
,
args
)
# Start the Prometheus metrics server. LLMEngine uses the Prometheus client
# to publish metrics at the /metrics endpoint.
if
args
.
enable_metrics
:
logger
.
info
(
"Prometheus metrics enabled"
)
start_http_server
(
port
=
args
.
port
,
addr
=
args
.
url
)
else
:
logger
.
info
(
"Prometheus metrics disabled"
)
asyncio
.
run
(
main
(
args
))
asyncio
.
run
(
main
(
args
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment