Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6366efc6
Unverified
Commit
6366efc6
authored
Jul 19, 2024
by
Cyrus Leung
Committed by
GitHub
Jul 19, 2024
Browse files
[Bugfix][Frontend] Fix missing `/metrics` endpoint (#6463)
parent
dbe55885
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
70 additions
and
5 deletions
+70
-5
tests/entrypoints/openai/test_basic.py
tests/entrypoints/openai/test_basic.py
+61
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+9
-5
No files found.
tests/entrypoints/openai/test_basic.py
0 → 100644
View file @
6366efc6
from
http
import
HTTPStatus
import
openai
import
pytest
import
requests
from
vllm.version
import
__version__
as
VLLM_VERSION
from
...utils
import
RemoteOpenAIServer
# Model identifier handed to RemoteOpenAIServer by the `server` fixture.
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@pytest.fixture(scope="module")
def server():
    """Launch a single RemoteOpenAIServer for all tests in this module.

    The server is started for ``MODEL_NAME`` with the command-line flags
    below and yielded from inside its ``with`` block, so the context
    manager is exited once the module-scoped fixture is finalized.
    """
    cli_args = [
        # use half precision for speed and memory savings in CI environment
        "--dtype", "bfloat16",
        "--max-model-len", "8192",
        "--enforce-eager",
        "--max-num-seqs", "128",
    ]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as remote_server:
        yield remote_server
@pytest.fixture(scope="module")
def client(server):
    """Return the async client obtained from the module-scoped ``server``."""
    async_client = server.get_async_client()
    return async_client
@pytest.mark.asyncio
async def test_show_version(client: openai.AsyncOpenAI):
    """Check that ``GET /version`` returns ``{"version": VLLM_VERSION}``.

    Args:
        client: Async OpenAI client from the ``client`` fixture; only its
            ``base_url`` is used, to locate the server.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the server does not answer in time.
    """
    # Drop the last three characters of the client's base URL and any
    # surrounding slashes to get the server root.
    # NOTE(review): presumably the base URL ends in "v1/" - confirm.
    base_url = str(client.base_url)[:-3].strip("/")

    # Bound the request: without a timeout, requests waits forever and a
    # stalled server would hang the whole test run instead of failing.
    response = requests.get(base_url + "/version", timeout=30)
    response.raise_for_status()

    assert response.json() == {"version": VLLM_VERSION}
@pytest.mark.asyncio
async def test_check_health(client: openai.AsyncOpenAI):
    """Check that ``GET /health`` answers with HTTP 200.

    Args:
        client: Async OpenAI client from the ``client`` fixture; only its
            ``base_url`` is used, to locate the server.

    Raises:
        requests.Timeout: If the server does not answer in time.
    """
    # Drop the last three characters of the client's base URL and any
    # surrounding slashes to get the server root.
    # NOTE(review): presumably the base URL ends in "v1/" - confirm.
    base_url = str(client.base_url)[:-3].strip("/")

    # Bound the request: without a timeout, requests waits forever and a
    # stalled server would hang the whole test run instead of failing.
    response = requests.get(base_url + "/health", timeout=30)

    assert response.status_code == HTTPStatus.OK
@pytest.mark.asyncio
async def test_log_metrics(client: openai.AsyncOpenAI):
    """Check that ``GET /metrics`` answers with HTTP 200.

    Args:
        client: Async OpenAI client from the ``client`` fixture; only its
            ``base_url`` is used, to locate the server.

    Raises:
        requests.Timeout: If the server does not answer in time.
    """
    # Drop the last three characters of the client's base URL and any
    # surrounding slashes to get the server root.
    # NOTE(review): presumably the base URL ends in "v1/" - confirm.
    base_url = str(client.base_url)[:-3].strip("/")

    # Bound the request: without a timeout, requests waits forever and a
    # stalled server would hang the whole test run instead of failing.
    response = requests.get(base_url + "/metrics", timeout=30)

    assert response.status_code == HTTPStatus.OK
vllm/entrypoints/openai/api_server.py
View file @
6366efc6
...
...
@@ -73,11 +73,13 @@ async def lifespan(app: fastapi.FastAPI):
# Router that the endpoint handlers register on (e.g. @router.get("/health")).
router = APIRouter()

# NOTE(review): this diff view carries no +/- markers. The statements below
# register the same "/metrics" mount that mount_metrics() appends to the app,
# so they are presumably the removed side of the hunk - confirm before
# keeping both registrations.
# Add prometheus asgi middleware to route /metrics requests
route = Mount("/metrics", make_asgi_app())
# Workaround for 307 Redirect for /metrics
# The replacement pattern matches "/metrics" with or without a trailing path.
route.path_regex = re.compile('^/metrics(?P<path>.*)$')
router.routes.append(route)
def mount_metrics(app: fastapi.FastAPI):
    """Attach the metrics ASGI app to ``app`` under ``/metrics``.

    The mount is appended directly to ``app.routes``.

    Args:
        app: Application whose route list receives the ``/metrics`` mount.
    """
    # Add prometheus asgi middleware to route /metrics requests
    metrics_asgi_app = make_asgi_app()
    mount = Mount("/metrics", metrics_asgi_app)

    # Workaround for 307 Redirect for /metrics: swap in a pattern that
    # matches "/metrics" with or without a trailing path.
    metrics_pattern = re.compile('^/metrics(?P<path>.*)$')
    mount.path_regex = metrics_pattern

    app.routes.append(mount)
@
router
.
get
(
"/health"
)
...
...
@@ -167,6 +169,8 @@ def build_app(args):
app
.
include_router
(
router
)
app
.
root_path
=
args
.
root_path
mount_metrics
(
app
)
app
.
add_middleware
(
CORSMiddleware
,
allow_origins
=
args
.
allowed_origins
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment