Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b903e1ba
Unverified
Commit
b903e1ba
authored
Aug 22, 2024
by
Joe Runde
Committed by
GitHub
Aug 22, 2024
Browse files
[Frontend] error suppression cleanup (#7786)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
parent
a1522464
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing 3 changed files with 18 additions and 7 deletions
+18
-7
tests/entrypoints/openai/rpc/test_zmq_client.py
tests/entrypoints/openai/rpc/test_zmq_client.py
+4
-3
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+2
-3
vllm/entrypoints/openai/rpc/client.py
vllm/entrypoints/openai/rpc/client.py
+12
-1
No files found.
tests/entrypoints/openai/rpc/test_zmq_client.py
View file @
b903e1ba
...
...
@@ -75,10 +75,11 @@ async def test_client_aborts_use_timeouts(monkeypatch, dummy_server,
m
.
setattr
(
dummy_server
,
"abort"
,
lambda
x
:
None
)
m
.
setattr
(
client
,
"_data_timeout"
,
10
)
# Ensure the client doesn't hang
# The client should suppress timeouts on `abort`s
# and return normally, assuming the server will eventually
# abort the request.
client_task
=
asyncio
.
get_running_loop
().
create_task
(
client
.
abort
(
"test request id"
))
with
pytest
.
raises
(
TimeoutError
,
match
=
"Server didn't reply within"
):
await
asyncio
.
wait_for
(
client_task
,
timeout
=
0.05
)
...
...
vllm/entrypoints/openai/api_server.py
View file @
b903e1ba
...
...
@@ -6,7 +6,7 @@ import os
import
re
import
tempfile
from
argparse
import
Namespace
from
contextlib
import
asynccontextmanager
,
suppress
from
contextlib
import
asynccontextmanager
from
http
import
HTTPStatus
from
typing
import
AsyncIterator
,
Optional
,
Set
...
...
@@ -83,7 +83,6 @@ async def lifespan(app: FastAPI):
async
def
_force_log
():
while
True
:
await
asyncio
.
sleep
(
10
)
with
suppress
(
Exception
):
await
async_engine_client
.
do_log_stats
()
if
not
engine_args
.
disable_log_stats
:
...
...
vllm/entrypoints/openai/rpc/client.py
View file @
b903e1ba
...
...
@@ -335,7 +335,18 @@ class AsyncEngineRPCClient:
async
def
abort
(
self
,
request_id
:
str
):
"""Send an ABORT_REQUEST signal to the RPC Server"""
with
suppress
(
RPCClientClosedError
):
# Suppress timeouts as well.
# In cases where the server is busy processing requests and a very
# large volume of abort requests arrive, it is likely that the server
# will not be able to ack all of them in time. We have seen this when
# we abort 20k requests at once while another 2k are processing — many
# of them time out, but we see the server successfully abort all of the
# requests.
# In this case we assume that the server has received or will receive
# these abort requests, and ignore the timeout. This prevents a massive
# wall of `TimeoutError` stack traces.
with
suppress
(
RPCClientClosedError
,
TimeoutError
):
await
self
.
_send_one_way_rpc_request
(
request
=
RPCAbortRequest
(
request_id
),
error_message
=
f
"RPCAbortRequest
{
request_id
}
failed"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment