Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad39bd64
Unverified
Commit
ad39bd64
authored
Nov 08, 2024
by
DearPlanet
Committed by
GitHub
Nov 08, 2024
Browse files
[Bugfix] Add error handling when server cannot respond any valid tokens (#5895)
parent
40d0e741
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
3 deletions
+10
-3
benchmarks/backend_request_func.py
benchmarks/backend_request_func.py
+10
-3
No files found.
benchmarks/backend_request_func.py
View file @
ad39bd64
...
@@ -256,6 +256,7 @@ async def async_request_openai_completions(
...
@@ -256,6 +256,7 @@ async def async_request_openai_completions(
async
with
session
.
post
(
url
=
api_url
,
json
=
payload
,
async
with
session
.
post
(
url
=
api_url
,
json
=
payload
,
headers
=
headers
)
as
response
:
headers
=
headers
)
as
response
:
if
response
.
status
==
200
:
if
response
.
status
==
200
:
first_valid_chunk_received
=
False
async
for
chunk_bytes
in
response
.
content
:
async
for
chunk_bytes
in
response
.
content
:
chunk_bytes
=
chunk_bytes
.
strip
()
chunk_bytes
=
chunk_bytes
.
strip
()
if
not
chunk_bytes
:
if
not
chunk_bytes
:
...
@@ -274,7 +275,8 @@ async def async_request_openai_completions(
...
@@ -274,7 +275,8 @@ async def async_request_openai_completions(
if
data
[
"choices"
][
0
][
"text"
]:
if
data
[
"choices"
][
0
][
"text"
]:
timestamp
=
time
.
perf_counter
()
timestamp
=
time
.
perf_counter
()
# First token
# First token
if
ttft
==
0.0
:
if
not
first_valid_chunk_received
:
first_chunk_received
=
True
ttft
=
time
.
perf_counter
()
-
st
ttft
=
time
.
perf_counter
()
-
st
output
.
ttft
=
ttft
output
.
ttft
=
ttft
...
@@ -285,9 +287,14 @@ async def async_request_openai_completions(
...
@@ -285,9 +287,14 @@ async def async_request_openai_completions(
most_recent_timestamp
=
timestamp
most_recent_timestamp
=
timestamp
generated_text
+=
data
[
"choices"
][
0
][
"text"
]
generated_text
+=
data
[
"choices"
][
0
][
"text"
]
if
first_chunk_received
:
output
.
success
=
True
else
:
output
.
success
=
False
output
.
error
=
(
"Never received a valid chunk to calculate TTFT."
"This response will be marked as failed!"
)
output
.
generated_text
=
generated_text
output
.
generated_text
=
generated_text
output
.
success
=
True
output
.
latency
=
latency
output
.
latency
=
latency
else
:
else
:
output
.
error
=
response
.
reason
or
""
output
.
error
=
response
.
reason
or
""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment