Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
d279d499
"src/vscode:/vscode.git/clone" did not exist on "2bc82d6381c6bc5ec9c73e43a30f38434db5a9e1"
Unverified
Commit
d279d499
authored
May 30, 2025
by
Yuhong Guo
Committed by
GitHub
May 30, 2025
Browse files
Fix aiohttp 'Chunk too big' in bench_serving (#6737)
parent
6cb00c63
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
6 deletions
+18
-6
python/sglang/bench_serving.py
python/sglang/bench_serving.py
+18
-6
No files found.
python/sglang/bench_serving.py
View file @
d279d499
...
@@ -39,7 +39,6 @@ from transformers import (
...
@@ -39,7 +39,6 @@ from transformers import (
PreTrainedTokenizerFast
,
PreTrainedTokenizerFast
,
)
)
AIOHTTP_TIMEOUT
=
aiohttp
.
ClientTimeout
(
total
=
6
*
60
*
60
)
ASSISTANT_SUFFIX
=
"Assistant:"
ASSISTANT_SUFFIX
=
"Assistant:"
global
args
global
args
...
@@ -51,6 +50,19 @@ def _get_bool_env_var(name: str, default: str = "false") -> bool:
...
@@ -51,6 +50,19 @@ def _get_bool_env_var(name: str, default: str = "false") -> bool:
return
value
.
lower
()
in
(
"true"
,
"1"
)
return
value
.
lower
()
in
(
"true"
,
"1"
)
def
_create_bench_client_session
():
# When the pressure is big, the read buffer could be full before aio thread read
# the content. We increase the read_bufsize from 64K to 10M.
# Define constants for timeout and buffer size for clarity and maintainability
BENCH_AIOHTTP_TIMEOUT_SECONDS
=
6
*
60
*
60
# 6 hours
BENCH_AIOHTTP_READ_BUFSIZE_BYTES
=
10
*
1024
**
2
# 10 MB
aiohttp_timeout
=
aiohttp
.
ClientTimeout
(
total
=
BENCH_AIOHTTP_TIMEOUT_SECONDS
)
return
aiohttp
.
ClientSession
(
timeout
=
aiohttp_timeout
,
read_bufsize
=
BENCH_AIOHTTP_READ_BUFSIZE_BYTES
)
@
dataclass
@
dataclass
class
RequestFuncInput
:
class
RequestFuncInput
:
prompt
:
str
prompt
:
str
...
@@ -106,7 +118,7 @@ async def async_request_trt_llm(
...
@@ -106,7 +118,7 @@ async def async_request_trt_llm(
api_url
=
request_func_input
.
api_url
api_url
=
request_func_input
.
api_url
assert
api_url
.
endswith
(
"generate_stream"
)
assert
api_url
.
endswith
(
"generate_stream"
)
async
with
aiohttp
.
C
lient
S
ession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
_create_bench_c
lient
_s
ession
()
as
session
:
payload
=
{
payload
=
{
"accumulate_tokens"
:
True
,
"accumulate_tokens"
:
True
,
"text_input"
:
request_func_input
.
prompt
,
"text_input"
:
request_func_input
.
prompt
,
...
@@ -179,7 +191,7 @@ async def async_request_openai_completions(
...
@@ -179,7 +191,7 @@ async def async_request_openai_completions(
prompt
=
request_func_input
.
prompt
prompt
=
request_func_input
.
prompt
async
with
aiohttp
.
C
lient
S
ession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
_create_bench_c
lient
_s
ession
()
as
session
:
payload
=
{
payload
=
{
"model"
:
request_func_input
.
model
,
"model"
:
request_func_input
.
model
,
"prompt"
:
prompt
,
"prompt"
:
prompt
,
...
@@ -261,7 +273,7 @@ async def async_request_truss(
...
@@ -261,7 +273,7 @@ async def async_request_truss(
prompt
=
request_func_input
.
prompt
prompt
=
request_func_input
.
prompt
async
with
aiohttp
.
C
lient
S
ession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
_create_bench_c
lient
_s
ession
()
as
session
:
payload
=
{
payload
=
{
"model"
:
request_func_input
.
model
,
"model"
:
request_func_input
.
model
,
"prompt"
:
prompt
,
"prompt"
:
prompt
,
...
@@ -338,7 +350,7 @@ async def async_request_sglang_generate(
...
@@ -338,7 +350,7 @@ async def async_request_sglang_generate(
api_url
=
request_func_input
.
api_url
api_url
=
request_func_input
.
api_url
prompt
=
request_func_input
.
prompt
prompt
=
request_func_input
.
prompt
async
with
aiohttp
.
C
lient
S
ession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
_create_bench_c
lient
_s
ession
()
as
session
:
payload
=
{
payload
=
{
(
"text"
if
isinstance
(
prompt
,
str
)
else
"input_ids"
):
prompt
,
(
"text"
if
isinstance
(
prompt
,
str
)
else
"input_ids"
):
prompt
,
"sampling_params"
:
{
"sampling_params"
:
{
...
@@ -437,7 +449,7 @@ async def async_request_gserver(
...
@@ -437,7 +449,7 @@ async def async_request_gserver(
async
def
async_request_profile
(
api_url
:
str
)
->
RequestFuncOutput
:
async
def
async_request_profile
(
api_url
:
str
)
->
RequestFuncOutput
:
async
with
aiohttp
.
C
lient
S
ession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
_create_bench_c
lient
_s
ession
()
as
session
:
output
=
RequestFuncOutput
()
output
=
RequestFuncOutput
()
try
:
try
:
async
with
session
.
post
(
url
=
api_url
)
as
response
:
async
with
session
.
post
(
url
=
api_url
)
as
response
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment