Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3bb8e2c9
Unverified
Commit
3bb8e2c9
authored
Jan 24, 2025
by
Junichi Sato
Committed by
GitHub
Jan 24, 2025
Browse files
[Misc] Enable proxy support in benchmark script (#12356)
Signed-off-by:
Junichi Sato
<
junichi.sato@sbintuitions.co.jp
>
parent
e784c6b9
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
5 deletions
+10
-5
benchmarks/backend_request_func.py
benchmarks/backend_request_func.py
+10
-5
No files found.
benchmarks/backend_request_func.py
View file @
3bb8e2c9
...
@@ -51,7 +51,8 @@ async def async_request_tgi(
...
@@ -51,7 +51,8 @@ async def async_request_tgi(
api_url
=
request_func_input
.
api_url
api_url
=
request_func_input
.
api_url
assert
api_url
.
endswith
(
"generate_stream"
)
assert
api_url
.
endswith
(
"generate_stream"
)
async
with
aiohttp
.
ClientSession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
aiohttp
.
ClientSession
(
trust_env
=
True
,
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
params
=
{
params
=
{
"best_of"
:
request_func_input
.
best_of
,
"best_of"
:
request_func_input
.
best_of
,
"max_new_tokens"
:
request_func_input
.
output_len
,
"max_new_tokens"
:
request_func_input
.
output_len
,
...
@@ -123,7 +124,8 @@ async def async_request_trt_llm(
...
@@ -123,7 +124,8 @@ async def async_request_trt_llm(
api_url
=
request_func_input
.
api_url
api_url
=
request_func_input
.
api_url
assert
api_url
.
endswith
(
"generate_stream"
)
assert
api_url
.
endswith
(
"generate_stream"
)
async
with
aiohttp
.
ClientSession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
aiohttp
.
ClientSession
(
trust_env
=
True
,
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
assert
request_func_input
.
best_of
==
1
assert
request_func_input
.
best_of
==
1
payload
=
{
payload
=
{
"accumulate_tokens"
:
True
,
"accumulate_tokens"
:
True
,
...
@@ -187,7 +189,8 @@ async def async_request_deepspeed_mii(
...
@@ -187,7 +189,8 @@ async def async_request_deepspeed_mii(
request_func_input
:
RequestFuncInput
,
request_func_input
:
RequestFuncInput
,
pbar
:
Optional
[
tqdm
]
=
None
,
pbar
:
Optional
[
tqdm
]
=
None
,
)
->
RequestFuncOutput
:
)
->
RequestFuncOutput
:
async
with
aiohttp
.
ClientSession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
aiohttp
.
ClientSession
(
trust_env
=
True
,
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
assert
request_func_input
.
best_of
==
1
assert
request_func_input
.
best_of
==
1
payload
=
{
payload
=
{
...
@@ -235,7 +238,8 @@ async def async_request_openai_completions(
...
@@ -235,7 +238,8 @@ async def async_request_openai_completions(
(
"completions"
,
"profile"
)
(
"completions"
,
"profile"
)
),
"OpenAI Completions API URL must end with 'completions' or 'profile'."
),
"OpenAI Completions API URL must end with 'completions' or 'profile'."
async
with
aiohttp
.
ClientSession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
aiohttp
.
ClientSession
(
trust_env
=
True
,
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
payload
=
{
payload
=
{
"model"
:
request_func_input
.
model_name
\
"model"
:
request_func_input
.
model_name
\
if
request_func_input
.
model_name
else
request_func_input
.
model
,
if
request_func_input
.
model_name
else
request_func_input
.
model
,
...
@@ -333,7 +337,8 @@ async def async_request_openai_chat_completions(
...
@@ -333,7 +337,8 @@ async def async_request_openai_chat_completions(
"chat/completions"
"chat/completions"
),
"OpenAI Chat Completions API URL must end with 'chat/completions'."
),
"OpenAI Chat Completions API URL must end with 'chat/completions'."
async
with
aiohttp
.
ClientSession
(
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
async
with
aiohttp
.
ClientSession
(
trust_env
=
True
,
timeout
=
AIOHTTP_TIMEOUT
)
as
session
:
content
=
[{
"type"
:
"text"
,
"text"
:
request_func_input
.
prompt
}]
content
=
[{
"type"
:
"text"
,
"text"
:
request_func_input
.
prompt
}]
if
request_func_input
.
multi_modal_content
:
if
request_func_input
.
multi_modal_content
:
content
.
append
(
request_func_input
.
multi_modal_content
)
content
.
append
(
request_func_input
.
multi_modal_content
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment