Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5be7ca1b
Unverified
Commit
5be7ca1b
authored
Oct 12, 2025
by
Cyrus Leung
Committed by
GitHub
Oct 12, 2025
Browse files
[Benchmark] Support Infinity API (#26641)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
f0a30a06
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
96 additions
and
29 deletions
+96
-29
vllm/benchmarks/datasets.py
vllm/benchmarks/datasets.py
+1
-1
vllm/benchmarks/lib/endpoint_request_func.py
vllm/benchmarks/lib/endpoint_request_func.py
+95
-28
No files found.
vllm/benchmarks/datasets.py
View file @
5be7ca1b
...
...
@@ -1584,7 +1584,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
if
dataset_class
.
IS_MULTIMODAL
and
not
(
args
.
backend
in
(
"openai-chat"
,
"openai-audio"
)
or
"
openai-
embeddings-"
in
args
.
backend
or
"embeddings-"
in
args
.
backend
):
# multi-modal benchmark is only available on OpenAI Chat
# endpoint-type.
...
...
vllm/benchmarks/lib/endpoint_request_func.py
View file @
5be7ca1b
...
...
@@ -581,29 +581,6 @@ async def async_request_openai_embeddings_chat(
)
async def async_request_openai_embeddings_clip(
    request_func_input: RequestFuncInput,
    session: aiohttp.ClientSession,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Send an embeddings request for a CLIP model via the chat embeddings call.

    ``request_func_input`` is adjusted in place before it is forwarded:
    the prompt is blanked when multi-modal content is present, and
    ``extra_body["truncate_prompt_tokens"]`` is set to ``-1`` unless the
    caller already supplied a value.
    """
    if request_func_input.multi_modal_content:
        # Image input
        request_func_input.prompt = ""

    # max_model_len=77 is too short for most datasets,
    # so by default we truncate the prompt to max_model_len
    if request_func_input.extra_body is None:
        request_func_input.extra_body = {}
    request_func_input.extra_body.setdefault("truncate_prompt_tokens", -1)

    result = await async_request_openai_embeddings_chat(
        request_func_input,
        session,
        pbar=pbar,
    )
    return result
def
_try_extract_request_idx
(
request_func_input
:
RequestFuncInput
):
if
request_func_input
.
request_id
:
match
=
re
.
search
(
r
"(\d+)$"
,
request_func_input
.
request_id
)
...
...
@@ -616,11 +593,20 @@ def _try_extract_request_idx(request_func_input: RequestFuncInput):
return
None
async
def
async_request_openai_embeddings_vlm2vec
(
request_func_input
:
RequestFuncInput
,
session
:
aiohttp
.
ClientSession
,
pbar
:
Optional
[
tqdm
]
=
None
,
)
->
RequestFuncOutput
:
def _preprocess_clip(request_func_input: RequestFuncInput):
    """Adjust ``request_func_input`` in place for a CLIP embeddings request.

    The prompt is replaced with an empty string when multi-modal content is
    present. ``extra_body`` is created if it is ``None``, and its
    ``"truncate_prompt_tokens"`` entry is set to ``-1`` unless one exists.
    """
    has_media = bool(request_func_input.multi_modal_content)
    if has_media:
        # Image input
        request_func_input.prompt = ""

    # max_model_len=77 is too short for most datasets,
    # so by default we truncate the prompt to max_model_len
    if request_func_input.extra_body is None:
        request_func_input.extra_body = {}
    request_func_input.extra_body.setdefault("truncate_prompt_tokens", -1)
def
_preprocess_vlm2vec
(
request_func_input
:
RequestFuncInput
):
if
request_func_input
.
multi_modal_content
:
request_idx
=
_try_extract_request_idx
(
request_func_input
)
...
...
@@ -637,6 +623,28 @@ async def async_request_openai_embeddings_vlm2vec(
f
"
{
request_func_input
.
prompt
}
"
)
async def async_request_openai_embeddings_clip(
    request_func_input: RequestFuncInput,
    session: aiohttp.ClientSession,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Apply CLIP preprocessing, then issue the chat embeddings request.

    ``request_func_input`` is passed to ``_preprocess_clip`` first and the
    same object is then forwarded, together with ``session`` and ``pbar``,
    to ``async_request_openai_embeddings_chat``.
    """
    _preprocess_clip(request_func_input)

    result = await async_request_openai_embeddings_chat(
        request_func_input,
        session,
        pbar=pbar,
    )
    return result
async
def
async_request_openai_embeddings_vlm2vec
(
request_func_input
:
RequestFuncInput
,
session
:
aiohttp
.
ClientSession
,
pbar
:
Optional
[
tqdm
]
=
None
,
)
->
RequestFuncOutput
:
_preprocess_vlm2vec
(
request_func_input
)
return
await
async_request_openai_embeddings_chat
(
request_func_input
,
session
,
...
...
@@ -645,6 +653,61 @@ async def async_request_openai_embeddings_vlm2vec(
)
async def async_request_infinity_embeddings(
    request_func_input: RequestFuncInput,
    session: aiohttp.ClientSession,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Build and send one embeddings request in the Infinity payload format.

    A non-empty prompt is sent as the ``input`` field. Otherwise ``input``
    is taken from the ``url`` entry of the multi-modal content, and
    ``modality`` is set to the part of the content type before its first
    underscore. The request itself is delegated to
    ``_run_openai_embeddings``.
    """
    api_url = request_func_input.api_url
    _validate_api_url(api_url, "Infinity Embeddings API", "embeddings")

    # Use the explicit model name when one is set, else fall back to model.
    payload = {
        "model": request_func_input.model_name or request_func_input.model,
    }

    text = request_func_input.prompt
    if text:
        payload["input"] = text
    else:
        mm_content = request_func_input.multi_modal_content
        assert isinstance(mm_content, dict)

        mm_type = mm_content["type"]
        payload["input"] = mm_content[mm_type]["url"]
        # Keep only the text before the first underscore of the type.
        payload["modality"] = mm_type.partition("_")[0]

    _update_payload_common(payload, request_func_input)

    bearer = f"Bearer {os.environ.get('OPENAI_API_KEY')}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": bearer,
    }
    _update_headers_common(headers, request_func_input)

    return await _run_openai_embeddings(
        session,
        api_url,
        payload=payload,
        headers=headers,
        pbar=pbar,
    )
async def async_request_infinity_embeddings_clip(
    request_func_input: RequestFuncInput,
    session: aiohttp.ClientSession,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Apply CLIP preprocessing, then issue the Infinity embeddings request.

    ``request_func_input`` is passed to ``_preprocess_clip`` first and the
    same object is then forwarded, together with ``session`` and ``pbar``,
    to ``async_request_infinity_embeddings``.
    """
    _preprocess_clip(request_func_input)

    result = await async_request_infinity_embeddings(
        request_func_input,
        session,
        pbar=pbar,
    )
    return result
# TODO: Add more request functions for different API protocols.
ASYNC_REQUEST_FUNCS
:
dict
[
str
,
RequestFunc
]
=
{
"vllm"
:
async_request_openai_completions
,
...
...
@@ -655,6 +718,10 @@ ASYNC_REQUEST_FUNCS: dict[str, RequestFunc] = {
"openai-embeddings-chat"
:
async_request_openai_embeddings_chat
,
"openai-embeddings-clip"
:
async_request_openai_embeddings_clip
,
"openai-embeddings-vlm2vec"
:
async_request_openai_embeddings_vlm2vec
,
# Infinity embedding server: https://github.com/michaelfeil/infinity
"infinity-embeddings"
:
async_request_infinity_embeddings
,
"infinity-embeddings-clip"
:
async_request_infinity_embeddings_clip
,
# (Infinity embedding server does not support vlm2vec)
}
OPENAI_COMPATIBLE_BACKENDS
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment