Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
38d80967
Commit
38d80967
authored
Sep 12, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.10.2rc2' into v0.10.2rc2-ori
parents
33650733
880c741b
Changes
544
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
248 additions
and
118 deletions
+248
-118
tests/entrypoints/openai/test_transcription_validation.py
tests/entrypoints/openai/test_transcription_validation.py
+19
-16
tests/entrypoints/openai/test_translation_validation.py
tests/entrypoints/openai/test_translation_validation.py
+44
-34
tests/entrypoints/openai/test_vision.py
tests/entrypoints/openai/test_vision.py
+152
-20
tests/entrypoints/pooling/__init__.py
tests/entrypoints/pooling/__init__.py
+0
-0
tests/entrypoints/pooling/correctness/__init__.py
tests/entrypoints/pooling/correctness/__init__.py
+0
-0
tests/entrypoints/pooling/correctness/test_mteb_embed.py
tests/entrypoints/pooling/correctness/test_mteb_embed.py
+6
-5
tests/entrypoints/pooling/correctness/test_mteb_score.py
tests/entrypoints/pooling/correctness/test_mteb_score.py
+11
-18
tests/entrypoints/pooling/llm/__init__.py
tests/entrypoints/pooling/llm/__init__.py
+0
-0
tests/entrypoints/pooling/llm/test_classify.py
tests/entrypoints/pooling/llm/test_classify.py
+1
-2
tests/entrypoints/pooling/llm/test_embedding.py
tests/entrypoints/pooling/llm/test_embedding.py
+0
-0
tests/entrypoints/pooling/llm/test_encode.py
tests/entrypoints/pooling/llm/test_encode.py
+0
-0
tests/entrypoints/pooling/llm/test_reward.py
tests/entrypoints/pooling/llm/test_reward.py
+1
-2
tests/entrypoints/pooling/llm/test_score.py
tests/entrypoints/pooling/llm/test_score.py
+1
-2
tests/entrypoints/pooling/openai/__init__.py
tests/entrypoints/pooling/openai/__init__.py
+0
-0
tests/entrypoints/pooling/openai/test_classification.py
tests/entrypoints/pooling/openai/test_classification.py
+1
-2
tests/entrypoints/pooling/openai/test_embedding.py
tests/entrypoints/pooling/openai/test_embedding.py
+4
-5
tests/entrypoints/pooling/openai/test_embedding_dimensions.py
...s/entrypoints/pooling/openai/test_embedding_dimensions.py
+5
-6
tests/entrypoints/pooling/openai/test_embedding_long_text.py
tests/entrypoints/pooling/openai/test_embedding_long_text.py
+1
-2
tests/entrypoints/pooling/openai/test_pooling.py
tests/entrypoints/pooling/openai/test_pooling.py
+1
-2
tests/entrypoints/pooling/openai/test_rerank.py
tests/entrypoints/pooling/openai/test_rerank.py
+1
-2
No files found.
Too many changes to show.
To preserve performance only
544 of 544+
files are displayed.
Plain diff
Email patch
tests/entrypoints/openai/test_transcription_validation.py
View file @
38d80967
...
...
@@ -12,8 +12,6 @@ import pytest
import
pytest_asyncio
import
soundfile
as
sf
from
vllm.assets.audio
import
AudioAsset
from
...utils
import
RemoteOpenAIServer
MODEL_NAME
=
"openai/whisper-large-v3-turbo"
...
...
@@ -24,20 +22,6 @@ MISTRAL_FORMAT_ARGS = [
]
@
pytest
.
fixture
def
mary_had_lamb
():
path
=
AudioAsset
(
'mary_had_lamb'
).
get_local_path
()
with
open
(
str
(
path
),
"rb"
)
as
f
:
yield
f
@
pytest
.
fixture
def
winning_call
():
path
=
AudioAsset
(
'winning_call'
).
get_local_path
()
with
open
(
str
(
path
),
"rb"
)
as
f
:
yield
f
@
pytest
.
fixture
(
scope
=
"module"
)
def
server
():
with
RemoteOpenAIServer
(
MODEL_NAME
,
SERVER_ARGS
)
as
remote_server
:
...
...
@@ -76,6 +60,25 @@ async def test_basic_audio(mary_had_lamb, model_name):
assert
out_usage
[
"seconds"
]
==
16
,
out_usage
[
"seconds"
]
@
pytest
.
mark
.
asyncio
async
def
test_basic_audio_gemma
(
foscolo
):
# Gemma accuracy on some of the audio samples we use is particularly bad,
# hence we use a different one here. WER is evaluated separately.
model_name
=
"google/gemma-3n-E2B-it"
server_args
=
[
"--enforce-eager"
]
with
RemoteOpenAIServer
(
model_name
,
server_args
)
as
remote_server
:
client
=
remote_server
.
get_async_client
()
transcription
=
await
client
.
audio
.
transcriptions
.
create
(
model
=
model_name
,
file
=
foscolo
,
language
=
"it"
,
response_format
=
"text"
,
temperature
=
0.0
)
out
=
json
.
loads
(
transcription
)[
'text'
]
assert
"da cui vergine nacque Venere"
in
out
@
pytest
.
mark
.
asyncio
async
def
test_non_asr_model
(
winning_call
):
# text to text model
...
...
tests/entrypoints/openai/test_translation_validation.py
View file @
38d80967
...
...
@@ -12,32 +12,24 @@ import pytest
import
pytest_asyncio
import
soundfile
as
sf
from
vllm.assets.audio
import
AudioAsset
from
...utils
import
RemoteOpenAIServer
MODEL_NAME
=
"openai/whisper-small"
SERVER_ARGS
=
[
"--enforce-eager"
]
@
pytest
.
fixture
def
foscolo
():
# Test translation it->en
path
=
AudioAsset
(
'azacinto_foscolo'
).
get_local_path
()
with
open
(
str
(
path
),
"rb"
)
as
f
:
yield
f
@
pytest
.
fixture
(
scope
=
"module"
)
def
server
():
with
RemoteOpenAIServer
(
MODEL_NAME
,
SERVER_ARGS
)
as
remote_server
:
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
[
"openai/whisper-small"
,
"google/gemma-3n-E2B-it"
])
def
server
(
request
):
# Parametrize over model name
with
RemoteOpenAIServer
(
request
.
param
,
SERVER_ARGS
)
as
remote_server
:
yield
remote_server
,
request
.
param
@
pytest_asyncio
.
fixture
async
def
client
(
server
):
async
def
client_and_model
(
server
):
server
,
model_name
=
server
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
yield
async_client
,
model_name
@
pytest
.
mark
.
asyncio
...
...
@@ -56,27 +48,29 @@ async def test_non_asr_model(foscolo):
# NOTE: (NickLucche) the large-v3-turbo model was not trained on translation!
@
pytest
.
mark
.
asyncio
async
def
test_basic_audio
(
foscolo
,
client
):
async
def
test_basic_audio
(
foscolo
,
client_and_model
):
client
,
model_name
=
client_and_model
translation
=
await
client
.
audio
.
translations
.
create
(
model
=
MODEL_NAME
,
model
=
model_name
,
file
=
foscolo
,
response_format
=
"text"
,
# TODO remove once language detection is implemented
extra_body
=
dict
(
language
=
"it"
),
# TODO remove
`language="it"`
once language detection is implemented
extra_body
=
dict
(
language
=
"it"
,
to_language
=
"en"
),
temperature
=
0.0
)
out
=
json
.
loads
(
translation
)[
'text'
].
strip
().
lower
()
assert
"greek sea"
in
out
@
pytest
.
mark
.
asyncio
async
def
test_audio_prompt
(
foscolo
,
client
):
async
def
test_audio_prompt
(
foscolo
,
client_and_model
):
client
,
model_name
=
client_and_model
# Condition whisper on starting text
prompt
=
"Nor have I ever"
transcription
=
await
client
.
audio
.
translations
.
create
(
model
=
MODEL_NAME
,
model
=
model_name
,
file
=
foscolo
,
prompt
=
prompt
,
extra_body
=
dict
(
language
=
"it"
),
extra_body
=
dict
(
language
=
"it"
,
to_language
=
"en"
),
response_format
=
"text"
,
temperature
=
0.0
)
out
=
json
.
loads
(
transcription
)[
'text'
]
...
...
@@ -85,22 +79,27 @@ async def test_audio_prompt(foscolo, client):
@
pytest
.
mark
.
asyncio
async
def
test_streaming_response
(
foscolo
,
client
,
server
):
async
def
test_streaming_response
(
foscolo
,
client_and_model
,
server
):
client
,
model_name
=
client_and_model
translation
=
""
res_no_stream
=
await
client
.
audio
.
translations
.
create
(
model
=
MODEL_NAME
,
model
=
model_name
,
file
=
foscolo
,
response_format
=
"json"
,
extra_body
=
dict
(
language
=
"it"
),
extra_body
=
dict
(
language
=
"it"
,
to_language
=
"en"
,
seed
=
42
),
temperature
=
0.0
)
# Stream via HTTPX since OpenAI translation client doesn't expose streaming
server
,
model_name
=
server
url
=
server
.
url_for
(
"v1/audio/translations"
)
headers
=
{
"Authorization"
:
f
"Bearer
{
server
.
DUMMY_API_KEY
}
"
}
data
=
{
"model"
:
MODEL_NAME
,
"model"
:
model_name
,
"language"
:
"it"
,
"to_language"
:
"en"
,
"stream"
:
True
,
"temperature"
:
0.0
,
"seed"
:
42
,
}
foscolo
.
seek
(
0
)
async
with
httpx
.
AsyncClient
()
as
http_client
:
...
...
@@ -121,16 +120,24 @@ async def test_streaming_response(foscolo, client, server):
text
=
chunk
[
"choices"
][
0
].
get
(
"delta"
,
{}).
get
(
"content"
)
translation
+=
text
or
""
assert
translation
==
res_no_stream
.
text
res_stream
=
translation
.
split
()
# NOTE There's a small non-deterministic issue here, likely in the attn
# computation, which will cause a few tokens to be different, while still
# being very close semantically.
assert
sum
([
x
==
y
for
x
,
y
in
zip
(
res_stream
,
res_no_stream
.
text
.
split
())
])
>=
len
(
res_stream
)
*
0.9
@
pytest
.
mark
.
asyncio
async
def
test_stream_options
(
foscolo
,
client
,
server
):
async
def
test_stream_options
(
foscolo
,
server
):
server
,
model_name
=
server
url
=
server
.
url_for
(
"v1/audio/translations"
)
headers
=
{
"Authorization"
:
f
"Bearer
{
server
.
DUMMY_API_KEY
}
"
}
data
=
{
"model"
:
MODEL_NAME
,
"model"
:
model_name
,
"language"
:
"it"
,
"to_language"
:
"en"
,
"stream"
:
True
,
"stream_include_usage"
:
True
,
"stream_continuous_usage_stats"
:
True
,
...
...
@@ -164,7 +171,10 @@ async def test_stream_options(foscolo, client, server):
@
pytest
.
mark
.
asyncio
async
def
test_long_audio_request
(
foscolo
,
client
):
async
def
test_long_audio_request
(
foscolo
,
client_and_model
):
client
,
model_name
=
client_and_model
if
model_name
==
"google/gemma-3n-E2B-it"
:
pytest
.
skip
(
"Gemma3n does not support long audio requests"
)
foscolo
.
seek
(
0
)
audio
,
sr
=
librosa
.
load
(
foscolo
)
repeated_audio
=
np
.
tile
(
audio
,
2
)
...
...
@@ -173,9 +183,9 @@ async def test_long_audio_request(foscolo, client):
sf
.
write
(
buffer
,
repeated_audio
,
sr
,
format
=
'WAV'
)
buffer
.
seek
(
0
)
translation
=
await
client
.
audio
.
translations
.
create
(
model
=
MODEL_NAME
,
model
=
model_name
,
file
=
buffer
,
extra_body
=
dict
(
language
=
"it"
),
extra_body
=
dict
(
language
=
"it"
,
to_language
=
"en"
),
response_format
=
"text"
,
temperature
=
0.0
)
out
=
json
.
loads
(
translation
)[
'text'
].
strip
().
lower
()
...
...
tests/entrypoints/openai/test_vision.py
View file @
38d80967
...
...
@@ -16,11 +16,11 @@ MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
MAXIMUM_IMAGES
=
2
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
TEST_IMAGE_
URL
S
=
[
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
,
"https://upload.wikimedia.org/wikipedia/commons/f/fa/Grayscale_8bits_palette_sample_image.png"
,
"https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Venn_diagram_rgb.svg/1280px-Venn_diagram_rgb.svg.png"
,
"https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png"
,
TEST_IMAGE_
ASSET
S
=
[
"2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
,
#
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
"Grayscale_8bits_palette_sample_image.png"
,
#
"https://upload.wikimedia.org/wikipedia/commons/f/fa/Grayscale_8bits_palette_sample_image.png",
"1280px-Venn_diagram_rgb.svg.png"
,
#
"https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Venn_diagram_rgb.svg/1280px-Venn_diagram_rgb.svg.png",
"RGBA_comp.png"
,
#
"https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
]
EXPECTED_MM_BEAM_SEARCH_RES
=
[
...
...
@@ -69,10 +69,11 @@ async def client(server):
@
pytest
.
fixture
(
scope
=
"session"
)
def
base64_encoded_image
()
->
dict
[
str
,
str
]:
def
base64_encoded_image
(
local_asset_server
)
->
dict
[
str
,
str
]:
return
{
image_url
:
encode_image_base64
(
fetch_image
(
image_url
))
for
image_url
in
TEST_IMAGE_URLS
image_asset
:
encode_image_base64
(
local_asset_server
.
get_image_asset
(
image_asset
))
for
image_asset
in
TEST_IMAGE_ASSETS
}
...
...
@@ -97,7 +98,7 @@ def get_hf_prompt_tokens(model_name, content, image_url):
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_
URLS
)
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_
ASSETS
,
indirect
=
True
)
async
def
test_single_chat_session_image
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
):
content_text
=
"What's in this image?"
...
...
@@ -157,7 +158,7 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_
URLS
)
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_
ASSETS
,
indirect
=
True
)
async
def
test_error_on_invalid_image_url_type
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
):
...
...
@@ -187,7 +188,7 @@ async def test_error_on_invalid_image_url_type(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_
URLS
)
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_
ASSETS
,
indirect
=
True
)
async
def
test_single_chat_session_image_beamsearch
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
):
...
...
@@ -223,10 +224,11 @@ async def test_single_chat_session_image_beamsearch(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_URLS
)
@
pytest
.
mark
.
parametrize
(
"raw_image_url"
,
TEST_IMAGE_ASSETS
)
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_ASSETS
,
indirect
=
True
)
async
def
test_single_chat_session_image_base64encoded
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
,
base64_encoded_image
:
dict
[
str
,
str
]):
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
raw_
image_url
:
str
,
image_url
:
str
,
base64_encoded_image
:
dict
[
str
,
str
]):
content_text
=
"What's in this image?"
messages
=
[{
...
...
@@ -237,7 +239,7 @@ async def test_single_chat_session_image_base64encoded(
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
f
"data:image/jpeg;base64,
{
base64_encoded_image
[
image_url
]
}
"
f
"data:image/jpeg;base64,
{
base64_encoded_image
[
raw_
image_url
]
}
"
}
},
{
...
...
@@ -287,12 +289,12 @@ async def test_single_chat_session_image_base64encoded(
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_idx"
,
list
(
range
(
len
(
TEST_IMAGE_
URL
S
))))
@
pytest
.
mark
.
parametrize
(
"image_idx"
,
list
(
range
(
len
(
TEST_IMAGE_
ASSET
S
))))
async
def
test_single_chat_session_image_base64encoded_beamsearch
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_idx
:
int
,
base64_encoded_image
:
dict
[
str
,
str
]):
# NOTE: This test also validates that we pass MM data through beam search
image_url
=
TEST_IMAGE_
URL
S
[
image_idx
]
raw_
image_url
=
TEST_IMAGE_
ASSET
S
[
image_idx
]
expected_res
=
EXPECTED_MM_BEAM_SEARCH_RES
[
image_idx
]
messages
=
[{
...
...
@@ -303,7 +305,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch(
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
f
"data:image/jpeg;base64,
{
base64_encoded_image
[
image_url
]
}
"
f
"data:image/jpeg;base64,
{
base64_encoded_image
[
raw_
image_url
]
}
"
}
},
{
...
...
@@ -326,7 +328,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch(
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_
URLS
)
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_
ASSETS
,
indirect
=
True
)
async
def
test_chat_streaming_image
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
):
messages
=
[{
...
...
@@ -385,7 +387,8 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_urls"
,
[
TEST_IMAGE_URLS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_IMAGE_URLS
))])
[
TEST_IMAGE_ASSETS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_IMAGE_ASSETS
))],
indirect
=
True
)
async
def
test_multi_image_input
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_urls
:
list
[
str
]):
...
...
@@ -433,3 +436,132 @@ async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
)
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
and
len
(
message
.
content
)
>=
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_urls"
,
[
TEST_IMAGE_ASSETS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_IMAGE_ASSETS
))],
indirect
=
True
)
async
def
test_completions_with_image
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_urls
:
list
[
str
],
):
for
image_url
in
image_urls
:
chat_completion
=
await
client
.
chat
.
completions
.
create
(
messages
=
[
{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
},
{
"role"
:
"user"
,
"content"
:
[
{
"type"
:
"text"
,
"text"
:
"Describe this image."
,
},
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
image_url
,
}
},
],
},
],
model
=
model_name
,
)
assert
chat_completion
.
choices
[
0
].
message
.
content
is
not
None
assert
isinstance
(
chat_completion
.
choices
[
0
].
message
.
content
,
str
)
assert
len
(
chat_completion
.
choices
[
0
].
message
.
content
)
>
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_urls"
,
[
TEST_IMAGE_ASSETS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_IMAGE_ASSETS
))],
indirect
=
True
)
async
def
test_completions_with_image_with_uuid
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_urls
:
list
[
str
],
):
for
image_url
in
image_urls
:
chat_completion
=
await
client
.
chat
.
completions
.
create
(
messages
=
[
{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
},
{
"role"
:
"user"
,
"content"
:
[
{
"type"
:
"text"
,
"text"
:
"Describe this image."
,
},
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
image_url
,
},
"uuid"
:
image_url
},
],
},
],
model
=
model_name
,
)
assert
chat_completion
.
choices
[
0
].
message
.
content
is
not
None
assert
isinstance
(
chat_completion
.
choices
[
0
].
message
.
content
,
str
)
assert
len
(
chat_completion
.
choices
[
0
].
message
.
content
)
>
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"image_urls"
,
[
TEST_IMAGE_ASSETS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_IMAGE_ASSETS
))],
indirect
=
True
)
async
def
test_completions_with_image_with_incorrect_uuid_format
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_urls
:
list
[
str
],
):
for
image_url
in
image_urls
:
chat_completion
=
await
client
.
chat
.
completions
.
create
(
messages
=
[
{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
},
{
"role"
:
"user"
,
"content"
:
[
{
"type"
:
"text"
,
"text"
:
"Describe this image."
,
},
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
image_url
,
"incorrect_uuid_key"
:
image_url
,
},
"also_incorrect_uuid_key"
:
image_url
,
},
],
},
],
model
=
model_name
,
)
assert
chat_completion
.
choices
[
0
].
message
.
content
is
not
None
assert
isinstance
(
chat_completion
.
choices
[
0
].
message
.
content
,
str
)
assert
len
(
chat_completion
.
choices
[
0
].
message
.
content
)
>
0
tests/entrypoints/pooling/__init__.py
0 → 100644
View file @
38d80967
tests/entrypoints/pooling/correctness/__init__.py
0 → 100644
View file @
38d80967
tests/entrypoints/
openai
/correctness/test_mteb_embed.py
→
tests/entrypoints/
pooling
/correctness/test_mteb_embed.py
View file @
38d80967
...
...
@@ -4,10 +4,9 @@ import os
import
pytest
from
tests.models.language.pooling.mteb_utils
import
(
MTEB_EMBED_TASKS
,
MTEB_EMBED_TOL
,
OpenAIClientMtebEncoder
,
run_mteb_embed_task
)
from
tests.models.language.pooling_mteb_test.mteb_utils
import
(
MTEB_EMBED_TASKS
,
MTEB_EMBED_TOL
,
OpenAIClientMtebEncoder
,
run_mteb_embed_task
)
from
tests.utils
import
RemoteOpenAIServer
os
.
environ
[
"VLLM_LOGGING_LEVEL"
]
=
"WARNING"
...
...
@@ -37,4 +36,6 @@ def test_mteb_embed(server):
print
(
"SentenceTransformer main score: "
,
st_main_score
)
print
(
"Difference: "
,
st_main_score
-
vllm_main_score
)
assert
st_main_score
==
pytest
.
approx
(
vllm_main_score
,
abs
=
MTEB_EMBED_TOL
)
# We are not concerned that the vllm mteb results are better
# than SentenceTransformers, so we only perform one-sided testing.
assert
st_main_score
-
vllm_main_score
<
MTEB_EMBED_TOL
tests/entrypoints/
openai
/correctness/test_mteb_score.py
→
tests/entrypoints/
pooling
/correctness/test_mteb_score.py
View file @
38d80967
...
...
@@ -4,18 +4,15 @@ import os
import
pytest
# yapf conflicts with isort for this block
# yapf: disable
from
tests.models.language.pooling.mteb_utils
import
(
from
tests.models.language.pooling_mteb_test.mteb_utils
import
(
MTEB_RERANK_LANGS
,
MTEB_RERANK_TASKS
,
MTEB_RERANK_TOL
,
RerankClientMtebEncoder
,
ScoreClientMtebEncoder
,
mteb_test_rerank_models_hf
,
run_mteb_rerank
)
# yapf: enable
RerankClientMtebEncoder
,
ScoreClientMtebEncoder
,
run_mteb_rerank
)
from
tests.utils
import
RemoteOpenAIServer
os
.
environ
[
"VLLM_LOGGING_LEVEL"
]
=
"WARNING"
MODEL_NAME
=
"cross-encoder/ms-marco-MiniLM-L-6-v2"
st_main_score
=
0.33457
@
pytest
.
fixture
(
scope
=
"module"
)
...
...
@@ -29,15 +26,7 @@ def server():
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
def
st_main_score
(
hf_runner
):
# The main score related to the version of the dependency.
# So we need to recalculate every time.
main_score
,
st_dtype
=
mteb_test_rerank_models_hf
(
hf_runner
,
MODEL_NAME
)
return
main_score
def
test_mteb_score
(
server
,
st_main_score
):
def
test_mteb_score
(
server
):
url
=
server
.
url_for
(
"score"
)
encoder
=
ScoreClientMtebEncoder
(
MODEL_NAME
,
url
)
vllm_main_score
=
run_mteb_rerank
(
encoder
,
MTEB_RERANK_TASKS
,
...
...
@@ -47,10 +36,12 @@ def test_mteb_score(server, st_main_score):
print
(
"SentenceTransformer main score: "
,
st_main_score
)
print
(
"Difference: "
,
st_main_score
-
vllm_main_score
)
assert
st_main_score
==
pytest
.
approx
(
vllm_main_score
,
abs
=
MTEB_RERANK_TOL
)
# We are not concerned that the vllm mteb results are better
# than SentenceTransformers, so we only perform one-sided testing.
assert
st_main_score
-
vllm_main_score
<
MTEB_RERANK_TOL
def
test_mteb_rerank
(
server
,
st_main_score
):
def
test_mteb_rerank
(
server
):
url
=
server
.
url_for
(
"rerank"
)
encoder
=
RerankClientMtebEncoder
(
MODEL_NAME
,
url
)
vllm_main_score
=
run_mteb_rerank
(
encoder
,
MTEB_RERANK_TASKS
,
...
...
@@ -60,4 +51,6 @@ def test_mteb_rerank(server, st_main_score):
print
(
"SentenceTransformer main score: "
,
st_main_score
)
print
(
"Difference: "
,
st_main_score
-
vllm_main_score
)
assert
st_main_score
==
pytest
.
approx
(
vllm_main_score
,
abs
=
MTEB_RERANK_TOL
)
# We are not concerned that the vllm mteb results are better
# than SentenceTransformers, so we only perform one-sided testing.
assert
st_main_score
-
vllm_main_score
<
MTEB_RERANK_TOL
tests/entrypoints/pooling/llm/__init__.py
0 → 100644
View file @
38d80967
tests/entrypoints/llm/test_classify.py
→
tests/entrypoints/
pooling/
llm/test_classify.py
View file @
38d80967
...
...
@@ -6,11 +6,10 @@ import weakref
import
pytest
import
torch
from
tests.models.utils
import
softmax
from
vllm
import
LLM
,
PoolingParams
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
...models.utils
import
softmax
MODEL_NAME
=
"jason9693/Qwen2.5-1.5B-apeach"
prompts
=
[
"The chef prepared a delicious meal."
]
...
...
tests/entrypoints/llm/test_embedding.py
→
tests/entrypoints/
pooling/
llm/test_embedding.py
View file @
38d80967
File moved
tests/entrypoints/llm/test_encode.py
→
tests/entrypoints/
pooling/
llm/test_encode.py
View file @
38d80967
File moved
tests/entrypoints/llm/test_reward.py
→
tests/entrypoints/
pooling/
llm/test_reward.py
View file @
38d80967
...
...
@@ -6,11 +6,10 @@ import weakref
import
pytest
import
torch
from
tests.models.utils
import
softmax
from
vllm
import
LLM
,
PoolingParams
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
...models.utils
import
softmax
MODEL_NAME
=
"internlm/internlm2-1_8b-reward"
prompts
=
[
"The chef prepared a delicious meal."
]
...
...
tests/entrypoints/llm/test_score.py
→
tests/entrypoints/
pooling/
llm/test_score.py
View file @
38d80967
...
...
@@ -6,11 +6,10 @@ import weakref
import
pytest
import
torch
from
tests.models.utils
import
softmax
from
vllm
import
LLM
,
PoolingParams
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
...models.utils
import
softmax
MODEL_NAME
=
"tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
...
...
tests/entrypoints/pooling/openai/__init__.py
0 → 100644
View file @
38d80967
tests/entrypoints/openai/test_classification.py
→
tests/entrypoints/
pooling/
openai/test_classification.py
View file @
38d80967
...
...
@@ -6,10 +6,9 @@ import requests
import
torch
import
torch.nn.functional
as
F
from
tests.utils
import
RemoteOpenAIServer
from
vllm.entrypoints.openai.protocol
import
ClassificationResponse
from
...utils
import
RemoteOpenAIServer
MODEL_NAME
=
"jason9693/Qwen2.5-1.5B-apeach"
DTYPE
=
"float32"
# Use float32 to avoid NaN issue
...
...
tests/entrypoints/openai/test_embedding.py
→
tests/entrypoints/
pooling/
openai/test_embedding.py
View file @
38d80967
...
...
@@ -11,14 +11,13 @@ import requests
import
torch
import
torch.nn.functional
as
F
from
tests.models.language.pooling.embed_utils
import
(
run_embedding_correctness_test
)
from
tests.models.utils
import
check_embeddings_close
from
tests.utils
import
RemoteOpenAIServer
from
vllm.entrypoints.openai.protocol
import
EmbeddingResponse
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
from
...models.language.pooling.embed_utils
import
(
run_embedding_correctness_test
)
from
...models.utils
import
check_embeddings_close
from
...utils
import
RemoteOpenAIServer
MODEL_NAME
=
"intfloat/multilingual-e5-small"
DUMMY_CHAT_TEMPLATE
=
"""{% for message in messages %}{{message['role'] + ': ' + message['content'] + '
\\
n'}}{% endfor %}"""
# noqa: E501
DTYPE
=
"bfloat16"
...
...
tests/entrypoints/openai/test_embedding_dimensions.py
→
tests/entrypoints/
pooling/
openai/test_embedding_dimensions.py
View file @
38d80967
...
...
@@ -9,13 +9,12 @@ from typing import Optional
import
openai
import
pytest
from
vllm.entrypoints.openai.protocol
import
EmbeddingResponse
from
...conftest
import
HfRunner
from
...models.language.pooling.embed_utils
import
(
from
tests.conftest
import
HfRunner
from
tests.models.language.pooling.embed_utils
import
(
run_embedding_correctness_test
)
from
...models.utils
import
EmbedModelInfo
from
...utils
import
RemoteOpenAIServer
from
tests.models.utils
import
EmbedModelInfo
from
tests.utils
import
RemoteOpenAIServer
from
vllm.entrypoints.openai.protocol
import
EmbeddingResponse
MODELS
=
[
EmbedModelInfo
(
"intfloat/multilingual-e5-small"
,
is_matryoshka
=
False
),
...
...
tests/entrypoints/openai/test_embedding_long_text.py
→
tests/entrypoints/
pooling/
openai/test_embedding_long_text.py
View file @
38d80967
...
...
@@ -14,10 +14,9 @@ import openai
import
pytest
import
pytest_asyncio
from
tests.utils
import
RemoteOpenAIServer
from
vllm.entrypoints.openai.protocol
import
EmbeddingResponse
from
...utils
import
RemoteOpenAIServer
def
_generate_random_text
(
word_count
:
int
)
->
str
:
"""Generate random text with approximately the specified word count."""
...
...
tests/entrypoints/openai/test_pooling.py
→
tests/entrypoints/
pooling/
openai/test_pooling.py
View file @
38d80967
...
...
@@ -8,11 +8,10 @@ import pytest
import
requests
from
tests.models.utils
import
check_embeddings_close
from
tests.utils
import
RemoteOpenAIServer
from
vllm.entrypoints.openai.protocol
import
PoolingResponse
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
from
...utils
import
RemoteOpenAIServer
MODEL_NAME
=
"internlm/internlm2-1_8b-reward"
DUMMY_CHAT_TEMPLATE
=
"""{% for message in messages %}{{message['role'] + ': ' + message['content'] + '
\\
n'}}{% endfor %}"""
# noqa: E501
...
...
tests/entrypoints/openai/test_rerank.py
→
tests/entrypoints/
pooling/
openai/test_rerank.py
View file @
38d80967
...
...
@@ -6,10 +6,9 @@ import requests
import
torch
import
torch.nn.functional
as
F
from
tests.utils
import
RemoteOpenAIServer
from
vllm.entrypoints.openai.protocol
import
RerankResponse
from
...utils
import
RemoteOpenAIServer
MODEL_NAME
=
"BAAI/bge-reranker-base"
DTYPE
=
"bfloat16"
...
...
Prev
1
…
7
8
9
10
11
12
13
14
15
…
28
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment