Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
86847700
Unverified
Commit
86847700
authored
May 20, 2025
by
wang.yuqi
Committed by
GitHub
May 20, 2025
Browse files
[CI] Add mteb testing to test the accuracy of the embedding model (#17175)
parent
d6c86d09
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
64 additions
and
5 deletions
+64
-5
requirements/test.in
requirements/test.in
+1
-0
requirements/test.txt
requirements/test.txt
+21
-1
tests/entrypoints/openai/correctness/test_mteb.py
tests/entrypoints/openai/correctness/test_mteb.py
+42
-0
tests/models/language/pooling/test_gte.py
tests/models/language/pooling/test_gte.py
+0
-2
tests/models/language/pooling/test_nomic.py
tests/models/language/pooling/test_nomic.py
+0
-1
tests/models/language/pooling/test_snowflake_arctic_embed.py
tests/models/language/pooling/test_snowflake_arctic_embed.py
+0
-1
No files found.
requirements/test.in
View file @
86847700
...
...
@@ -33,6 +33,7 @@ num2words # required for smolvlm test
opencv-python-headless >= 4.11.0 # required for video test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.8 # required for model evaluation test
mteb>=1.38.11, <2 # required for mteb test
transformers==4.51.3
tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
...
...
requirements/test.txt
View file @
86847700
...
...
@@ -99,6 +99,7 @@ datasets==3.0.2
# via
# evaluate
# lm-eval
# mteb
decorator==5.1.1
# via librosa
dill==0.3.8
...
...
@@ -124,6 +125,8 @@ email-validator==2.2.0
# via pydantic
encodec==0.1.1
# via vocos
eval-type-backport==0.2.2
# via mteb
evaluate==0.4.3
# via lm-eval
fastparquet==2024.11.0
...
...
@@ -291,6 +294,8 @@ msgpack==1.1.0
# via
# librosa
# ray
mteb==1.38.11
# via -r requirements/test.in
multidict==6.1.0
# via
# aiohttp
...
...
@@ -331,6 +336,7 @@ numpy==1.26.4
# librosa
# matplotlib
# mistral-common
# mteb
# numba
# numexpr
# opencv-python-headless
...
...
@@ -443,6 +449,8 @@ plotly==5.24.1
# via genai-perf
pluggy==1.5.0
# via pytest
polars==1.29.0
# via mteb
pooch==1.8.2
# via librosa
portalocker==2.10.1
...
...
@@ -476,6 +484,7 @@ pydantic==2.9.2
# via
# datamodel-code-generator
# mistral-common
# mteb
pydantic-core==2.23.4
# via pydantic
pygments==2.18.0
...
...
@@ -522,6 +531,8 @@ python-dateutil==2.9.0.post0
# typepy
python-rapidjson==1.20
# via tritonclient
pytrec-eval-terrier==0.5.7
# via mteb
pytz==2024.2
# via
# pandas
...
...
@@ -564,6 +575,7 @@ requests==2.32.3
# huggingface-hub
# lm-eval
# mistral-common
# mteb
# pooch
# ray
# responses
...
...
@@ -580,6 +592,7 @@ rfc3987==1.3.8
rich==13.9.4
# via
# genai-perf
# mteb
# typer
rouge-score==0.1.2
# via lm-eval
...
...
@@ -607,16 +620,20 @@ scikit-learn==1.5.2
# via
# librosa
# lm-eval
# mteb
# sentence-transformers
scipy==1.13.1
# via
# librosa
# mteb
# scikit-learn
# sentence-transformers
# statsmodels
# vocos
sentence-transformers==3.2.1
# via -r requirements/test.in
# via
# -r requirements/test.in
# mteb
sentencepiece==0.2.0
# via mistral-common
setuptools==77.0.3
...
...
@@ -696,6 +713,7 @@ torch==2.7.0+cu128
# fastsafetensors
# lm-eval
# mamba-ssm
# mteb
# peft
# runai-model-streamer
# sentence-transformers
...
...
@@ -720,6 +738,7 @@ tqdm==4.66.6
# evaluate
# huggingface-hub
# lm-eval
# mteb
# nltk
# peft
# pqdm
...
...
@@ -759,6 +778,7 @@ typing-extensions==4.12.2
# huggingface-hub
# librosa
# mistral-common
# mteb
# pqdm
# pydantic
# pydantic-core
...
...
tests/entrypoints/openai/correctness/test_mteb.py
0 → 100644
View file @
86847700
# SPDX-License-Identifier: Apache-2.0
import
math
import
os
import
pytest
from
tests.models.language.pooling.mteb_utils
import
(
MTEB_EMBED_TASKS
,
OpenAIClientMtebEncoder
,
run_mteb_embed_task
,
run_mteb_embed_task_st
)
from
tests.utils
import
RemoteOpenAIServer
# Lower the vLLM log level for this test module via the environment.
os.environ["VLLM_LOGGING_LEVEL"] = "WARNING"

# Model served by the `server` fixture and evaluated in `test_mteb`.
MODEL_NAME = "BAAI/bge-m3"
# Value passed to the server's `--dtype` flag.
DTYPE = "float16"
# Reference main score that `test_mteb` compares against. While this is
# truthy the test uses it directly and does not call
# `run_mteb_embed_task_st` to recompute the reference.
MAIN_SCORE = 0.7873427091972599
@pytest.fixture(scope="module")
def server():
    """Yield a `RemoteOpenAIServer` for `MODEL_NAME`, shared per module.

    The server is entered as a context manager, so it is exited once the
    tests in this module that use the fixture are done.
    """
    cli_args = [
        "--task",
        "embed",
        "--dtype",
        DTYPE,
        "--enforce-eager",
        "--max-model-len",
        "512",
    ]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as remote_server:
        yield remote_server
def test_mteb(server):
    """Check the served model's MTEB main score against the reference.

    The score obtained through the server's client must match the
    reference score within a relative tolerance of 1e-4.
    """
    encoder = OpenAIClientMtebEncoder(MODEL_NAME, server.get_client())
    vllm_main_score = run_mteb_embed_task(encoder, MTEB_EMBED_TASKS)

    # Use the recorded reference when it is set (truthy); otherwise
    # compute it with the helper imported from mteb_utils.
    if MAIN_SCORE:
        st_main_score = MAIN_SCORE
    else:
        st_main_score = run_mteb_embed_task_st(MODEL_NAME, MTEB_EMBED_TASKS)

    score_gap = st_main_score - vllm_main_score
    print("VLLM main score: ", vllm_main_score)
    print("SentenceTransformer main score: ", st_main_score)
    print("Difference: ", score_gap)

    assert math.isclose(st_main_score, vllm_main_score, rel_tol=1e-4)
tests/models/language/pooling/test_gte.py
View file @
86847700
...
...
@@ -58,8 +58,6 @@ MODELS = [
@
pytest
.
mark
.
parametrize
(
"model_info"
,
MODELS
)
def
test_models_mteb
(
hf_runner
,
vllm_runner
,
model_info
:
EmbedModelInfo
)
->
None
:
pytest
.
skip
(
"Skipping mteb test."
)
from
.mteb_utils
import
mteb_test_embed_models
vllm_extra_kwargs
:
dict
[
str
,
Any
]
=
{}
...
...
tests/models/language/pooling/test_nomic.py
View file @
86847700
...
...
@@ -23,7 +23,6 @@ MODELS = [
@
pytest
.
mark
.
parametrize
(
"model_info"
,
MODELS
)
def
test_models_mteb
(
hf_runner
,
vllm_runner
,
model_info
:
EmbedModelInfo
)
->
None
:
pytest
.
skip
(
"Skipping mteb test."
)
from
.mteb_utils
import
mteb_test_embed_models
mteb_test_embed_models
(
hf_runner
,
vllm_runner
,
model_info
)
...
...
tests/models/language/pooling/test_snowflake_arctic_embed.py
View file @
86847700
...
...
@@ -46,7 +46,6 @@ def test_models_mteb(
vllm_runner
,
model_info
:
EmbedModelInfo
,
)
->
None
:
pytest
.
skip
(
"Skipping mteb test."
)
from
.mteb_utils
import
mteb_test_embed_models
mteb_test_embed_models
(
hf_runner
,
vllm_runner
,
model_info
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment