Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1ff67df1
Unverified
Commit
1ff67df1
authored
Dec 24, 2025
by
wang.yuqi
Committed by
GitHub
Dec 24, 2025
Browse files
[CI] Reorganization pooling_mteb_test (#31265)
Signed-off-by:
wang.yuqi
<
noooop@126.com
>
parent
7cd288a4
Changes
18
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
480 additions
and
347 deletions
+480
-347
tests/entrypoints/pooling/embed/test_correctness_mteb.py
tests/entrypoints/pooling/embed/test_correctness_mteb.py
+1
-1
tests/entrypoints/pooling/score/test_correctness_mteb.py
tests/entrypoints/pooling/score/test_correctness_mteb.py
+1
-1
tests/entrypoints/pooling/score/test_utils.py
tests/entrypoints/pooling/score/test_utils.py
+4
-5
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
+228
-0
tests/models/language/pooling_mteb_test/mteb_score_utils.py
tests/models/language/pooling_mteb_test/mteb_score_utils.py
+25
-197
tests/models/language/pooling_mteb_test/test_baai.py
tests/models/language/pooling_mteb_test/test_baai.py
+36
-32
tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py
.../language/pooling_mteb_test/test_bge_reranker_v2_gemma.py
+8
-6
tests/models/language/pooling_mteb_test/test_cross_encoder.py
...s/models/language/pooling_mteb_test/test_cross_encoder.py
+11
-5
tests/models/language/pooling_mteb_test/test_gte.py
tests/models/language/pooling_mteb_test/test_gte.py
+45
-25
tests/models/language/pooling_mteb_test/test_intfloat.py
tests/models/language/pooling_mteb_test/test_intfloat.py
+17
-13
tests/models/language/pooling_mteb_test/test_jina.py
tests/models/language/pooling_mteb_test/test_jina.py
+12
-5
tests/models/language/pooling_mteb_test/test_mxbai_rerank.py
tests/models/language/pooling_mteb_test/test_mxbai_rerank.py
+8
-4
tests/models/language/pooling_mteb_test/test_nemotron.py
tests/models/language/pooling_mteb_test/test_nemotron.py
+16
-6
tests/models/language/pooling_mteb_test/test_nomic.py
tests/models/language/pooling_mteb_test/test_nomic.py
+14
-6
tests/models/language/pooling_mteb_test/test_qwen3_reranker.py
.../models/language/pooling_mteb_test/test_qwen3_reranker.py
+8
-4
tests/models/language/pooling_mteb_test/test_snowflake_arctic_embed.py
...language/pooling_mteb_test/test_snowflake_arctic_embed.py
+30
-10
tests/models/language/pooling_mteb_test/test_st_projector.py
tests/models/language/pooling_mteb_test/test_st_projector.py
+11
-5
tests/models/utils.py
tests/models/utils.py
+5
-22
No files found.
tests/entrypoints/pooling/embed/test_correctness_mteb.py
View file @
1ff67df1
...
...
@@ -4,7 +4,7 @@ import os
import
pytest
from
tests.models.language.pooling_mteb_test.mteb_utils
import
(
from
tests.models.language.pooling_mteb_test.mteb_
embed_
utils
import
(
MTEB_EMBED_TASKS
,
MTEB_EMBED_TOL
,
OpenAIClientMtebEncoder
,
...
...
tests/entrypoints/pooling/score/test_correctness_mteb.py
View file @
1ff67df1
...
...
@@ -4,7 +4,7 @@ import os
import
pytest
from
tests.models.language.pooling_mteb_test.mteb_utils
import
(
from
tests.models.language.pooling_mteb_test.mteb_
score_
utils
import
(
MTEB_RERANK_LANGS
,
MTEB_RERANK_TASKS
,
MTEB_RERANK_TOL
,
...
...
tests/entrypoints/pooling/score/test_utils.py
View file @
1ff67df1
...
...
@@ -202,11 +202,10 @@ class TestGetScorePrompt:
tokenization_kwargs
,
mock_model_no_score_template
,
):
# FIXME: Models implementing SupportsScoreTemplate must use their custom
# template implementation by default to preserve existing functionality.
# Attempting to use tokenizer_config.json templates would most likely break
# these models, as often they just inherit the template from the original LLM.
# CLI --chat-template overrides are still supported.
# FIXME: For now, we only apply a template when one is explicitly provided.
# We cannot rely on the tokenizer's chat template because many models
# inherit junk templates from their base LLM, which breaks both the models
# and the tests that use them.
with
(
patch
(
"vllm.model_executor.model_loader.get_model_cls"
,
...
...
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
0 → 100644
View file @
1ff67df1
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
mteb
import
numpy
as
np
import
torch
from
mteb.models
import
ModelMeta
from
mteb.types
import
Array
from
torch.utils.data
import
DataLoader
import
tests.ci_envs
as
ci_envs
from
tests.models.utils
import
(
EmbedModelInfo
,
check_embeddings_close
,
get_vllm_extra_kwargs
,
)
# Most embedding models on the STS12 task (See #17175):
# - Model implementation and minor changes in tensor dtype
#   result in differences of less than 1e-4
# - A different model results in differences of more than 1e-3
# 1e-4 is a good tolerance threshold
MTEB_EMBED_TASKS = ["STS12"]
MTEB_EMBED_TOL = 1e-4

# Placeholder model metadata used as the ``mteb_model_meta`` class attribute
# of the encoder mixin in this module. Only name, revision, framework and
# modalities carry values; every other field is left as None.
_empty_model_meta = ModelMeta(
    loader=None,
    name="vllm/model",
    revision="1",
    release_date=None,
    languages=None,
    framework=[],
    similarity_fn_name=None,
    n_parameters=None,
    memory_usage_mb=None,
    max_tokens=None,
    embed_dim=None,
    license=None,
    open_weights=None,
    public_training_code=None,
    public_training_data=None,
    use_instructions=None,
    training_datasets=None,
    modalities=["text"],  # 'image' can be added to evaluate multimodal models
)
class MtebEmbedMixin(mteb.EncoderProtocol):
    """Cosine-similarity helpers shared by the MTEB embedding encoders."""

    mteb_model_meta = _empty_model_meta

    def similarity(
        self,
        embeddings1: np.ndarray,
        embeddings2: np.ndarray,
    ) -> np.ndarray:
        """Return the all-pairs cosine similarity between two embedding sets.

        For 2-D inputs, entry ``[i, j]`` compares row ``i`` of
        ``embeddings1`` with row ``j`` of ``embeddings2``.
        """
        # Row-wise L2 norms, kept as column vectors so they broadcast below.
        left_norms = np.linalg.norm(embeddings1, axis=1, keepdims=True)
        right_norms = np.linalg.norm(embeddings2, axis=1, keepdims=True)
        dot_products = np.dot(embeddings1, embeddings2.T)
        return dot_products / (left_norms * right_norms.T)

    def similarity_pairwise(
        self,
        embeddings1: Array,
        embeddings2: Array,
    ) -> Array:
        """Return the cosine similarity of row ``i`` with row ``i`` only."""
        left_norms = np.linalg.norm(embeddings1, axis=1, keepdims=True).flatten()
        right_norms = np.linalg.norm(embeddings2, axis=1, keepdims=True).flatten()
        # Element-wise product summed per row is the row-wise dot product.
        dot_products = np.sum(embeddings1 * embeddings2, axis=1)
        return dot_products / (left_norms * right_norms)
class VllmMtebEncoder(MtebEmbedMixin):
    """MTEB encoder that produces embeddings via ``vllm_model.embed``."""

    def __init__(self, vllm_model):
        self.llm = vllm_model
        # Fixed seed so the shuffling done in encode() is reproducible.
        self.rng = np.random.default_rng(seed=42)

    def encode(
        self,
        inputs: DataLoader[mteb.types.BatchedInput],
        *args,
        **kwargs,
    ) -> np.ndarray:
        """Embed every ``"text"`` entry of ``inputs``, returned in input order."""
        # Flatten the batches into a single list of sentences.
        texts = []
        for batch in inputs:
            texts.extend(batch["text"])

        # Hoping to discover potential scheduling
        # issues by randomizing the order.
        order = self.rng.permutation(len(texts))
        shuffled = [texts[idx] for idx in order]

        raw_outputs = self.llm.embed(shuffled, use_tqdm=False)
        shuffled_embeds = np.array(raw_outputs)

        # argsort of a permutation is its inverse, which undoes the shuffle.
        restore = np.argsort(order)
        return shuffled_embeds[restore]
class OpenAIClientMtebEncoder(MtebEmbedMixin):
    """MTEB encoder that requests embeddings through ``client.embeddings``."""

    def __init__(self, model_name: str, client):
        self.model_name = model_name
        self.client = client
        # Fixed seed so the shuffling done in encode() is reproducible.
        self.rng = np.random.default_rng(seed=42)

    def encode(
        self,
        inputs: DataLoader[mteb.types.BatchedInput],
        *args,
        **kwargs,
    ) -> np.ndarray:
        """Embed every ``"text"`` entry of ``inputs``, returned in input order."""
        # Flatten the batches into a single list of sentences.
        texts = []
        for batch in inputs:
            texts.extend(batch["text"])

        # Hoping to discover potential scheduling
        # issues by randomizing the order.
        order = self.rng.permutation(len(texts))
        shuffled = [texts[idx] for idx in order]

        response = self.client.embeddings.create(
            model=self.model_name,
            input=shuffled,
        )
        shuffled_embeds = np.array([item.embedding for item in response.data])

        # argsort of a permutation is its inverse, which undoes the shuffle.
        restore = np.argsort(order)
        return shuffled_embeds[restore]
def run_mteb_embed_task(encoder: mteb.EncoderProtocol, tasks):
    """Evaluate ``encoder`` on ``tasks`` and return the first main score.

    The score is read from the first result's first ``"test"`` entry.
    """
    resolved_tasks = mteb.get_tasks(tasks=tasks)
    evaluation = mteb.evaluate(
        encoder,
        resolved_tasks,
        cache=None,
        show_progress_bar=False,
    )
    first_result = evaluation[0]
    return first_result.scores["test"][0]["main_score"]
def mteb_test_embed_models(
    hf_runner,
    vllm_runner,
    model_info: EmbedModelInfo,
    vllm_extra_kwargs=None,
    hf_model_callback=None,
    atol=MTEB_EMBED_TOL,
):
    """Compare vLLM's MTEB embedding score against a reference score.

    The model is loaded through ``vllm_runner`` with ``runner="pooling"``,
    the expectations recorded on ``model_info`` are checked against the
    engine's model config, and the ``MTEB_EMBED_TASKS`` score is computed.
    The reference score is ``model_info.mteb_score`` when it is set;
    otherwise it is computed with ``hf_runner`` (as a sentence-transformer
    model) and the raw embeddings are additionally compared.

    Args:
        hf_runner: Context-manager factory for the reference model.
        vllm_runner: Context-manager factory for the vLLM model.
        model_info: Model name plus the expected configuration and score.
        vllm_extra_kwargs: Extra keyword arguments for ``vllm_runner``,
            merged via ``get_vllm_extra_kwargs``.
        hf_model_callback: Optional callable applied to the reference model
            before it is evaluated.
        atol: Largest amount by which the vLLM score may fall below the
            reference score.

    Raises:
        AssertionError: If any configuration, output or score check fails.
    """
    vllm_extra_kwargs = get_vllm_extra_kwargs(model_info, vllm_extra_kwargs)

    # Test embed_dims, isnan and whether to use normalize
    example_prompts = ["The chef prepared a delicious meal." * 1000]

    with vllm_runner(
        model_info.name,
        runner="pooling",
        max_model_len=model_info.max_model_len,
        **vllm_extra_kwargs,
    ) as vllm_model:
        model_config = vllm_model.llm.llm_engine.model_config

        # Confirm whether vllm is using the correct architecture
        if model_info.architecture:
            assert model_info.architecture in model_config.architectures, (
                f"architecture {model_info.architecture!r} not in "
                f"{model_config.architectures!r}"
            )

        # Confirm whether the important configs in model_config are correct.
        if model_info.pooling_type is not None:
            actual_pooling = model_config.pooler_config.pooling_type
            assert actual_pooling == model_info.pooling_type, (
                f"pooling_type: vllm={actual_pooling!r}, "
                f"expected={model_info.pooling_type!r}"
            )

        # These attributes share a name on model_config and model_info;
        # None on model_info means "no expectation recorded".
        for attr in (
            "attn_type",
            "is_prefix_caching_supported",
            "is_chunked_prefill_supported",
        ):
            expected = getattr(model_info, attr)
            if expected is not None:
                actual = getattr(model_config, attr)
                assert actual == expected, (
                    f"{attr}: vllm={actual!r}, expected={expected!r}"
                )

        vllm_main_score = run_mteb_embed_task(
            VllmMtebEncoder(vllm_model), MTEB_EMBED_TASKS
        )
        vllm_dtype = model_config.dtype
        head_dtype = model_config.head_dtype

        # Test embedding_size, isnan and whether to use normalize
        vllm_outputs = vllm_model.embed(example_prompts, truncate_prompt_tokens=-1)
        outputs_tensor = torch.tensor(vllm_outputs)
        assert not torch.any(torch.isnan(outputs_tensor)), (
            "vllm embeddings contain NaN"
        )
        embedding_size = model_config.embedding_size
        assert outputs_tensor.shape[-1] == embedding_size, (
            f"embedding size: vllm={outputs_tensor.shape[-1]}, "
            f"expected={embedding_size}"
        )

    # Accelerate mteb test by setting
    # SentenceTransformers mteb score to a constant
    if model_info.mteb_score is None:
        with hf_runner(
            model_info.name,
            is_sentence_transformer=True,
            dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
        ) as hf_model:
            # e.g. setting default parameters for the encode method of hf_runner
            if hf_model_callback is not None:
                hf_model_callback(hf_model)

            st_main_score = run_mteb_embed_task(hf_model, MTEB_EMBED_TASKS)
            st_dtype = next(hf_model.model.parameters()).dtype

            # Check embeddings close to hf outputs
            hf_outputs = hf_model.encode(example_prompts)
            check_embeddings_close(
                embeddings_0_lst=hf_outputs,
                embeddings_1_lst=vllm_outputs,
                name_0="hf",
                name_1="vllm",
                tol=1e-2,
            )
    else:
        st_main_score = model_info.mteb_score
        st_dtype = "Constant"

    score_gap = st_main_score - vllm_main_score

    print("Model:", model_info.name)
    print(
        "VLLM:",
        f"dtype:{vllm_dtype}",
        f"head_dtype:{head_dtype}",
        vllm_main_score,
    )
    print("SentenceTransformers:", st_dtype, st_main_score)
    print("Difference:", score_gap)

    # We are not concerned that the vllm mteb results are better
    # than SentenceTransformers, so we only perform one-sided testing.
    assert score_gap < atol, (
        f"{model_info.name}: vllm main score {vllm_main_score} is below the "
        f"reference {st_main_score} by {score_gap} (atol={atol})"
    )
tests/models/language/pooling_mteb_test/mteb_utils.py
→
tests/models/language/pooling_mteb_test/mteb_
score_
utils.py
View file @
1ff67df1
...
...
@@ -7,37 +7,24 @@ from pathlib import Path
import
mteb
import
numpy
as
np
import
requests
import
torch
from
mteb.models
import
ModelMeta
from
mteb.types
import
Array
from
torch.utils.data
import
DataLoader
import
tests.ci_envs
as
ci_envs
from
tests.models.utils
import
(
EmbedModelInfo
,
RerankModelInfo
,
check_embeddings_close
,
get_vllm_extra_kwargs
,
)
template_home
=
(
Path
(
__file__
).
parent
.
parent
.
parent
.
parent
.
parent
/
"examples/pooling/score/template"
)
# Most embedding models on the STS12 task (See #17175):
# - Model implementation and minor changes in tensor dtype
# results in differences less than 1e-4
# - Different model results in differences more than 1e-3
# 1e-4 is a good tolerance threshold
MTEB_EMBED_TASKS
=
[
"STS12"
]
MTEB_EMBED_TOL
=
1e-4
# See #19344
MTEB_RERANK_TASKS
=
[
"NFCorpus"
]
MTEB_RERANK_LANGS
=
[
"eng"
]
MTEB_RERANK_TOL
=
2e-3
template_home
=
(
Path
(
__file__
).
parent
.
parent
.
parent
.
parent
.
parent
/
"examples/pooling/score/template"
)
_empty_model_meta
=
ModelMeta
(
loader
=
None
,
name
=
"vllm/model"
,
...
...
@@ -60,84 +47,11 @@ _empty_model_meta = ModelMeta(
)
class
Vllm
MtebEncoder
(
mteb
.
EncoderProtocol
):
class
Mteb
Cross
Encoder
Mixin
(
mteb
.
Cross
EncoderProtocol
):
mteb_model_meta
=
_empty_model_meta
def
__init__
(
self
,
vllm_model
):
self
.
llm
=
vllm_model
self
.
rng
=
np
.
random
.
default_rng
(
seed
=
42
)
def
encode
(
self
,
inputs
:
DataLoader
[
mteb
.
types
.
BatchedInput
],
*
args
,
**
kwargs
,
)
->
np
.
ndarray
:
# Hoping to discover potential scheduling
# issues by randomizing the order.
sentences
=
[
text
for
batch
in
inputs
for
text
in
batch
[
"text"
]]
r
=
self
.
rng
.
permutation
(
len
(
sentences
))
sentences
=
[
sentences
[
i
]
for
i
in
r
]
outputs
=
self
.
llm
.
embed
(
sentences
,
use_tqdm
=
False
)
embeds
=
np
.
array
(
outputs
)
embeds
=
embeds
[
np
.
argsort
(
r
)]
return
embeds
def
similarity
(
self
,
embeddings1
:
np
.
ndarray
,
embeddings2
:
np
.
ndarray
,
)
->
np
.
ndarray
:
# Cosine similarity
norm1
=
np
.
linalg
.
norm
(
embeddings1
,
axis
=
1
,
keepdims
=
True
)
norm2
=
np
.
linalg
.
norm
(
embeddings2
,
axis
=
1
,
keepdims
=
True
)
sim
=
np
.
dot
(
embeddings1
,
embeddings2
.
T
)
/
(
norm1
*
norm2
.
T
)
return
sim
def
similarity_pairwise
(
self
,
embeddings1
:
Array
,
embeddings2
:
Array
,
)
->
Array
:
# Cosine similarity
norm1
=
np
.
linalg
.
norm
(
embeddings1
,
axis
=
1
,
keepdims
=
True
)
norm2
=
np
.
linalg
.
norm
(
embeddings2
,
axis
=
1
,
keepdims
=
True
)
sim
=
np
.
sum
(
embeddings1
*
embeddings2
,
axis
=
1
)
/
(
norm1
.
flatten
()
*
norm2
.
flatten
()
)
return
sim
class
OpenAIClientMtebEncoder
(
VllmMtebEncoder
):
def
__init__
(
self
,
model_name
:
str
,
client
):
self
.
model_name
=
model_name
self
.
client
=
client
self
.
rng
=
np
.
random
.
default_rng
(
seed
=
42
)
def
encode
(
self
,
inputs
:
DataLoader
[
mteb
.
types
.
BatchedInput
],
*
args
,
**
kwargs
,
)
->
np
.
ndarray
:
# Hoping to discover potential scheduling
# issues by randomizing the order.
sentences
=
[
text
for
batch
in
inputs
for
text
in
batch
[
"text"
]]
r
=
self
.
rng
.
permutation
(
len
(
sentences
))
sentences
=
[
sentences
[
i
]
for
i
in
r
]
embeddings
=
self
.
client
.
embeddings
.
create
(
model
=
self
.
model_name
,
input
=
sentences
)
outputs
=
[
d
.
embedding
for
d
in
embeddings
.
data
]
embeds
=
np
.
array
(
outputs
)
embeds
=
embeds
[
np
.
argsort
(
r
)]
return
embeds
class
VllmMtebCrossEncoder
(
mteb
.
CrossEncoderProtocol
):
mteb_model_meta
=
_empty_model_meta
class
VllmMtebCrossEncoder
(
MtebCrossEncoderMixin
):
def
__init__
(
self
,
vllm_model
):
self
.
llm
=
vllm_model
self
.
rng
=
np
.
random
.
default_rng
(
seed
=
42
)
...
...
@@ -164,7 +78,7 @@ class VllmMtebCrossEncoder(mteb.CrossEncoderProtocol):
return
scores
class
ScoreClientMtebEncoder
(
m
teb
.
CrossEncoder
Protocol
):
class
ScoreClientMtebEncoder
(
M
tebCrossEncoder
Mixin
):
mteb_model_meta
=
_empty_model_meta
def
__init__
(
self
,
model_name
:
str
,
url
):
...
...
@@ -216,102 +130,6 @@ class RerankClientMtebEncoder(ScoreClientMtebEncoder):
return
response
[
"results"
][
0
][
"relevance_score"
]
def
run_mteb_embed_task
(
encoder
:
mteb
.
EncoderProtocol
,
tasks
):
tasks
=
mteb
.
get_tasks
(
tasks
=
tasks
)
results
=
mteb
.
evaluate
(
encoder
,
tasks
,
cache
=
None
,
show_progress_bar
=
False
,
)
main_score
=
results
[
0
].
scores
[
"test"
][
0
][
"main_score"
]
return
main_score
def
mteb_test_embed_models
(
hf_runner
,
vllm_runner
,
model_info
:
EmbedModelInfo
,
vllm_extra_kwargs
=
None
,
hf_model_callback
=
None
,
atol
=
MTEB_EMBED_TOL
,
):
vllm_extra_kwargs
=
get_vllm_extra_kwargs
(
model_info
,
vllm_extra_kwargs
)
# Test embed_dims, isnan and whether to use normalize
example_prompts
=
[
"The chef prepared a delicious meal."
*
1000
]
with
vllm_runner
(
model_info
.
name
,
runner
=
"pooling"
,
max_model_len
=
model_info
.
max_model_len
,
**
vllm_extra_kwargs
,
)
as
vllm_model
:
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
# Confirm whether vllm is using the correct architecture
if
model_info
.
architecture
:
assert
model_info
.
architecture
in
model_config
.
architectures
# Confirm whether vllm uses the correct default_pooling_type, which
# relates to whether chunked prefill and prefix caching are enabled
assert
(
model_config
.
_model_info
.
default_pooling_type
==
model_info
.
default_pooling_type
)
vllm_main_score
=
run_mteb_embed_task
(
VllmMtebEncoder
(
vllm_model
),
MTEB_EMBED_TASKS
)
vllm_dtype
=
vllm_model
.
llm
.
llm_engine
.
model_config
.
dtype
head_dtype
=
model_config
.
head_dtype
# Test embedding_size, isnan and whether to use normalize
vllm_outputs
=
vllm_model
.
embed
(
example_prompts
,
truncate_prompt_tokens
=-
1
)
outputs_tensor
=
torch
.
tensor
(
vllm_outputs
)
assert
not
torch
.
any
(
torch
.
isnan
(
outputs_tensor
))
embedding_size
=
model_config
.
embedding_size
assert
torch
.
tensor
(
vllm_outputs
).
shape
[
-
1
]
==
embedding_size
# Accelerate mteb test by setting
# SentenceTransformers mteb score to a constant
if
model_info
.
mteb_score
is
None
:
with
hf_runner
(
model_info
.
name
,
is_sentence_transformer
=
True
,
dtype
=
ci_envs
.
VLLM_CI_HF_DTYPE
or
model_info
.
hf_dtype
,
)
as
hf_model
:
# e.g. setting default parameters for the encode method of hf_runner
if
hf_model_callback
is
not
None
:
hf_model_callback
(
hf_model
)
st_main_score
=
run_mteb_embed_task
(
hf_model
,
MTEB_EMBED_TASKS
)
st_dtype
=
next
(
hf_model
.
model
.
parameters
()).
dtype
# Check embeddings close to hf outputs
hf_outputs
=
hf_model
.
encode
(
example_prompts
)
check_embeddings_close
(
embeddings_0_lst
=
hf_outputs
,
embeddings_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
tol
=
1e-2
,
)
else
:
st_main_score
=
model_info
.
mteb_score
st_dtype
=
"Constant"
print
(
"Model:"
,
model_info
.
name
)
print
(
"VLLM:"
,
f
"dtype:
{
vllm_dtype
}
"
,
f
"head_dtype:
{
head_dtype
}
"
,
vllm_main_score
)
print
(
"SentenceTransformers:"
,
st_dtype
,
st_main_score
)
print
(
"Difference:"
,
st_main_score
-
vllm_main_score
)
# We are not concerned that the vllm mteb results are better
# than SentenceTransformers, so we only perform one-sided testing.
assert
st_main_score
-
vllm_main_score
<
atol
def
run_mteb_rerank
(
cross_encoder
:
mteb
.
CrossEncoderProtocol
,
tasks
,
languages
):
with
tempfile
.
TemporaryDirectory
()
as
prediction_folder
:
bm25s
=
mteb
.
get_model
(
"bm25s"
)
...
...
@@ -391,18 +209,28 @@ def mteb_test_rerank_models(
# Score API is only enabled for num_labels == 1
assert
model_config
.
hf_config
.
num_labels
==
1
# Confirm whether vllm uses the correct default_pooling_type, which
# relates to whether chunked prefill and prefix caching are enabled
assert
(
model_config
.
_model_info
.
default_pooling_type
==
model_info
.
default_pooling_type
)
# Maybe load chat_template.
chat_template
:
str
|
None
=
None
if
model_info
.
chat_template_name
is
not
None
:
chat_template
=
(
template_home
/
model_info
.
chat_template_name
).
read_text
()
vllm_model
.
chat_template
=
chat_template
# Confirm whether the important configs in model_config are correct.
if
model_info
.
pooling_type
is
not
None
:
assert
model_config
.
pooler_config
.
pooling_type
==
model_info
.
pooling_type
if
model_info
.
attn_type
is
not
None
:
assert
model_config
.
attn_type
==
model_info
.
attn_type
if
model_info
.
is_prefix_caching_supported
is
not
None
:
assert
(
model_config
.
is_prefix_caching_supported
==
model_info
.
is_prefix_caching_supported
)
if
model_info
.
is_chunked_prefill_supported
is
not
None
:
assert
(
model_config
.
is_chunked_prefill_supported
==
model_info
.
is_chunked_prefill_supported
)
vllm_main_score
=
run_mteb_rerank
(
vllm_mteb_encoder
(
vllm_model
),
tasks
=
MTEB_RERANK_TASKS
,
...
...
tests/models/language/pooling_mteb_test/test_baai.py
View file @
1ff67df1
...
...
@@ -4,90 +4,94 @@ import pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
(
CLSPoolingEmbedModelInfo
,
CLSPoolingRerankModelInfo
,
EmbedModelInfo
,
LASTPoolingEmbedModelInfo
,
RerankModelInfo
,
)
from
.mteb_utils
import
mteb_test_embed_models
,
mteb_test_rerank_models
from
.mteb_embed_utils
import
mteb_test_embed_models
from
.mteb_score_utils
import
mteb_test_rerank_models
MODELS
=
[
########## BertModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-base-en"
,
architecture
=
"BertModel"
,
mteb_score
=
0.779336792
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPoolingEmbedModelInfo
(
"BAAI/bge-base-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"BAAI/bge-small-en"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"BAAI/bge-small-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"BAAI/bge-large-en"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"BAAI/bge-large-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-base-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"BAAI/bge-small-en"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"BAAI/bge-small-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"BAAI/bge-large-en"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"BAAI/bge-large-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"BAAI/bge-large-zh-noinstruct"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-base-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-base-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-small-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-small-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-large-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-large-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
########## XLMRobertaModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-m3"
,
architecture
=
"XLMRobertaModel"
,
mteb_score
=
0.787343078
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
########## Qwen2Model
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-code-v1"
,
architecture
=
"Qwen2Model"
,
mteb_score
=
0.75724465
,
dtype
=
"float32"
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
),
]
RERANK_MODELS
=
[
########## XLMRobertaForSequenceClassification
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"BAAI/bge-reranker-base"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
mteb_score
=
0.32398
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"BAAI/bge-reranker-large"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
enable_test
=
False
,
),
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"BAAI/bge-reranker-v2-m3"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
enable_test
=
False
,
...
...
tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py
View file @
1ff67df1
...
...
@@ -9,14 +9,12 @@ import torch
from
torch.utils.data
import
DataLoader
from
tests.conftest
import
HfRunner
from
tests.models.language.pooling_mteb_test.mteb_utils
import
(
VllmMtebCrossEncoder
,
mteb_test_rerank_models
,
)
from
tests.models.utils
import
LASTPoolingRerankModelInfo
,
RerankModelInfo
from
tests.models.utils
import
RerankModelInfo
from
.mteb_score_utils
import
VllmMtebCrossEncoder
,
mteb_test_rerank_models
RERANK_MODELS
=
[
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"BAAI/bge-reranker-v2-gemma"
,
architecture
=
"GemmaForSequenceClassification"
,
mteb_score
=
0.33757
,
...
...
@@ -25,6 +23,10 @@ RERANK_MODELS = [
"classifier_from_token"
:
[
"Yes"
],
"method"
:
"no_post_processing"
,
},
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
),
]
...
...
tests/models/language/pooling_mteb_test/test_cross_encoder.py
View file @
1ff67df1
...
...
@@ -3,23 +3,29 @@
import
pytest
from
tests.models.utils
import
(
CLSPoolingRerankModelInfo
,
LASTPoolingRerankModelInfo
,
RerankModelInfo
,
)
from
.mteb_utils
import
mteb_test_rerank_models
from
.mteb_
score_
utils
import
mteb_test_rerank_models
RERANK_MODELS
=
[
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"cross-encoder/ms-marco-TinyBERT-L-2-v2"
,
mteb_score
=
0.32898
,
architecture
=
"BertForSequenceClassification"
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
),
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
,
mteb_score
=
0.25736
,
architecture
=
"Qwen3ForSequenceClassification"
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
),
]
...
...
tests/models/language/pooling_mteb_test/test_gte.py
View file @
1ff67df1
...
...
@@ -5,36 +5,32 @@ import pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
(
CLSPoolingEmbedModelInfo
,
CLSPoolingRerankModelInfo
,
EmbedModelInfo
,
LASTPoolingEmbedModelInfo
,
RerankModelInfo
,
)
from
.mteb_utils
import
mteb_test_embed_models
,
mteb_test_rerank_models
from
.mteb_embed_utils
import
mteb_test_embed_models
from
.mteb_score_utils
import
mteb_test_rerank_models
MODELS
=
[
########## BertModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"thenlper/gte-large"
,
mteb_score
=
0.76807651
,
architecture
=
"BertModel"
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPoolingEmbedModelInfo
(
"thenlper/gte-base"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"thenlper/gte-small"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"thenlper/gte-base"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"thenlper/gte-small"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"thenlper/gte-large-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"thenlper/gte-base-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"thenlper/gte-base-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"thenlper/gte-small-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
########### NewModel
...
...
@@ -43,48 +39,64 @@ MODELS = [
# - whether to use token_type_embeddings
# - whether to use context expansion
# So only test one (the most widely used) model
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-multilingual-base"
,
architecture
=
"GteNewModel"
,
mteb_score
=
0.775074696
,
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-base-en-v1.5"
,
architecture
=
"GteNewModel"
,
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
enable_test
=
False
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-large-en-v1.5"
,
architecture
=
"GteNewModel"
,
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
enable_test
=
False
,
),
########### Qwen2ForCausalLM
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-Qwen2-1.5B-instruct"
,
mteb_score
=
0.758473459018872
,
architecture
=
"Qwen2ForCausalLM"
,
pooling_type
=
"LAST"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
########## ModernBertModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-modernbert-base"
,
mteb_score
=
0.748193353
,
architecture
=
"ModernBertModel"
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
########## Qwen3ForCausalLM
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Qwen/Qwen3-Embedding-0.6B"
,
mteb_score
=
0.771163695
,
architecture
=
"Qwen3ForCausalLM"
,
dtype
=
"float32"
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
),
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Qwen/Qwen3-Embedding-4B"
,
architecture
=
"Qwen3ForCausalLM"
,
dtype
=
"float32"
,
...
...
@@ -93,18 +105,26 @@ MODELS = [
]
RERANK_MODELS
=
[
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
# classifier_pooling: mean
"Alibaba-NLP/gte-reranker-modernbert-base"
,
mteb_score
=
0.33386
,
architecture
=
"ModernBertForSequenceClassification"
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"Alibaba-NLP/gte-multilingual-reranker-base"
,
mteb_score
=
0.33062
,
architecture
=
"GteNewForSequenceClassification"
,
hf_overrides
=
{
"architectures"
:
[
"GteNewForSequenceClassification"
]},
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
]
...
...
tests/models/language/pooling_mteb_test/test_intfloat.py
View file @
1ff67df1
...
...
@@ -3,40 +3,44 @@
import
pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
CLSPoolingEmbedModelInfo
,
EmbedModelInfo
from
tests.models.utils
import
EmbedModelInfo
from
.mteb_utils
import
mteb_test_embed_models
from
.mteb_
embed_
utils
import
mteb_test_embed_models
MODELS
=
[
########## BertModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"intfloat/e5-small"
,
architecture
=
"BertModel"
,
mteb_score
=
0.742285423
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPoolingEmbedModelInfo
(
"intfloat/e5-base"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"intfloat/e5-large"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"intfloat/e5-base"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"intfloat/e5-large"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
EmbedModelInfo
(
"intfloat/multilingual-e5-small"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
########## XLMRobertaModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"intfloat/multilingual-e5-base"
,
architecture
=
"XLMRobertaModel"
,
mteb_score
=
0.779325955
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"intfloat/multilingual-e5-large"
,
architecture
=
"XLMRobertaModel"
,
enable_test
=
False
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"intfloat/multilingual-e5-large-instruct"
,
architecture
=
"XLMRobertaModel"
,
enable_test
=
False
,
...
...
tests/models/language/pooling_mteb_test/test_jina.py
View file @
1ff67df1
...
...
@@ -10,30 +10,37 @@ from tests.models.language.pooling.embed_utils import (
matryoshka_fy
,
)
from
tests.models.utils
import
(
CLSPoolingEmbedModelInfo
,
CLSPoolingRerankModelInfo
,
EmbedModelInfo
,
RerankModelInfo
,
)
from
vllm
import
PoolingParams
from
.mteb_utils
import
mteb_test_embed_models
,
mteb_test_rerank_models
from
.mteb_embed_utils
import
mteb_test_embed_models
from
.mteb_score_utils
import
mteb_test_rerank_models
EMBEDDING_MODELS
=
[
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"jinaai/jina-embeddings-v3"
,
mteb_score
=
0.824413164
,
architecture
=
"XLMRobertaModel"
,
is_matryoshka
=
True
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
dtype
=
"float32"
,
)
]
RERANK_MODELS
=
[
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"jinaai/jina-reranker-v2-base-multilingual"
,
mteb_score
=
0.33643
,
architecture
=
"XLMRobertaForSequenceClassification"
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
)
]
...
...
tests/models/language/pooling_mteb_test/test_mxbai_rerank.py
View file @
1ff67df1
...
...
@@ -6,9 +6,9 @@ import pytest
import
torch
from
tests.conftest
import
HfRunner
from
tests.models.utils
import
LASTPoolingRerankModelInfo
,
RerankModelInfo
from
tests.models.utils
import
RerankModelInfo
from
.mteb_utils
import
mteb_test_rerank_models
from
.mteb_
score_
utils
import
mteb_test_rerank_models
mxbai_rerank_hf_overrides
=
{
"architectures"
:
[
"Qwen2ForSequenceClassification"
],
...
...
@@ -17,14 +17,18 @@ mxbai_rerank_hf_overrides = {
}
RERANK_MODELS
=
[
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"mixedbread-ai/mxbai-rerank-base-v2"
,
architecture
=
"Qwen2ForSequenceClassification"
,
hf_overrides
=
mxbai_rerank_hf_overrides
,
mteb_score
=
0.273
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
),
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"mixedbread-ai/mxbai-rerank-large-v2"
,
architecture
=
"Qwen2ForSequenceClassification"
,
hf_overrides
=
mxbai_rerank_hf_overrides
,
...
...
tests/models/language/pooling_mteb_test/test_nemotron.py
View file @
1ff67df1
...
...
@@ -3,29 +3,39 @@
import
pytest
from
tests.models.language.pooling_mteb_test.mteb_embed_utils
import
(
mteb_test_embed_models
,
)
from
tests.models.language.pooling_mteb_test.mteb_score_utils
import
(
mteb_test_rerank_models
,
)
from
tests.models.utils
import
(
EmbedModelInfo
,
LASTPoolingEmbedModelInfo
,
LASTPoolingRerankModelInfo
,
RerankModelInfo
,
)
from
.mteb_utils
import
mteb_test_embed_models
,
mteb_test_rerank_models
EMBEDDING_MODELS
=
[
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nvidia/llama-nemotron-embed-1b-v2"
,
architecture
=
"LlamaBidirectionalModel"
,
mteb_score
=
0.689164662128673
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
)
]
RERANK_MODELS
=
[
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"nvidia/llama-nemotron-rerank-1b-v2"
,
architecture
=
"LlamaBidirectionalForSequenceClassification"
,
chat_template_name
=
"nemotron-rerank.jinja"
,
mteb_score
=
0.33994
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
),
]
...
...
tests/models/language/pooling_mteb_test/test_nomic.py
View file @
1ff67df1
...
...
@@ -4,30 +4,38 @@
import
pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
CLSPoolingEmbedModelInfo
,
EmbedModelInfo
from
tests.models.utils
import
EmbedModelInfo
from
.mteb_utils
import
mteb_test_embed_models
from
.mteb_
embed_
utils
import
mteb_test_embed_models
MODELS
=
[
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v1"
,
architecture
=
"NomicBertModel"
,
mteb_score
=
0.737568559
,
enable_test
=
True
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v1.5"
,
architecture
=
"NomicBertModel"
,
enable_test
=
False
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/CodeRankEmbed"
,
architecture
=
"NomicBertModel"
,
enable_test
=
False
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v2-moe"
,
architecture
=
"NomicBertModel"
,
mteb_score
=
0.715488912
,
enable_test
=
True
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
),
]
...
...
tests/models/language/pooling_mteb_test/test_qwen3_reranker.py
View file @
1ff67df1
...
...
@@ -6,10 +6,10 @@ import pytest
import
torch
from
tests.conftest
import
HfRunner
from
tests.models.utils
import
LASTPoolingRerankModelInfo
,
RerankModelInfo
from
tests.models.utils
import
RerankModelInfo
from
tests.utils
import
multi_gpu_test
from
.mteb_utils
import
mteb_test_rerank_models
from
.mteb_
score_
utils
import
mteb_test_rerank_models
qwen3_reranker_hf_overrides
=
{
"architectures"
:
[
"Qwen3ForSequenceClassification"
],
...
...
@@ -18,14 +18,18 @@ qwen3_reranker_hf_overrides = {
}
RERANK_MODELS
=
[
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"Qwen/Qwen3-Reranker-0.6B"
,
architecture
=
"Qwen3ForSequenceClassification"
,
mteb_score
=
0.25736
,
hf_overrides
=
qwen3_reranker_hf_overrides
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
),
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"Qwen/Qwen3-Reranker-4B"
,
architecture
=
"Qwen3ForSequenceClassification"
,
hf_overrides
=
qwen3_reranker_hf_overrides
,
...
...
tests/models/language/pooling_mteb_test/test_snowflake_arctic_embed.py
View file @
1ff67df1
...
...
@@ -4,62 +4,82 @@
import
pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
CLSPoolingEmbedModelInfo
,
EmbedModelInfo
from
tests.models.utils
import
EmbedModelInfo
from
.mteb_utils
import
mteb_test_embed_models
from
.mteb_
embed_
utils
import
mteb_test_embed_models
MODELS
=
[
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-xs"
,
is_matryoshka
=
False
,
architecture
=
"BertModel"
,
mteb_score
=
0.714927797
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-s"
,
is_matryoshka
=
False
,
architecture
=
"BertModel"
,
enable_test
=
False
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-m"
,
is_matryoshka
=
False
,
architecture
=
"BertModel"
,
enable_test
=
False
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-m-long"
,
is_matryoshka
=
False
,
architecture
=
"NomicBertModel"
,
mteb_score
=
0.681146831
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-l"
,
is_matryoshka
=
False
,
architecture
=
"BertModel"
,
enable_test
=
False
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-m-v1.5"
,
is_matryoshka
=
True
,
architecture
=
"BertModel"
,
mteb_score
=
0.649088363
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-l-v2.0"
,
is_matryoshka
=
True
,
architecture
=
"XLMRobertaModel"
,
mteb_score
=
0.712258299
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-m-v2.0"
,
is_matryoshka
=
True
,
architecture
=
"GteModel"
,
mteb_score
=
0.706622444
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
]
...
...
tests/models/language/pooling_mteb_test/test_st_projector.py
View file @
1ff67df1
...
...
@@ -3,25 +3,31 @@
import
pytest
from
tests.models.utils
import
(
CLSPoolingEmbedModelInfo
,
EmbedModelInfo
,
LASTPoolingEmbedModelInfo
,
)
from
.mteb_utils
import
mteb_test_embed_models
from
.mteb_
embed_
utils
import
mteb_test_embed_models
# ST models with projector (Dense) layers
ST_PROJECTOR_MODELS
=
[
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"TencentBAC/Conan-embedding-v1"
,
architecture
=
"BertModel"
,
mteb_score
=
0.688611955
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
),
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"google/embeddinggemma-300m"
,
architecture
=
"Gemma3TextModel"
,
mteb_score
=
0.7473819294684156
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
dtype
=
"float32"
,
),
...
...
tests/models/utils.py
View file @
1ff67df1
...
...
@@ -10,7 +10,7 @@ import torch
import
torch.nn.functional
as
F
from
transformers
import
PretrainedConfig
from
vllm.config.model
import
ModelConfig
,
ModelDType
,
RunnerOption
from
vllm.config.model
import
AttnTypeStr
,
ModelConfig
,
ModelDType
,
RunnerOption
from
vllm.logprobs
import
Logprob
,
PromptLogprobs
,
SampleLogprobs
from
vllm.multimodal.processing
import
InputProcessingContext
from
vllm.tokenizers
import
cached_tokenizer_from_config
...
...
@@ -375,7 +375,10 @@ class ModelInfo:
max_model_len
:
int
|
None
=
None
hf_dtype
:
str
=
"float32"
hf_overrides
:
dict
[
str
,
Any
]
|
None
=
None
default_pooling_type
:
str
=
""
pooling_type
:
str
|
None
=
None
attn_type
:
AttnTypeStr
|
None
=
None
is_prefix_caching_supported
:
bool
|
None
=
None
is_chunked_prefill_supported
:
bool
|
None
=
None
enable_test
:
bool
=
True
...
...
@@ -386,32 +389,12 @@ class EmbedModelInfo(ModelInfo):
matryoshka_dimensions
:
list
[
int
]
|
None
=
None
@
dataclass
class
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
):
default_pooling_type
:
str
=
"CLS"
@
dataclass
class
LASTPoolingEmbedModelInfo
(
EmbedModelInfo
):
default_pooling_type
:
str
=
"LAST"
@
dataclass
class
RerankModelInfo
(
ModelInfo
):
mteb_score
:
float
|
None
=
None
chat_template_name
:
str
|
None
=
None
@
dataclass
class
CLSPoolingRerankModelInfo
(
RerankModelInfo
):
default_pooling_type
:
str
=
"CLS"
@
dataclass
class
LASTPoolingRerankModelInfo
(
RerankModelInfo
):
default_pooling_type
:
str
=
"LAST"
@
dataclass
class
GenerateModelInfo
(
ModelInfo
):
hf_dtype
:
str
=
"auto"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment