Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1ff67df1
Unverified
Commit
1ff67df1
authored
Dec 24, 2025
by
wang.yuqi
Committed by
GitHub
Dec 24, 2025
Browse files
[CI] Reorganization pooling_mteb_test (#31265)
Signed-off-by:
wang.yuqi
<
noooop@126.com
>
parent
7cd288a4
Changes
18
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
480 additions
and
347 deletions
+480
-347
tests/entrypoints/pooling/embed/test_correctness_mteb.py
tests/entrypoints/pooling/embed/test_correctness_mteb.py
+1
-1
tests/entrypoints/pooling/score/test_correctness_mteb.py
tests/entrypoints/pooling/score/test_correctness_mteb.py
+1
-1
tests/entrypoints/pooling/score/test_utils.py
tests/entrypoints/pooling/score/test_utils.py
+4
-5
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
+228
-0
tests/models/language/pooling_mteb_test/mteb_score_utils.py
tests/models/language/pooling_mteb_test/mteb_score_utils.py
+25
-197
tests/models/language/pooling_mteb_test/test_baai.py
tests/models/language/pooling_mteb_test/test_baai.py
+36
-32
tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py
.../language/pooling_mteb_test/test_bge_reranker_v2_gemma.py
+8
-6
tests/models/language/pooling_mteb_test/test_cross_encoder.py
...s/models/language/pooling_mteb_test/test_cross_encoder.py
+11
-5
tests/models/language/pooling_mteb_test/test_gte.py
tests/models/language/pooling_mteb_test/test_gte.py
+45
-25
tests/models/language/pooling_mteb_test/test_intfloat.py
tests/models/language/pooling_mteb_test/test_intfloat.py
+17
-13
tests/models/language/pooling_mteb_test/test_jina.py
tests/models/language/pooling_mteb_test/test_jina.py
+12
-5
tests/models/language/pooling_mteb_test/test_mxbai_rerank.py
tests/models/language/pooling_mteb_test/test_mxbai_rerank.py
+8
-4
tests/models/language/pooling_mteb_test/test_nemotron.py
tests/models/language/pooling_mteb_test/test_nemotron.py
+16
-6
tests/models/language/pooling_mteb_test/test_nomic.py
tests/models/language/pooling_mteb_test/test_nomic.py
+14
-6
tests/models/language/pooling_mteb_test/test_qwen3_reranker.py
.../models/language/pooling_mteb_test/test_qwen3_reranker.py
+8
-4
tests/models/language/pooling_mteb_test/test_snowflake_arctic_embed.py
...language/pooling_mteb_test/test_snowflake_arctic_embed.py
+30
-10
tests/models/language/pooling_mteb_test/test_st_projector.py
tests/models/language/pooling_mteb_test/test_st_projector.py
+11
-5
tests/models/utils.py
tests/models/utils.py
+5
-22
No files found.
tests/entrypoints/pooling/embed/test_correctness_mteb.py
View file @
1ff67df1
...
@@ -4,7 +4,7 @@ import os
...
@@ -4,7 +4,7 @@ import os
import
pytest
import
pytest
from
tests.models.language.pooling_mteb_test.mteb_utils
import
(
from
tests.models.language.pooling_mteb_test.mteb_
embed_
utils
import
(
MTEB_EMBED_TASKS
,
MTEB_EMBED_TASKS
,
MTEB_EMBED_TOL
,
MTEB_EMBED_TOL
,
OpenAIClientMtebEncoder
,
OpenAIClientMtebEncoder
,
...
...
tests/entrypoints/pooling/score/test_correctness_mteb.py
View file @
1ff67df1
...
@@ -4,7 +4,7 @@ import os
...
@@ -4,7 +4,7 @@ import os
import
pytest
import
pytest
from
tests.models.language.pooling_mteb_test.mteb_utils
import
(
from
tests.models.language.pooling_mteb_test.mteb_
score_
utils
import
(
MTEB_RERANK_LANGS
,
MTEB_RERANK_LANGS
,
MTEB_RERANK_TASKS
,
MTEB_RERANK_TASKS
,
MTEB_RERANK_TOL
,
MTEB_RERANK_TOL
,
...
...
tests/entrypoints/pooling/score/test_utils.py
View file @
1ff67df1
...
@@ -202,11 +202,10 @@ class TestGetScorePrompt:
...
@@ -202,11 +202,10 @@ class TestGetScorePrompt:
tokenization_kwargs
,
tokenization_kwargs
,
mock_model_no_score_template
,
mock_model_no_score_template
,
):
):
# FIXME: Models implementing SupportsScoreTemplate must use their custom
# FIXME: For now, we only apply a template when one is explicitly provided.
# template implementation by default to preserve existing functionality.
# We cannot rely on the tokenizer's chat template because many models
# Attempting to use tokenizer_config.json templates would most likely break
# inherit junk templates from their base LLM, which breaks both the models
# these models, as often they just inherit the template from the original LLM.
# and the tests that use them.
# CLI --chat-template overrides are still supported.
with
(
with
(
patch
(
patch
(
"vllm.model_executor.model_loader.get_model_cls"
,
"vllm.model_executor.model_loader.get_model_cls"
,
...
...
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
0 → 100644
View file @
1ff67df1
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
mteb
import
numpy
as
np
import
torch
from
mteb.models
import
ModelMeta
from
mteb.types
import
Array
from
torch.utils.data
import
DataLoader
import
tests.ci_envs
as
ci_envs
from
tests.models.utils
import
(
EmbedModelInfo
,
check_embeddings_close
,
get_vllm_extra_kwargs
,
)
# STS12 is the reference embedding task (see #17175). For most embedding
# models on this task:
# - implementation details and small tensor-dtype changes move the score
#   by less than 1e-4;
# - a genuinely different model moves the score by more than 1e-3.
# 1e-4 is therefore used as the tolerance threshold.
MTEB_EMBED_TASKS = ["STS12"]
MTEB_EMBED_TOL = 1e-4

# Placeholder metadata shared by the encoder wrappers in this module.
# Only the name, revision, framework list and modalities carry a value;
# every other descriptive field is explicitly left as None.
_empty_model_meta = ModelMeta(
    loader=None,
    name="vllm/model",
    revision="1",
    release_date=None,
    languages=None,
    framework=[],
    similarity_fn_name=None,
    n_parameters=None,
    memory_usage_mb=None,
    max_tokens=None,
    embed_dim=None,
    license=None,
    open_weights=None,
    public_training_code=None,
    public_training_data=None,
    use_instructions=None,
    training_datasets=None,
    # 'image' can be added to evaluate multimodal models
    modalities=["text"],
)
class MtebEmbedMixin(mteb.EncoderProtocol):
    """Cosine-similarity helpers shared by the MTEB embedding encoders.

    The mixin carries the placeholder model metadata and the two
    similarity functions; it does not define ``encode`` itself.
    """

    mteb_model_meta = _empty_model_meta

    def similarity(
        self,
        embeddings1: np.ndarray,
        embeddings2: np.ndarray,
    ) -> np.ndarray:
        """Return the all-pairs cosine similarity matrix.

        Entry ``[i, j]`` is the cosine similarity between row ``i`` of
        ``embeddings1`` and row ``j`` of ``embeddings2``.
        """
        lengths1 = np.linalg.norm(embeddings1, axis=1, keepdims=True)
        lengths2 = np.linalg.norm(embeddings2, axis=1, keepdims=True)
        dot_products = np.dot(embeddings1, embeddings2.T)
        # Column of row-1 norms times row of row-2 norms broadcasts to the
        # same shape as the dot-product matrix.
        scale = lengths1 * lengths2.T
        return dot_products / scale

    def similarity_pairwise(
        self,
        embeddings1: Array,
        embeddings2: Array,
    ) -> Array:
        """Return the row-by-row cosine similarity.

        Entry ``i`` is the cosine similarity between row ``i`` of
        ``embeddings1`` and row ``i`` of ``embeddings2``.
        """
        lengths1 = np.linalg.norm(embeddings1, axis=1, keepdims=True)
        lengths2 = np.linalg.norm(embeddings2, axis=1, keepdims=True)
        row_dots = np.sum(embeddings1 * embeddings2, axis=1)
        scale = lengths1.flatten() * lengths2.flatten()
        return row_dots / scale
class VllmMtebEncoder(MtebEmbedMixin):
    """MTEB encoder that produces embeddings through ``vllm_model.embed``."""

    def __init__(self, vllm_model):
        """Keep the model handle and create the shuffling generator."""
        self.llm = vllm_model
        # Fixed seed, so the shuffling done in ``encode`` is reproducible.
        self.rng = np.random.default_rng(seed=42)

    def encode(
        self,
        inputs: DataLoader[mteb.types.BatchedInput],
        *args,
        **kwargs,
    ) -> np.ndarray:
        """Embed every text in ``inputs`` and return them in input order.

        Extra positional and keyword arguments are accepted and ignored.
        """
        texts = []
        for batch in inputs:
            texts.extend(batch["text"])

        # Submit the texts in a random order, hoping to surface potential
        # scheduling issues.
        order = self.rng.permutation(len(texts))
        shuffled = [texts[idx] for idx in order]

        raw_outputs = self.llm.embed(shuffled, use_tqdm=False)

        # argsort of a permutation is its inverse, which puts the
        # embeddings back into the original input order.
        restore = np.argsort(order)
        return np.array(raw_outputs)[restore]
class OpenAIClientMtebEncoder(MtebEmbedMixin):
    """MTEB encoder that obtains embeddings via ``client.embeddings.create``."""

    def __init__(self, model_name: str, client):
        """Keep the model name and client, and create the shuffling generator."""
        self.model_name = model_name
        self.client = client
        # Fixed seed, so the shuffling done in ``encode`` is reproducible.
        self.rng = np.random.default_rng(seed=42)

    def encode(
        self,
        inputs: DataLoader[mteb.types.BatchedInput],
        *args,
        **kwargs,
    ) -> np.ndarray:
        """Embed every text in ``inputs`` and return them in input order.

        All texts are sent in a single ``embeddings.create`` call. Extra
        positional and keyword arguments are accepted and ignored.
        """
        texts = []
        for batch in inputs:
            texts.extend(batch["text"])

        # Submit the texts in a random order, hoping to surface potential
        # scheduling issues.
        order = self.rng.permutation(len(texts))
        shuffled = [texts[idx] for idx in order]

        response = self.client.embeddings.create(
            model=self.model_name,
            input=shuffled,
        )
        vectors = [item.embedding for item in response.data]

        # argsort of a permutation is its inverse, which puts the
        # embeddings back into the original input order.
        restore = np.argsort(order)
        return np.array(vectors)[restore]
def run_mteb_embed_task(encoder: mteb.EncoderProtocol, tasks):
    """Evaluate ``encoder`` on ``tasks`` and return one main score.

    The score returned is the ``"main_score"`` of the first ``"test"``
    entry of the first evaluation result; results for any further tasks
    are not inspected.
    """
    selected_tasks = mteb.get_tasks(tasks=tasks)
    evaluation = mteb.evaluate(
        encoder,
        selected_tasks,
        cache=None,
        show_progress_bar=False,
    )
    first_result = evaluation[0]
    return first_result.scores["test"][0]["main_score"]
def mteb_test_embed_models(
    hf_runner,
    vllm_runner,
    model_info: EmbedModelInfo,
    vllm_extra_kwargs=None,
    hf_model_callback=None,
    atol=MTEB_EMBED_TOL,
):
    """Check a vLLM embedding model against a reference MTEB score.

    The model is loaded with ``vllm_runner``; its configuration is compared
    with whatever expectations ``model_info`` provides, its MTEB score is
    measured, and the embedding of a long prompt is checked for NaNs and
    for the configured embedding size. The reference score is
    ``model_info.mteb_score`` when that is set; otherwise it is measured
    with ``hf_runner`` (loaded as a sentence-transformer), and the raw
    embeddings of the two backends are compared as well.

    Args:
        hf_runner: Factory for the reference model context manager.
        vllm_runner: Factory for the vLLM model context manager.
        model_info: Model name plus the expected properties to verify.
        vllm_extra_kwargs: Extra vLLM arguments, merged by
            ``get_vllm_extra_kwargs``.
        hf_model_callback: Optional hook called with the reference model
            before it is evaluated.
        atol: Largest allowed value of ``reference - vllm`` score.

    Raises:
        AssertionError: If any configuration, output or score check fails.
    """
    vllm_extra_kwargs = get_vllm_extra_kwargs(model_info, vllm_extra_kwargs)

    # Test embed_dims, isnan and whether to use normalize
    example_prompts = ["The chef prepared a delicious meal." * 1000]

    with vllm_runner(
        model_info.name,
        runner="pooling",
        max_model_len=model_info.max_model_len,
        **vllm_extra_kwargs,
    ) as vllm_model:
        model_config = vllm_model.llm.llm_engine.model_config

        # Confirm whether vllm is using the correct architecture
        if model_info.architecture:
            assert model_info.architecture in model_config.architectures

        # Confirm whether the important configs in model_config are correct.
        # An expectation left as None in model_info is not checked.
        if model_info.pooling_type is not None:
            assert model_config.pooler_config.pooling_type == model_info.pooling_type

        if model_info.attn_type is not None:
            assert model_config.attn_type == model_info.attn_type

        if model_info.is_prefix_caching_supported is not None:
            assert (
                model_config.is_prefix_caching_supported
                == model_info.is_prefix_caching_supported
            )

        if model_info.is_chunked_prefill_supported is not None:
            assert (
                model_config.is_chunked_prefill_supported
                == model_info.is_chunked_prefill_supported
            )

        vllm_main_score = run_mteb_embed_task(
            VllmMtebEncoder(vllm_model), MTEB_EMBED_TASKS
        )
        vllm_dtype = vllm_model.llm.llm_engine.model_config.dtype
        head_dtype = model_config.head_dtype

        # Test embedding_size, isnan and whether to use normalize
        vllm_outputs = vllm_model.embed(example_prompts, truncate_prompt_tokens=-1)
        outputs_tensor = torch.tensor(vllm_outputs)
        assert not torch.any(torch.isnan(outputs_tensor))
        embedding_size = model_config.embedding_size
        assert outputs_tensor.shape[-1] == embedding_size

    # Accelerate mteb test by setting
    # SentenceTransformers mteb score to a constant
    if model_info.mteb_score is not None:
        reference_score = model_info.mteb_score
        reference_dtype = "Constant"
    else:
        with hf_runner(
            model_info.name,
            is_sentence_transformer=True,
            dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
        ) as hf_model:
            # e.g. setting default parameters for the encode method of hf_runner
            if hf_model_callback is not None:
                hf_model_callback(hf_model)

            reference_score = run_mteb_embed_task(hf_model, MTEB_EMBED_TASKS)
            reference_dtype = next(hf_model.model.parameters()).dtype

            # Check embeddings close to hf outputs
            hf_outputs = hf_model.encode(example_prompts)
            check_embeddings_close(
                embeddings_0_lst=hf_outputs,
                embeddings_1_lst=vllm_outputs,
                name_0="hf",
                name_1="vllm",
                tol=1e-2,
            )

    score_gap = reference_score - vllm_main_score

    print("Model:", model_info.name)
    print("VLLM:", f"dtype:{vllm_dtype}", f"head_dtype:{head_dtype}", vllm_main_score)
    print("SentenceTransformers:", reference_dtype, reference_score)
    print("Difference:", score_gap)

    # We are not concerned that the vllm mteb results are better
    # than SentenceTransformers, so we only perform one-sided testing.
    assert score_gap < atol
tests/models/language/pooling_mteb_test/mteb_utils.py
→
tests/models/language/pooling_mteb_test/mteb_
score_
utils.py
View file @
1ff67df1
...
@@ -7,37 +7,24 @@ from pathlib import Path
...
@@ -7,37 +7,24 @@ from pathlib import Path
import
mteb
import
mteb
import
numpy
as
np
import
numpy
as
np
import
requests
import
requests
import
torch
from
mteb.models
import
ModelMeta
from
mteb.models
import
ModelMeta
from
mteb.types
import
Array
from
torch.utils.data
import
DataLoader
from
torch.utils.data
import
DataLoader
import
tests.ci_envs
as
ci_envs
from
tests.models.utils
import
(
from
tests.models.utils
import
(
EmbedModelInfo
,
RerankModelInfo
,
RerankModelInfo
,
check_embeddings_close
,
get_vllm_extra_kwargs
,
get_vllm_extra_kwargs
,
)
)
template_home
=
(
Path
(
__file__
).
parent
.
parent
.
parent
.
parent
.
parent
/
"examples/pooling/score/template"
)
# Most embedding models on the STS12 task (See #17175):
# - Model implementation and minor changes in tensor dtype
# results in differences less than 1e-4
# - Different model results in differences more than 1e-3
# 1e-4 is a good tolerance threshold
MTEB_EMBED_TASKS
=
[
"STS12"
]
MTEB_EMBED_TOL
=
1e-4
# See #19344
# See #19344
MTEB_RERANK_TASKS
=
[
"NFCorpus"
]
MTEB_RERANK_TASKS
=
[
"NFCorpus"
]
MTEB_RERANK_LANGS
=
[
"eng"
]
MTEB_RERANK_LANGS
=
[
"eng"
]
MTEB_RERANK_TOL
=
2e-3
MTEB_RERANK_TOL
=
2e-3
template_home
=
(
Path
(
__file__
).
parent
.
parent
.
parent
.
parent
.
parent
/
"examples/pooling/score/template"
)
_empty_model_meta
=
ModelMeta
(
_empty_model_meta
=
ModelMeta
(
loader
=
None
,
loader
=
None
,
name
=
"vllm/model"
,
name
=
"vllm/model"
,
...
@@ -60,84 +47,11 @@ _empty_model_meta = ModelMeta(
...
@@ -60,84 +47,11 @@ _empty_model_meta = ModelMeta(
)
)
class
Vllm
MtebEncoder
(
mteb
.
EncoderProtocol
):
class
Mteb
Cross
Encoder
Mixin
(
mteb
.
Cross
EncoderProtocol
):
mteb_model_meta
=
_empty_model_meta
mteb_model_meta
=
_empty_model_meta
def
__init__
(
self
,
vllm_model
):
self
.
llm
=
vllm_model
self
.
rng
=
np
.
random
.
default_rng
(
seed
=
42
)
def
encode
(
self
,
inputs
:
DataLoader
[
mteb
.
types
.
BatchedInput
],
*
args
,
**
kwargs
,
)
->
np
.
ndarray
:
# Hoping to discover potential scheduling
# issues by randomizing the order.
sentences
=
[
text
for
batch
in
inputs
for
text
in
batch
[
"text"
]]
r
=
self
.
rng
.
permutation
(
len
(
sentences
))
sentences
=
[
sentences
[
i
]
for
i
in
r
]
outputs
=
self
.
llm
.
embed
(
sentences
,
use_tqdm
=
False
)
embeds
=
np
.
array
(
outputs
)
embeds
=
embeds
[
np
.
argsort
(
r
)]
return
embeds
def
similarity
(
self
,
embeddings1
:
np
.
ndarray
,
embeddings2
:
np
.
ndarray
,
)
->
np
.
ndarray
:
# Cosine similarity
norm1
=
np
.
linalg
.
norm
(
embeddings1
,
axis
=
1
,
keepdims
=
True
)
norm2
=
np
.
linalg
.
norm
(
embeddings2
,
axis
=
1
,
keepdims
=
True
)
sim
=
np
.
dot
(
embeddings1
,
embeddings2
.
T
)
/
(
norm1
*
norm2
.
T
)
return
sim
def
similarity_pairwise
(
self
,
embeddings1
:
Array
,
embeddings2
:
Array
,
)
->
Array
:
# Cosine similarity
norm1
=
np
.
linalg
.
norm
(
embeddings1
,
axis
=
1
,
keepdims
=
True
)
norm2
=
np
.
linalg
.
norm
(
embeddings2
,
axis
=
1
,
keepdims
=
True
)
sim
=
np
.
sum
(
embeddings1
*
embeddings2
,
axis
=
1
)
/
(
norm1
.
flatten
()
*
norm2
.
flatten
()
)
return
sim
class
OpenAIClientMtebEncoder
(
VllmMtebEncoder
):
def
__init__
(
self
,
model_name
:
str
,
client
):
self
.
model_name
=
model_name
self
.
client
=
client
self
.
rng
=
np
.
random
.
default_rng
(
seed
=
42
)
def
encode
(
self
,
inputs
:
DataLoader
[
mteb
.
types
.
BatchedInput
],
*
args
,
**
kwargs
,
)
->
np
.
ndarray
:
# Hoping to discover potential scheduling
# issues by randomizing the order.
sentences
=
[
text
for
batch
in
inputs
for
text
in
batch
[
"text"
]]
r
=
self
.
rng
.
permutation
(
len
(
sentences
))
sentences
=
[
sentences
[
i
]
for
i
in
r
]
embeddings
=
self
.
client
.
embeddings
.
create
(
model
=
self
.
model_name
,
input
=
sentences
)
outputs
=
[
d
.
embedding
for
d
in
embeddings
.
data
]
embeds
=
np
.
array
(
outputs
)
embeds
=
embeds
[
np
.
argsort
(
r
)]
return
embeds
class
VllmMtebCrossEncoder
(
mteb
.
CrossEncoderProtocol
):
mteb_model_meta
=
_empty_model_meta
class
VllmMtebCrossEncoder
(
MtebCrossEncoderMixin
):
def
__init__
(
self
,
vllm_model
):
def
__init__
(
self
,
vllm_model
):
self
.
llm
=
vllm_model
self
.
llm
=
vllm_model
self
.
rng
=
np
.
random
.
default_rng
(
seed
=
42
)
self
.
rng
=
np
.
random
.
default_rng
(
seed
=
42
)
...
@@ -164,7 +78,7 @@ class VllmMtebCrossEncoder(mteb.CrossEncoderProtocol):
...
@@ -164,7 +78,7 @@ class VllmMtebCrossEncoder(mteb.CrossEncoderProtocol):
return
scores
return
scores
class
ScoreClientMtebEncoder
(
m
teb
.
CrossEncoder
Protocol
):
class
ScoreClientMtebEncoder
(
M
tebCrossEncoder
Mixin
):
mteb_model_meta
=
_empty_model_meta
mteb_model_meta
=
_empty_model_meta
def
__init__
(
self
,
model_name
:
str
,
url
):
def
__init__
(
self
,
model_name
:
str
,
url
):
...
@@ -216,102 +130,6 @@ class RerankClientMtebEncoder(ScoreClientMtebEncoder):
...
@@ -216,102 +130,6 @@ class RerankClientMtebEncoder(ScoreClientMtebEncoder):
return
response
[
"results"
][
0
][
"relevance_score"
]
return
response
[
"results"
][
0
][
"relevance_score"
]
def
run_mteb_embed_task
(
encoder
:
mteb
.
EncoderProtocol
,
tasks
):
tasks
=
mteb
.
get_tasks
(
tasks
=
tasks
)
results
=
mteb
.
evaluate
(
encoder
,
tasks
,
cache
=
None
,
show_progress_bar
=
False
,
)
main_score
=
results
[
0
].
scores
[
"test"
][
0
][
"main_score"
]
return
main_score
def
mteb_test_embed_models
(
hf_runner
,
vllm_runner
,
model_info
:
EmbedModelInfo
,
vllm_extra_kwargs
=
None
,
hf_model_callback
=
None
,
atol
=
MTEB_EMBED_TOL
,
):
vllm_extra_kwargs
=
get_vllm_extra_kwargs
(
model_info
,
vllm_extra_kwargs
)
# Test embed_dims, isnan and whether to use normalize
example_prompts
=
[
"The chef prepared a delicious meal."
*
1000
]
with
vllm_runner
(
model_info
.
name
,
runner
=
"pooling"
,
max_model_len
=
model_info
.
max_model_len
,
**
vllm_extra_kwargs
,
)
as
vllm_model
:
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
# Confirm whether vllm is using the correct architecture
if
model_info
.
architecture
:
assert
model_info
.
architecture
in
model_config
.
architectures
# Confirm whether vllm uses the correct default_pooling_type, which
# relates to whether chunked prefill and prefix caching are enabled
assert
(
model_config
.
_model_info
.
default_pooling_type
==
model_info
.
default_pooling_type
)
vllm_main_score
=
run_mteb_embed_task
(
VllmMtebEncoder
(
vllm_model
),
MTEB_EMBED_TASKS
)
vllm_dtype
=
vllm_model
.
llm
.
llm_engine
.
model_config
.
dtype
head_dtype
=
model_config
.
head_dtype
# Test embedding_size, isnan and whether to use normalize
vllm_outputs
=
vllm_model
.
embed
(
example_prompts
,
truncate_prompt_tokens
=-
1
)
outputs_tensor
=
torch
.
tensor
(
vllm_outputs
)
assert
not
torch
.
any
(
torch
.
isnan
(
outputs_tensor
))
embedding_size
=
model_config
.
embedding_size
assert
torch
.
tensor
(
vllm_outputs
).
shape
[
-
1
]
==
embedding_size
# Accelerate mteb test by setting
# SentenceTransformers mteb score to a constant
if
model_info
.
mteb_score
is
None
:
with
hf_runner
(
model_info
.
name
,
is_sentence_transformer
=
True
,
dtype
=
ci_envs
.
VLLM_CI_HF_DTYPE
or
model_info
.
hf_dtype
,
)
as
hf_model
:
# e.g. setting default parameters for the encode method of hf_runner
if
hf_model_callback
is
not
None
:
hf_model_callback
(
hf_model
)
st_main_score
=
run_mteb_embed_task
(
hf_model
,
MTEB_EMBED_TASKS
)
st_dtype
=
next
(
hf_model
.
model
.
parameters
()).
dtype
# Check embeddings close to hf outputs
hf_outputs
=
hf_model
.
encode
(
example_prompts
)
check_embeddings_close
(
embeddings_0_lst
=
hf_outputs
,
embeddings_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
tol
=
1e-2
,
)
else
:
st_main_score
=
model_info
.
mteb_score
st_dtype
=
"Constant"
print
(
"Model:"
,
model_info
.
name
)
print
(
"VLLM:"
,
f
"dtype:
{
vllm_dtype
}
"
,
f
"head_dtype:
{
head_dtype
}
"
,
vllm_main_score
)
print
(
"SentenceTransformers:"
,
st_dtype
,
st_main_score
)
print
(
"Difference:"
,
st_main_score
-
vllm_main_score
)
# We are not concerned that the vllm mteb results are better
# than SentenceTransformers, so we only perform one-sided testing.
assert
st_main_score
-
vllm_main_score
<
atol
def
run_mteb_rerank
(
cross_encoder
:
mteb
.
CrossEncoderProtocol
,
tasks
,
languages
):
def
run_mteb_rerank
(
cross_encoder
:
mteb
.
CrossEncoderProtocol
,
tasks
,
languages
):
with
tempfile
.
TemporaryDirectory
()
as
prediction_folder
:
with
tempfile
.
TemporaryDirectory
()
as
prediction_folder
:
bm25s
=
mteb
.
get_model
(
"bm25s"
)
bm25s
=
mteb
.
get_model
(
"bm25s"
)
...
@@ -391,18 +209,28 @@ def mteb_test_rerank_models(
...
@@ -391,18 +209,28 @@ def mteb_test_rerank_models(
# Score API is only enabled for num_labels == 1
# Score API is only enabled for num_labels == 1
assert
model_config
.
hf_config
.
num_labels
==
1
assert
model_config
.
hf_config
.
num_labels
==
1
# Confirm whether vllm uses the correct default_pooling_type, which
# Maybe load chat_template.
# relates to whether chunked prefill and prefix caching are enabled
assert
(
model_config
.
_model_info
.
default_pooling_type
==
model_info
.
default_pooling_type
)
chat_template
:
str
|
None
=
None
chat_template
:
str
|
None
=
None
if
model_info
.
chat_template_name
is
not
None
:
if
model_info
.
chat_template_name
is
not
None
:
chat_template
=
(
template_home
/
model_info
.
chat_template_name
).
read_text
()
chat_template
=
(
template_home
/
model_info
.
chat_template_name
).
read_text
()
vllm_model
.
chat_template
=
chat_template
vllm_model
.
chat_template
=
chat_template
# Confirm whether the important configs in model_config are correct.
if
model_info
.
pooling_type
is
not
None
:
assert
model_config
.
pooler_config
.
pooling_type
==
model_info
.
pooling_type
if
model_info
.
attn_type
is
not
None
:
assert
model_config
.
attn_type
==
model_info
.
attn_type
if
model_info
.
is_prefix_caching_supported
is
not
None
:
assert
(
model_config
.
is_prefix_caching_supported
==
model_info
.
is_prefix_caching_supported
)
if
model_info
.
is_chunked_prefill_supported
is
not
None
:
assert
(
model_config
.
is_chunked_prefill_supported
==
model_info
.
is_chunked_prefill_supported
)
vllm_main_score
=
run_mteb_rerank
(
vllm_main_score
=
run_mteb_rerank
(
vllm_mteb_encoder
(
vllm_model
),
vllm_mteb_encoder
(
vllm_model
),
tasks
=
MTEB_RERANK_TASKS
,
tasks
=
MTEB_RERANK_TASKS
,
...
...
tests/models/language/pooling_mteb_test/test_baai.py
View file @
1ff67df1
...
@@ -4,90 +4,94 @@ import pytest
...
@@ -4,90 +4,94 @@ import pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
(
from
tests.models.utils
import
(
CLSPoolingEmbedModelInfo
,
CLSPoolingRerankModelInfo
,
EmbedModelInfo
,
EmbedModelInfo
,
LASTPoolingEmbedModelInfo
,
RerankModelInfo
,
RerankModelInfo
,
)
)
from
.mteb_utils
import
mteb_test_embed_models
,
mteb_test_rerank_models
from
.mteb_embed_utils
import
mteb_test_embed_models
from
.mteb_score_utils
import
mteb_test_rerank_models
MODELS
=
[
MODELS
=
[
########## BertModel
########## BertModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-base-en"
,
"BAAI/bge-base-en"
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
mteb_score
=
0.779336792
,
mteb_score
=
0.779336792
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-base-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
"BAAI/bge-base-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
EmbedModelInfo
(
"BAAI/bge-small-en"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
EmbedModelInfo
(
"BAAI/bge-small-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-large-en"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
"BAAI/bge-small-en"
,
architecture
=
"BertModel"
,
enable_test
=
False
EmbedModelInfo
(
"BAAI/bge-large-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
EmbedModelInfo
(
CLSPoolingEmbedModelInfo
(
"BAAI/bge-small-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"BAAI/bge-large-en"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"BAAI/bge-large-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"BAAI/bge-large-zh-noinstruct"
,
architecture
=
"BertModel"
,
enable_test
=
False
"BAAI/bge-large-zh-noinstruct"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-base-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
"BAAI/bge-base-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-base-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
"BAAI/bge-base-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-small-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
"BAAI/bge-small-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-small-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
"BAAI/bge-small-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-large-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
"BAAI/bge-large-en-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-large-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
"BAAI/bge-large-zh-v1.5"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
########## XLMRobertaModel
########## XLMRobertaModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-m3"
,
"BAAI/bge-m3"
,
architecture
=
"XLMRobertaModel"
,
architecture
=
"XLMRobertaModel"
,
mteb_score
=
0.787343078
,
mteb_score
=
0.787343078
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
########## Qwen2Model
########## Qwen2Model
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"BAAI/bge-code-v1"
,
"BAAI/bge-code-v1"
,
architecture
=
"Qwen2Model"
,
architecture
=
"Qwen2Model"
,
mteb_score
=
0.75724465
,
mteb_score
=
0.75724465
,
dtype
=
"float32"
,
dtype
=
"float32"
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
enable_test
=
True
,
),
),
]
]
RERANK_MODELS
=
[
RERANK_MODELS
=
[
########## XLMRobertaForSequenceClassification
########## XLMRobertaForSequenceClassification
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"BAAI/bge-reranker-base"
,
"BAAI/bge-reranker-base"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
mteb_score
=
0.32398
,
mteb_score
=
0.32398
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"BAAI/bge-reranker-large"
,
"BAAI/bge-reranker-large"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
enable_test
=
False
,
enable_test
=
False
,
),
),
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"BAAI/bge-reranker-v2-m3"
,
"BAAI/bge-reranker-v2-m3"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
enable_test
=
False
,
enable_test
=
False
,
...
...
tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py
View file @
1ff67df1
...
@@ -9,14 +9,12 @@ import torch
...
@@ -9,14 +9,12 @@ import torch
from
torch.utils.data
import
DataLoader
from
torch.utils.data
import
DataLoader
from
tests.conftest
import
HfRunner
from
tests.conftest
import
HfRunner
from
tests.models.language.pooling_mteb_test.mteb_utils
import
(
from
tests.models.utils
import
RerankModelInfo
VllmMtebCrossEncoder
,
mteb_test_rerank_models
,
from
.mteb_score_utils
import
VllmMtebCrossEncoder
,
mteb_test_rerank_models
)
from
tests.models.utils
import
LASTPoolingRerankModelInfo
,
RerankModelInfo
RERANK_MODELS
=
[
RERANK_MODELS
=
[
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"BAAI/bge-reranker-v2-gemma"
,
"BAAI/bge-reranker-v2-gemma"
,
architecture
=
"GemmaForSequenceClassification"
,
architecture
=
"GemmaForSequenceClassification"
,
mteb_score
=
0.33757
,
mteb_score
=
0.33757
,
...
@@ -25,6 +23,10 @@ RERANK_MODELS = [
...
@@ -25,6 +23,10 @@ RERANK_MODELS = [
"classifier_from_token"
:
[
"Yes"
],
"classifier_from_token"
:
[
"Yes"
],
"method"
:
"no_post_processing"
,
"method"
:
"no_post_processing"
,
},
},
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
),
),
]
]
...
...
tests/models/language/pooling_mteb_test/test_cross_encoder.py
View file @
1ff67df1
...
@@ -3,23 +3,29 @@
...
@@ -3,23 +3,29 @@
import
pytest
import
pytest
from
tests.models.utils
import
(
from
tests.models.utils
import
(
CLSPoolingRerankModelInfo
,
LASTPoolingRerankModelInfo
,
RerankModelInfo
,
RerankModelInfo
,
)
)
from
.mteb_utils
import
mteb_test_rerank_models
from
.mteb_
score_
utils
import
mteb_test_rerank_models
RERANK_MODELS
=
[
RERANK_MODELS
=
[
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"cross-encoder/ms-marco-TinyBERT-L-2-v2"
,
"cross-encoder/ms-marco-TinyBERT-L-2-v2"
,
mteb_score
=
0.32898
,
mteb_score
=
0.32898
,
architecture
=
"BertForSequenceClassification"
,
architecture
=
"BertForSequenceClassification"
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
),
),
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
,
"tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
,
mteb_score
=
0.25736
,
mteb_score
=
0.25736
,
architecture
=
"Qwen3ForSequenceClassification"
,
architecture
=
"Qwen3ForSequenceClassification"
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
),
),
]
]
...
...
tests/models/language/pooling_mteb_test/test_gte.py
View file @
1ff67df1
...
@@ -5,36 +5,32 @@ import pytest
...
@@ -5,36 +5,32 @@ import pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
(
from
tests.models.utils
import
(
CLSPoolingEmbedModelInfo
,
CLSPoolingRerankModelInfo
,
EmbedModelInfo
,
EmbedModelInfo
,
LASTPoolingEmbedModelInfo
,
RerankModelInfo
,
RerankModelInfo
,
)
)
from
.mteb_utils
import
mteb_test_embed_models
,
mteb_test_rerank_models
from
.mteb_embed_utils
import
mteb_test_embed_models
from
.mteb_score_utils
import
mteb_test_rerank_models
MODELS
=
[
MODELS
=
[
########## BertModel
########## BertModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"thenlper/gte-large"
,
"thenlper/gte-large"
,
mteb_score
=
0.76807651
,
mteb_score
=
0.76807651
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"thenlper/gte-base"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
"thenlper/gte-base"
,
architecture
=
"BertModel"
,
enable_test
=
False
EmbedModelInfo
(
"thenlper/gte-small"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
EmbedModelInfo
(
CLSPoolingEmbedModelInfo
(
"thenlper/gte-small"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"thenlper/gte-large-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
"thenlper/gte-large-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"thenlper/gte-base-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
"thenlper/gte-base-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
EmbedModelInfo
(
),
CLSPoolingEmbedModelInfo
(
"thenlper/gte-small-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
"thenlper/gte-small-zh"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
########### NewModel
########### NewModel
...
@@ -43,48 +39,64 @@ MODELS = [
...
@@ -43,48 +39,64 @@ MODELS = [
# - whether to use token_type_embeddings
# - whether to use token_type_embeddings
# - whether to use context expansion
# - whether to use context expansion
# So only test one (the most widely used) model
# So only test one (the most widely used) model
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-multilingual-base"
,
"Alibaba-NLP/gte-multilingual-base"
,
architecture
=
"GteNewModel"
,
architecture
=
"GteNewModel"
,
mteb_score
=
0.775074696
,
mteb_score
=
0.775074696
,
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-base-en-v1.5"
,
"Alibaba-NLP/gte-base-en-v1.5"
,
architecture
=
"GteNewModel"
,
architecture
=
"GteNewModel"
,
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
enable_test
=
False
,
enable_test
=
False
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-large-en-v1.5"
,
"Alibaba-NLP/gte-large-en-v1.5"
,
architecture
=
"GteNewModel"
,
architecture
=
"GteNewModel"
,
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]},
enable_test
=
False
,
enable_test
=
False
,
),
),
########### Qwen2ForCausalLM
########### Qwen2ForCausalLM
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-Qwen2-1.5B-instruct"
,
"Alibaba-NLP/gte-Qwen2-1.5B-instruct"
,
mteb_score
=
0.758473459018872
,
mteb_score
=
0.758473459018872
,
architecture
=
"Qwen2ForCausalLM"
,
architecture
=
"Qwen2ForCausalLM"
,
pooling_type
=
"LAST"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
########## ModernBertModel
########## ModernBertModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Alibaba-NLP/gte-modernbert-base"
,
"Alibaba-NLP/gte-modernbert-base"
,
mteb_score
=
0.748193353
,
mteb_score
=
0.748193353
,
architecture
=
"ModernBertModel"
,
architecture
=
"ModernBertModel"
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
########## Qwen3ForCausalLM
########## Qwen3ForCausalLM
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Qwen/Qwen3-Embedding-0.6B"
,
"Qwen/Qwen3-Embedding-0.6B"
,
mteb_score
=
0.771163695
,
mteb_score
=
0.771163695
,
architecture
=
"Qwen3ForCausalLM"
,
architecture
=
"Qwen3ForCausalLM"
,
dtype
=
"float32"
,
dtype
=
"float32"
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
enable_test
=
True
,
),
),
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Qwen/Qwen3-Embedding-4B"
,
"Qwen/Qwen3-Embedding-4B"
,
architecture
=
"Qwen3ForCausalLM"
,
architecture
=
"Qwen3ForCausalLM"
,
dtype
=
"float32"
,
dtype
=
"float32"
,
...
@@ -93,18 +105,26 @@ MODELS = [
...
@@ -93,18 +105,26 @@ MODELS = [
]
]
RERANK_MODELS
=
[
RERANK_MODELS
=
[
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
# classifier_pooling: mean
# classifier_pooling: mean
"Alibaba-NLP/gte-reranker-modernbert-base"
,
"Alibaba-NLP/gte-reranker-modernbert-base"
,
mteb_score
=
0.33386
,
mteb_score
=
0.33386
,
architecture
=
"ModernBertForSequenceClassification"
,
architecture
=
"ModernBertForSequenceClassification"
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"Alibaba-NLP/gte-multilingual-reranker-base"
,
"Alibaba-NLP/gte-multilingual-reranker-base"
,
mteb_score
=
0.33062
,
mteb_score
=
0.33062
,
architecture
=
"GteNewForSequenceClassification"
,
architecture
=
"GteNewForSequenceClassification"
,
hf_overrides
=
{
"architectures"
:
[
"GteNewForSequenceClassification"
]},
hf_overrides
=
{
"architectures"
:
[
"GteNewForSequenceClassification"
]},
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
]
]
...
...
tests/models/language/pooling_mteb_test/test_intfloat.py
View file @
1ff67df1
...
@@ -3,40 +3,44 @@
...
@@ -3,40 +3,44 @@
import
pytest
import
pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
CLSPoolingEmbedModelInfo
,
EmbedModelInfo
from
tests.models.utils
import
EmbedModelInfo
from
.mteb_utils
import
mteb_test_embed_models
from
.mteb_
embed_
utils
import
mteb_test_embed_models
MODELS
=
[
MODELS
=
[
########## BertModel
########## BertModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"intfloat/e5-small"
,
"intfloat/e5-small"
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
mteb_score
=
0.742285423
,
mteb_score
=
0.742285423
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
(
"intfloat/e5-base"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
"intfloat/e5-base"
,
architecture
=
"BertModel"
,
enable_test
=
False
EmbedModelInfo
(
"intfloat/e5-large"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
EmbedModelInfo
(
CLSPoolingEmbedModelInfo
(
"intfloat/e5-large"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
CLSPoolingEmbedModelInfo
(
"intfloat/multilingual-e5-small"
,
architecture
=
"BertModel"
,
enable_test
=
False
"intfloat/multilingual-e5-small"
,
architecture
=
"BertModel"
,
enable_test
=
False
),
),
########## XLMRobertaModel
########## XLMRobertaModel
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"intfloat/multilingual-e5-base"
,
"intfloat/multilingual-e5-base"
,
architecture
=
"XLMRobertaModel"
,
architecture
=
"XLMRobertaModel"
,
mteb_score
=
0.779325955
,
mteb_score
=
0.779325955
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"intfloat/multilingual-e5-large"
,
"intfloat/multilingual-e5-large"
,
architecture
=
"XLMRobertaModel"
,
architecture
=
"XLMRobertaModel"
,
enable_test
=
False
,
enable_test
=
False
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"intfloat/multilingual-e5-large-instruct"
,
"intfloat/multilingual-e5-large-instruct"
,
architecture
=
"XLMRobertaModel"
,
architecture
=
"XLMRobertaModel"
,
enable_test
=
False
,
enable_test
=
False
,
...
...
tests/models/language/pooling_mteb_test/test_jina.py
View file @
1ff67df1
...
@@ -10,30 +10,37 @@ from tests.models.language.pooling.embed_utils import (
...
@@ -10,30 +10,37 @@ from tests.models.language.pooling.embed_utils import (
matryoshka_fy
,
matryoshka_fy
,
)
)
from
tests.models.utils
import
(
from
tests.models.utils
import
(
CLSPoolingEmbedModelInfo
,
CLSPoolingRerankModelInfo
,
EmbedModelInfo
,
EmbedModelInfo
,
RerankModelInfo
,
RerankModelInfo
,
)
)
from
vllm
import
PoolingParams
from
vllm
import
PoolingParams
from
.mteb_utils
import
mteb_test_embed_models
,
mteb_test_rerank_models
from
.mteb_embed_utils
import
mteb_test_embed_models
from
.mteb_score_utils
import
mteb_test_rerank_models
EMBEDDING_MODELS
=
[
EMBEDDING_MODELS
=
[
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"jinaai/jina-embeddings-v3"
,
"jinaai/jina-embeddings-v3"
,
mteb_score
=
0.824413164
,
mteb_score
=
0.824413164
,
architecture
=
"XLMRobertaModel"
,
architecture
=
"XLMRobertaModel"
,
is_matryoshka
=
True
,
is_matryoshka
=
True
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
dtype
=
"float32"
,
dtype
=
"float32"
,
)
)
]
]
RERANK_MODELS
=
[
RERANK_MODELS
=
[
CLSPooling
RerankModelInfo
(
RerankModelInfo
(
"jinaai/jina-reranker-v2-base-multilingual"
,
"jinaai/jina-reranker-v2-base-multilingual"
,
mteb_score
=
0.33643
,
mteb_score
=
0.33643
,
architecture
=
"XLMRobertaForSequenceClassification"
,
architecture
=
"XLMRobertaForSequenceClassification"
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
)
)
]
]
...
...
tests/models/language/pooling_mteb_test/test_mxbai_rerank.py
View file @
1ff67df1
...
@@ -6,9 +6,9 @@ import pytest
...
@@ -6,9 +6,9 @@ import pytest
import
torch
import
torch
from
tests.conftest
import
HfRunner
from
tests.conftest
import
HfRunner
from
tests.models.utils
import
LASTPoolingRerankModelInfo
,
RerankModelInfo
from
tests.models.utils
import
RerankModelInfo
from
.mteb_utils
import
mteb_test_rerank_models
from
.mteb_
score_
utils
import
mteb_test_rerank_models
mxbai_rerank_hf_overrides
=
{
mxbai_rerank_hf_overrides
=
{
"architectures"
:
[
"Qwen2ForSequenceClassification"
],
"architectures"
:
[
"Qwen2ForSequenceClassification"
],
...
@@ -17,14 +17,18 @@ mxbai_rerank_hf_overrides = {
...
@@ -17,14 +17,18 @@ mxbai_rerank_hf_overrides = {
}
}
RERANK_MODELS
=
[
RERANK_MODELS
=
[
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"mixedbread-ai/mxbai-rerank-base-v2"
,
"mixedbread-ai/mxbai-rerank-base-v2"
,
architecture
=
"Qwen2ForSequenceClassification"
,
architecture
=
"Qwen2ForSequenceClassification"
,
hf_overrides
=
mxbai_rerank_hf_overrides
,
hf_overrides
=
mxbai_rerank_hf_overrides
,
mteb_score
=
0.273
,
mteb_score
=
0.273
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
enable_test
=
True
,
),
),
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"mixedbread-ai/mxbai-rerank-large-v2"
,
"mixedbread-ai/mxbai-rerank-large-v2"
,
architecture
=
"Qwen2ForSequenceClassification"
,
architecture
=
"Qwen2ForSequenceClassification"
,
hf_overrides
=
mxbai_rerank_hf_overrides
,
hf_overrides
=
mxbai_rerank_hf_overrides
,
...
...
tests/models/language/pooling_mteb_test/test_nemotron.py
View file @
1ff67df1
...
@@ -3,29 +3,39 @@
...
@@ -3,29 +3,39 @@
import
pytest
import
pytest
from
tests.models.language.pooling_mteb_test.mteb_embed_utils
import
(
mteb_test_embed_models
,
)
from
tests.models.language.pooling_mteb_test.mteb_score_utils
import
(
mteb_test_rerank_models
,
)
from
tests.models.utils
import
(
from
tests.models.utils
import
(
EmbedModelInfo
,
EmbedModelInfo
,
LASTPoolingEmbedModelInfo
,
LASTPoolingRerankModelInfo
,
RerankModelInfo
,
RerankModelInfo
,
)
)
from
.mteb_utils
import
mteb_test_embed_models
,
mteb_test_rerank_models
EMBEDDING_MODELS
=
[
EMBEDDING_MODELS
=
[
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nvidia/llama-nemotron-embed-1b-v2"
,
"nvidia/llama-nemotron-embed-1b-v2"
,
architecture
=
"LlamaBidirectionalModel"
,
architecture
=
"LlamaBidirectionalModel"
,
mteb_score
=
0.689164662128673
,
mteb_score
=
0.689164662128673
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
)
)
]
]
RERANK_MODELS
=
[
RERANK_MODELS
=
[
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"nvidia/llama-nemotron-rerank-1b-v2"
,
"nvidia/llama-nemotron-rerank-1b-v2"
,
architecture
=
"LlamaBidirectionalForSequenceClassification"
,
architecture
=
"LlamaBidirectionalForSequenceClassification"
,
chat_template_name
=
"nemotron-rerank.jinja"
,
chat_template_name
=
"nemotron-rerank.jinja"
,
mteb_score
=
0.33994
,
mteb_score
=
0.33994
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
),
),
]
]
...
...
tests/models/language/pooling_mteb_test/test_nomic.py
View file @
1ff67df1
...
@@ -4,30 +4,38 @@
...
@@ -4,30 +4,38 @@
import
pytest
import
pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
CLSPoolingEmbedModelInfo
,
EmbedModelInfo
from
tests.models.utils
import
EmbedModelInfo
from
.mteb_utils
import
mteb_test_embed_models
from
.mteb_
embed_
utils
import
mteb_test_embed_models
MODELS
=
[
MODELS
=
[
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v1"
,
"nomic-ai/nomic-embed-text-v1"
,
architecture
=
"NomicBertModel"
,
architecture
=
"NomicBertModel"
,
mteb_score
=
0.737568559
,
mteb_score
=
0.737568559
,
enable_test
=
True
,
enable_test
=
True
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v1.5"
,
"nomic-ai/nomic-embed-text-v1.5"
,
architecture
=
"NomicBertModel"
,
architecture
=
"NomicBertModel"
,
enable_test
=
False
,
enable_test
=
False
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/CodeRankEmbed"
,
architecture
=
"NomicBertModel"
,
enable_test
=
False
"nomic-ai/CodeRankEmbed"
,
architecture
=
"NomicBertModel"
,
enable_test
=
False
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v2-moe"
,
"nomic-ai/nomic-embed-text-v2-moe"
,
architecture
=
"NomicBertModel"
,
architecture
=
"NomicBertModel"
,
mteb_score
=
0.715488912
,
mteb_score
=
0.715488912
,
enable_test
=
True
,
enable_test
=
True
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
),
),
]
]
...
...
tests/models/language/pooling_mteb_test/test_qwen3_reranker.py
View file @
1ff67df1
...
@@ -6,10 +6,10 @@ import pytest
...
@@ -6,10 +6,10 @@ import pytest
import
torch
import
torch
from
tests.conftest
import
HfRunner
from
tests.conftest
import
HfRunner
from
tests.models.utils
import
LASTPoolingRerankModelInfo
,
RerankModelInfo
from
tests.models.utils
import
RerankModelInfo
from
tests.utils
import
multi_gpu_test
from
tests.utils
import
multi_gpu_test
from
.mteb_utils
import
mteb_test_rerank_models
from
.mteb_
score_
utils
import
mteb_test_rerank_models
qwen3_reranker_hf_overrides
=
{
qwen3_reranker_hf_overrides
=
{
"architectures"
:
[
"Qwen3ForSequenceClassification"
],
"architectures"
:
[
"Qwen3ForSequenceClassification"
],
...
@@ -18,14 +18,18 @@ qwen3_reranker_hf_overrides = {
...
@@ -18,14 +18,18 @@ qwen3_reranker_hf_overrides = {
}
}
RERANK_MODELS
=
[
RERANK_MODELS
=
[
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"Qwen/Qwen3-Reranker-0.6B"
,
"Qwen/Qwen3-Reranker-0.6B"
,
architecture
=
"Qwen3ForSequenceClassification"
,
architecture
=
"Qwen3ForSequenceClassification"
,
mteb_score
=
0.25736
,
mteb_score
=
0.25736
,
hf_overrides
=
qwen3_reranker_hf_overrides
,
hf_overrides
=
qwen3_reranker_hf_overrides
,
pooling_type
=
"LAST"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
enable_test
=
True
,
),
),
LASTPooling
RerankModelInfo
(
RerankModelInfo
(
"Qwen/Qwen3-Reranker-4B"
,
"Qwen/Qwen3-Reranker-4B"
,
architecture
=
"Qwen3ForSequenceClassification"
,
architecture
=
"Qwen3ForSequenceClassification"
,
hf_overrides
=
qwen3_reranker_hf_overrides
,
hf_overrides
=
qwen3_reranker_hf_overrides
,
...
...
tests/models/language/pooling_mteb_test/test_snowflake_arctic_embed.py
View file @
1ff67df1
...
@@ -4,62 +4,82 @@
...
@@ -4,62 +4,82 @@
import
pytest
import
pytest
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.language.pooling.embed_utils
import
correctness_test_embed_models
from
tests.models.utils
import
CLSPoolingEmbedModelInfo
,
EmbedModelInfo
from
tests.models.utils
import
EmbedModelInfo
from
.mteb_utils
import
mteb_test_embed_models
from
.mteb_
embed_
utils
import
mteb_test_embed_models
MODELS
=
[
MODELS
=
[
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-xs"
,
"Snowflake/snowflake-arctic-embed-xs"
,
is_matryoshka
=
False
,
is_matryoshka
=
False
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
mteb_score
=
0.714927797
,
mteb_score
=
0.714927797
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-s"
,
"Snowflake/snowflake-arctic-embed-s"
,
is_matryoshka
=
False
,
is_matryoshka
=
False
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
enable_test
=
False
,
enable_test
=
False
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-m"
,
"Snowflake/snowflake-arctic-embed-m"
,
is_matryoshka
=
False
,
is_matryoshka
=
False
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
enable_test
=
False
,
enable_test
=
False
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-m-long"
,
"Snowflake/snowflake-arctic-embed-m-long"
,
is_matryoshka
=
False
,
is_matryoshka
=
False
,
architecture
=
"NomicBertModel"
,
architecture
=
"NomicBertModel"
,
mteb_score
=
0.681146831
,
mteb_score
=
0.681146831
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-l"
,
"Snowflake/snowflake-arctic-embed-l"
,
is_matryoshka
=
False
,
is_matryoshka
=
False
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
enable_test
=
False
,
enable_test
=
False
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-m-v1.5"
,
"Snowflake/snowflake-arctic-embed-m-v1.5"
,
is_matryoshka
=
True
,
is_matryoshka
=
True
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
mteb_score
=
0.649088363
,
mteb_score
=
0.649088363
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-l-v2.0"
,
"Snowflake/snowflake-arctic-embed-l-v2.0"
,
is_matryoshka
=
True
,
is_matryoshka
=
True
,
architecture
=
"XLMRobertaModel"
,
architecture
=
"XLMRobertaModel"
,
mteb_score
=
0.712258299
,
mteb_score
=
0.712258299
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"Snowflake/snowflake-arctic-embed-m-v2.0"
,
"Snowflake/snowflake-arctic-embed-m-v2.0"
,
is_matryoshka
=
True
,
is_matryoshka
=
True
,
architecture
=
"GteModel"
,
architecture
=
"GteModel"
,
mteb_score
=
0.706622444
,
mteb_score
=
0.706622444
,
pooling_type
=
"CLS"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
]
]
...
...
tests/models/language/pooling_mteb_test/test_st_projector.py
View file @
1ff67df1
...
@@ -3,25 +3,31 @@
...
@@ -3,25 +3,31 @@
import
pytest
import
pytest
from
tests.models.utils
import
(
from
tests.models.utils
import
(
CLSPoolingEmbedModelInfo
,
EmbedModelInfo
,
EmbedModelInfo
,
LASTPoolingEmbedModelInfo
,
)
)
from
.mteb_utils
import
mteb_test_embed_models
from
.mteb_
embed_
utils
import
mteb_test_embed_models
# ST models with projector (Dense) layers
# ST models with projector (Dense) layers
ST_PROJECTOR_MODELS
=
[
ST_PROJECTOR_MODELS
=
[
CLSPooling
EmbedModelInfo
(
EmbedModelInfo
(
"TencentBAC/Conan-embedding-v1"
,
"TencentBAC/Conan-embedding-v1"
,
architecture
=
"BertModel"
,
architecture
=
"BertModel"
,
mteb_score
=
0.688611955
,
mteb_score
=
0.688611955
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
),
),
LASTPooling
EmbedModelInfo
(
EmbedModelInfo
(
"google/embeddinggemma-300m"
,
"google/embeddinggemma-300m"
,
architecture
=
"Gemma3TextModel"
,
architecture
=
"Gemma3TextModel"
,
mteb_score
=
0.7473819294684156
,
mteb_score
=
0.7473819294684156
,
pooling_type
=
"MEAN"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
enable_test
=
True
,
dtype
=
"float32"
,
dtype
=
"float32"
,
),
),
...
...
tests/models/utils.py
View file @
1ff67df1
...
@@ -10,7 +10,7 @@ import torch
...
@@ -10,7 +10,7 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.config.model
import
ModelConfig
,
ModelDType
,
RunnerOption
from
vllm.config.model
import
AttnTypeStr
,
ModelConfig
,
ModelDType
,
RunnerOption
from
vllm.logprobs
import
Logprob
,
PromptLogprobs
,
SampleLogprobs
from
vllm.logprobs
import
Logprob
,
PromptLogprobs
,
SampleLogprobs
from
vllm.multimodal.processing
import
InputProcessingContext
from
vllm.multimodal.processing
import
InputProcessingContext
from
vllm.tokenizers
import
cached_tokenizer_from_config
from
vllm.tokenizers
import
cached_tokenizer_from_config
...
@@ -375,7 +375,10 @@ class ModelInfo:
...
@@ -375,7 +375,10 @@ class ModelInfo:
max_model_len
:
int
|
None
=
None
max_model_len
:
int
|
None
=
None
hf_dtype
:
str
=
"float32"
hf_dtype
:
str
=
"float32"
hf_overrides
:
dict
[
str
,
Any
]
|
None
=
None
hf_overrides
:
dict
[
str
,
Any
]
|
None
=
None
default_pooling_type
:
str
=
""
pooling_type
:
str
|
None
=
None
attn_type
:
AttnTypeStr
|
None
=
None
is_prefix_caching_supported
:
bool
|
None
=
None
is_chunked_prefill_supported
:
bool
|
None
=
None
enable_test
:
bool
=
True
enable_test
:
bool
=
True
...
@@ -386,32 +389,12 @@ class EmbedModelInfo(ModelInfo):
...
@@ -386,32 +389,12 @@ class EmbedModelInfo(ModelInfo):
matryoshka_dimensions
:
list
[
int
]
|
None
=
None
matryoshka_dimensions
:
list
[
int
]
|
None
=
None
@
dataclass
class
CLSPoolingEmbedModelInfo
(
EmbedModelInfo
):
default_pooling_type
:
str
=
"CLS"
@
dataclass
class
LASTPoolingEmbedModelInfo
(
EmbedModelInfo
):
default_pooling_type
:
str
=
"LAST"
@
dataclass
@
dataclass
class
RerankModelInfo
(
ModelInfo
):
class
RerankModelInfo
(
ModelInfo
):
mteb_score
:
float
|
None
=
None
mteb_score
:
float
|
None
=
None
chat_template_name
:
str
|
None
=
None
chat_template_name
:
str
|
None
=
None
@
dataclass
class
CLSPoolingRerankModelInfo
(
RerankModelInfo
):
default_pooling_type
:
str
=
"CLS"
@
dataclass
class
LASTPoolingRerankModelInfo
(
RerankModelInfo
):
default_pooling_type
:
str
=
"LAST"
@
dataclass
@
dataclass
class
GenerateModelInfo
(
ModelInfo
):
class
GenerateModelInfo
(
ModelInfo
):
hf_dtype
:
str
=
"auto"
hf_dtype
:
str
=
"auto"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment