Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4429d934
Unverified
Commit
4429d934
authored
Dec 15, 2025
by
wang.yuqi
Committed by
GitHub
Dec 15, 2025
Browse files
[Model] Automatic conversion of TokenClassification model (#30666)
Signed-off-by:
wang.yuqi
<
yuqi.wang@daocloud.io
>
parent
33278073
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
45 additions
and
0 deletions
+45
-0
tests/models/language/pooling/test_token_classification.py
tests/models/language/pooling/test_token_classification.py
+31
-0
tests/models/registry.py
tests/models/registry.py
+1
-0
vllm/config/model.py
vllm/config/model.py
+1
-0
vllm/model_executor/models/adapters.py
vllm/model_executor/models/adapters.py
+12
-0
No files found.
tests/models/language/pooling/test_token_classification.py
View file @
4429d934
...
@@ -68,3 +68,34 @@ def test_modernbert_models(
...
@@ -68,3 +68,34 @@ def test_modernbert_models(
hf_output
=
torch
.
tensor
(
hf_output
).
cpu
().
float
()
hf_output
=
torch
.
tensor
(
hf_output
).
cpu
().
float
()
vllm_output
=
torch
.
tensor
(
vllm_output
).
cpu
().
float
()
vllm_output
=
torch
.
tensor
(
vllm_output
).
cpu
().
float
()
assert
torch
.
allclose
(
hf_output
,
vllm_output
,
atol
=
1e-2
)
assert
torch
.
allclose
(
hf_output
,
vllm_output
,
atol
=
1e-2
)
@pytest.mark.parametrize("model", ["bd2lcco/Qwen3-0.6B-finetuned"])
@pytest.mark.parametrize("dtype", ["float"])
@torch.inference_mode
def test_auto_conversion(
    hf_runner,
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
) -> None:
    """Compare vLLM token classification against the Hugging Face reference.

    The checkpoint is run through ``vllm_model.token_classify`` and through
    ``AutoModelForTokenClassification``; the per-prompt results must agree
    within an absolute tolerance of 1e-2.
    """
    with vllm_runner(model, max_model_len=1024, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.token_classify(example_prompts)

    with hf_runner(
        model, dtype=dtype, auto_cls=AutoModelForTokenClassification
    ) as hf_model:
        tokenizer = hf_model.tokenizer
        hf_outputs = []
        # Prompts are fed one at a time, so each logits tensor has a leading
        # batch dimension of one that is dropped with ``[0]``.
        for prompt in example_prompts:
            inputs = tokenizer([prompt], return_tensors="pt")
            inputs = hf_model.wrap_device(inputs)
            output = hf_model.model(**inputs)
            # NOTE(review): ``softmax`` is a helper defined elsewhere in this
            # test module; presumably it normalizes over the label axis.
            hf_outputs.append(softmax(output.logits[0]))

    # ``zip`` stops at the shorter sequence, so a runner that dropped an
    # output would otherwise go unnoticed by the loop below.
    assert len(hf_outputs) == len(vllm_outputs)

    # check logits difference
    for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
        hf_output = torch.tensor(hf_output).cpu().float()
        vllm_output = torch.tensor(vllm_output).cpu().float()
        assert torch.allclose(hf_output, vllm_output, atol=1e-2)
tests/models/registry.py
View file @
4429d934
...
@@ -573,6 +573,7 @@ _AUTOMATIC_CONVERTED_MODELS = {
...
@@ -573,6 +573,7 @@ _AUTOMATIC_CONVERTED_MODELS = {
"Qwen3ForSequenceClassification"
:
_HfExamplesInfo
(
"Qwen3ForSequenceClassification"
:
_HfExamplesInfo
(
"tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
"tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
),
),
"Qwen3ForTokenClassification"
:
_HfExamplesInfo
(
"bd2lcco/Qwen3-0.6B-finetuned"
),
}
}
_MULTIMODAL_EXAMPLE_MODELS
=
{
_MULTIMODAL_EXAMPLE_MODELS
=
{
...
...
vllm/config/model.py
View file @
4429d934
...
@@ -1796,6 +1796,7 @@ _SUFFIX_TO_DEFAULTS: list[tuple[str, tuple[RunnerType, ConvertType]]] = [
...
@@ -1796,6 +1796,7 @@ _SUFFIX_TO_DEFAULTS: list[tuple[str, tuple[RunnerType, ConvertType]]] = [
(
"ForTextEncoding"
,
(
"pooling"
,
"embed"
)),
(
"ForTextEncoding"
,
(
"pooling"
,
"embed"
)),
(
"EmbeddingModel"
,
(
"pooling"
,
"embed"
)),
(
"EmbeddingModel"
,
(
"pooling"
,
"embed"
)),
(
"ForSequenceClassification"
,
(
"pooling"
,
"classify"
)),
(
"ForSequenceClassification"
,
(
"pooling"
,
"classify"
)),
(
"ForTokenClassification"
,
(
"pooling"
,
"classify"
)),
(
"ForAudioClassification"
,
(
"pooling"
,
"classify"
)),
(
"ForAudioClassification"
,
(
"pooling"
,
"classify"
)),
(
"ForImageClassification"
,
(
"pooling"
,
"classify"
)),
(
"ForImageClassification"
,
(
"pooling"
,
"classify"
)),
(
"ForVideoClassification"
,
(
"pooling"
,
"classify"
)),
(
"ForVideoClassification"
,
(
"pooling"
,
"classify"
)),
...
...
vllm/model_executor/models/adapters.py
View file @
4429d934
...
@@ -337,6 +337,18 @@ def as_seq_cls_model(cls: _T) -> _T:
...
@@ -337,6 +337,18 @@ def as_seq_cls_model(cls: _T) -> _T:
tokens
=
getattr
(
text_config
,
"classifier_from_token"
,
None
)
tokens
=
getattr
(
text_config
,
"classifier_from_token"
,
None
)
method
=
getattr
(
text_config
,
"method"
,
None
)
method
=
getattr
(
text_config
,
"method"
,
None
)
def auto_set_score_bias(weights):
    """Filter ``score.bias`` out of the weight stream and attach it directly.

    Iterates over ``(name, weight)`` pairs. Every pair whose name is not
    ``"score.bias"`` is yielded unchanged. The ``"score.bias"`` entry is
    not yielded; instead it is converted to the device and dtype of
    ``self.score.weight``, stored as ``self.score.bias`` and
    ``self.score.skip_bias_add`` is set to ``False``.

    This is a generator, so the side effects on ``self.score`` only happen
    while the returned iterator is being consumed.
    """
    for name, weight in weights:
        if name != "score.bias":
            yield name, weight
            continue

        # NOTE(review): presumably the score layer is built without a bias,
        # so a checkpoint-provided bias has to be attached here -- confirm
        # against the code that constructs ``self.score``.
        score_weight = self.score.weight
        converted = weight.to(score_weight.device).to(score_weight.dtype)
        self.score.bias = torch.nn.Parameter(converted)
        self.score.skip_bias_add = False
weights
=
auto_set_score_bias
(
weights
)
if
tokens
is
None
and
method
is
None
:
if
tokens
is
None
and
method
is
None
:
return
super
().
load_weights
(
weights
)
return
super
().
load_weights
(
weights
)
else
:
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment