Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
013b5408
Unverified
Commit
013b5408
authored
Jan 01, 2026
by
Andreas Karatzas
Committed by
GitHub
Jan 02, 2026
Browse files
[ROCm][CI] Fix ModernBERT token classification test (#31612)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent
5ac55eb3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 18 additions and 7 deletions
+18
-7
tests/models/language/pooling/test_token_classification.py
tests/models/language/pooling/test_token_classification.py
+18
-7
No files found.
tests/models/language/pooling/test_token_classification.py
View file @
013b5408
...
...
@@ -34,8 +34,8 @@ def test_bert_models(
     # check logits difference
     for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
-        hf_output = torch.tensor(hf_output).cpu().float()
-        vllm_output = torch.tensor(vllm_output).cpu().float()
+        hf_output = hf_output.detach().clone().cpu().float()
+        vllm_output = vllm_output.detach().clone().cpu().float()
         assert torch.allclose(hf_output, vllm_output, 1e-2)
...
...
@@ -49,11 +49,22 @@ def test_modernbert_models(
     model: str,
     dtype: str,
 ) -> None:
+    from vllm.platforms import current_platform
+
     with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model:
         vllm_outputs = vllm_model.token_classify(example_prompts)
 
+    # Use eager attention on ROCm to avoid HF Transformers flash attention
+    # accuracy issues: https://github.com/vllm-project/vllm/issues/30167
+    hf_model_kwargs = {}
+    if current_platform.is_rocm():
+        hf_model_kwargs["attn_implementation"] = "eager"
+
     with hf_runner(
-        model, dtype=dtype, auto_cls=AutoModelForTokenClassification
+        model,
+        dtype=dtype,
+        auto_cls=AutoModelForTokenClassification,
+        model_kwargs=hf_model_kwargs,
     ) as hf_model:
         tokenizer = hf_model.tokenizer
         hf_outputs = []
...
...
@@ -65,8 +76,8 @@ def test_modernbert_models(
     # check logits difference
     for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
-        hf_output = torch.tensor(hf_output).cpu().float()
-        vllm_output = torch.tensor(vllm_output).cpu().float()
+        hf_output = hf_output.detach().clone().cpu().float()
+        vllm_output = vllm_output.detach().clone().cpu().float()
         assert torch.allclose(hf_output, vllm_output, atol=1e-2)
...
...
@@ -96,6 +107,6 @@ def test_auto_conversion(
     # check logits difference
     for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
-        hf_output = torch.tensor(hf_output).cpu().float()
-        vllm_output = torch.tensor(vllm_output).cpu().float()
+        hf_output = hf_output.detach().clone().cpu().float()
+        vllm_output = vllm_output.detach().clone().cpu().float()
         assert torch.allclose(hf_output, vllm_output, atol=1e-2)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.