Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a1648c40
Unverified
Commit a1648c40, authored Jan 08, 2026 by Divakar Verma; committed by GitHub on Jan 09, 2026
Browse files
[ROCm][CI] Fix test_token_classification.py::test_bert_models (#31993)

Signed-off-by: Divakar Verma <divakar.verma@amd.com>
parent
e2d49ec2
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 12 additions and 4 deletions
+12
-4
tests/models/language/pooling/test_token_classification.py
tests/models/language/pooling/test_token_classification.py
+12
-4
No files found.
tests/models/language/pooling/test_token_classification.py
View file @
a1648c40
...
...
@@ -5,6 +5,7 @@ import torch
 from transformers import AutoModelForTokenClassification

 from tests.models.utils import softmax
+from vllm.platforms import current_platform


 @pytest.mark.parametrize("model", ["boltuix/NeuroBERT-NER"])
...
...
@@ -21,8 +22,17 @@ def test_bert_models(
     with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model:
         vllm_outputs = vllm_model.token_classify(example_prompts)

+    # Use eager attention on ROCm to avoid HF Transformers flash attention
+    # accuracy issues: https://github.com/vllm-project/vllm/issues/30167
+    hf_model_kwargs = {}
+    if current_platform.is_rocm():
+        hf_model_kwargs["attn_implementation"] = "eager"
+
     with hf_runner(
-        model, dtype=dtype, auto_cls=AutoModelForTokenClassification
+        model,
+        dtype=dtype,
+        auto_cls=AutoModelForTokenClassification,
+        model_kwargs=hf_model_kwargs,
     ) as hf_model:
         tokenizer = hf_model.tokenizer
         hf_outputs = []
...
...
@@ -36,7 +46,7 @@ def test_bert_models(
     for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
         hf_output = hf_output.detach().clone().cpu().float()
         vllm_output = vllm_output.detach().clone().cpu().float()
-        assert torch.allclose(hf_output, vllm_output, 1e-2)
+        torch.testing.assert_close(hf_output, vllm_output, atol=1.2e-2, rtol=1e-3)


 @pytest.mark.parametrize("model", ["disham993/electrical-ner-ModernBERT-base"])
...
...
@@ -49,8 +59,6 @@ def test_modernbert_models(
     model: str,
     dtype: str,
 ) -> None:
-    from vllm.platforms import current_platform
-
     with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model:
         vllm_outputs = vllm_model.token_classify(example_prompts)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment