Unverified commit 013b5408, authored by Andreas Karatzas and committed by GitHub
Browse files

[ROCm][CI] Fix ModernBERT token classification test (#31612)


Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent 5ac55eb3
...@@ -34,8 +34,8 @@ def test_bert_models( ...@@ -34,8 +34,8 @@ def test_bert_models(
# check logits difference # check logits difference
for hf_output, vllm_output in zip(hf_outputs, vllm_outputs): for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
hf_output = torch.tensor(hf_output).cpu().float() hf_output = hf_output.detach().clone().cpu().float()
vllm_output = torch.tensor(vllm_output).cpu().float() vllm_output = vllm_output.detach().clone().cpu().float()
assert torch.allclose(hf_output, vllm_output, 1e-2) assert torch.allclose(hf_output, vllm_output, 1e-2)
...@@ -49,11 +49,22 @@ def test_modernbert_models( ...@@ -49,11 +49,22 @@ def test_modernbert_models(
model: str, model: str,
dtype: str, dtype: str,
) -> None: ) -> None:
from vllm.platforms import current_platform
with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model: with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model:
vllm_outputs = vllm_model.token_classify(example_prompts) vllm_outputs = vllm_model.token_classify(example_prompts)
# Use eager attention on ROCm to avoid HF Transformers flash attention
# accuracy issues: https://github.com/vllm-project/vllm/issues/30167
hf_model_kwargs = {}
if current_platform.is_rocm():
hf_model_kwargs["attn_implementation"] = "eager"
with hf_runner( with hf_runner(
model, dtype=dtype, auto_cls=AutoModelForTokenClassification model,
dtype=dtype,
auto_cls=AutoModelForTokenClassification,
model_kwargs=hf_model_kwargs,
) as hf_model: ) as hf_model:
tokenizer = hf_model.tokenizer tokenizer = hf_model.tokenizer
hf_outputs = [] hf_outputs = []
...@@ -65,8 +76,8 @@ def test_modernbert_models( ...@@ -65,8 +76,8 @@ def test_modernbert_models(
# check logits difference # check logits difference
for hf_output, vllm_output in zip(hf_outputs, vllm_outputs): for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
hf_output = torch.tensor(hf_output).cpu().float() hf_output = hf_output.detach().clone().cpu().float()
vllm_output = torch.tensor(vllm_output).cpu().float() vllm_output = vllm_output.detach().clone().cpu().float()
assert torch.allclose(hf_output, vllm_output, atol=1e-2) assert torch.allclose(hf_output, vllm_output, atol=1e-2)
...@@ -96,6 +107,6 @@ def test_auto_conversion( ...@@ -96,6 +107,6 @@ def test_auto_conversion(
# check logits difference # check logits difference
for hf_output, vllm_output in zip(hf_outputs, vllm_outputs): for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
hf_output = torch.tensor(hf_output).cpu().float() hf_output = hf_output.detach().clone().cpu().float()
vllm_output = torch.tensor(vllm_output).cpu().float() vllm_output = vllm_output.detach().clone().cpu().float()
assert torch.allclose(hf_output, vllm_output, atol=1e-2) assert torch.allclose(hf_output, vllm_output, atol=1e-2)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment