[CI][Pooling] Stabilize ModernBERT test (#32909)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>

[CI][Pooling] Stabilize ModernBERT test (#32909)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
6c006457 · Andreas Karatzas · GitHub · b781eeaa · 6c006457
Unverified Commit 6c006457 authored Jan 26, 2026 by Andreas Karatzas Committed by GitHub Jan 27, 2026
Show whitespace changes
Inline Side-by-side

Showing with 27 additions and 0 deletions

tests/models/language/pooling/test_token_classification.py tests/models/language/pooling/test_token_classification.py +27 -0

No files found.
--- a/tests/models/language/pooling/test_token_classification.py
+++ b/tests/models/language/pooling/test_token_classification.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import random
+import numpy as np
 import pytest
 import torch
 from transformers import AutoModelForTokenClassification
@@ -8,6 +11,20 @@ from tests.models.utils import softmax
 from vllm.platforms import current_platform
+@pytest.fixture(autouse=True)
+def seed_everything():
+    """Seed all random number generators for reproducibility."""
+    seed = 0
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+    yield
 @pytest.mark.parametrize("model", ["boltuix/NeuroBERT-NER"])
 # The float32 is required for this tiny model to pass the test.
 @pytest.mark.parametrize("dtype", ["float"])
@@ -51,6 +68,7 @@ def test_bert_models(
 @pytest.mark.parametrize("model", ["disham993/electrical-ner-ModernBERT-base"])
 @pytest.mark.parametrize("dtype", ["float"])
+@pytest.mark.flaky(reruns=3)
 @torch.inference_mode
 def test_modernbert_models(
    hf_runner,
@@ -59,6 +77,15 @@ def test_modernbert_models(
    model: str,
    dtype: str,
 ) -> None:
+    # NOTE: https://github.com/vllm-project/vllm/pull/32403
+    # `disham993/electrical-ner-ModernBERT-base` is a randomly initialized
+    # model, which can cause numerical precision variance and edge cases.
+    # We use @flaky(reruns=3) to mitigate intermittent failures.
+    print(
+        f"\n[NOTE] Testing {model} (randomly initialized weights) - "
+        "flaky tolerance enabled due to numerical precision variance."
+    )
    with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.token_classify(example_prompts)