[CI/Build] Add new CI job to validate Hybrid Models for every PR (#20147)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>

[CI/Build] Add new CI job to validate Hybrid Models for every PR (#20147)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
8615d977 · Thomas Parnell · GitHub · 7b460c25 · 8615d977 · 8615d977
Unverified commit 8615d977, authored Jun 28, 2025 by Thomas Parnell; committed by GitHub Jun 27, 2025
Showing with 16 additions and 1 deletion

.buildkite/test-pipeline.yaml +12 -1

pyproject.toml +1 -0

tests/models/language/generation/test_hybrid.py +3 -0

No files found.
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -536,6 +536,17 @@ steps:
    - pip freeze | grep -E 'torch'
    - pytest -v -s models/language -m core_model

+- label: Language Models Test (Hybrid) # 35 min
+  mirror_hardwares: [amdexperimental]
+  torch_nightly: true
+  source_file_dependencies:
+  - vllm/
+  - tests/models/language/generation
+  commands:
+    # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
+    - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
+    - pytest -v -s models/language/generation -m hybrid_model
+
 - label: Language Models Test (Extended Generation) # 1hr20min
  mirror_hardwares: [amdexperimental]
  optional: true
@@ -545,7 +556,7 @@ steps:
  commands:
    # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
    - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
-    - pytest -v -s models/language/generation -m 'not core_model'
+    - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'

 - label: Language Models Test (Extended Pooling)  # 36min
  mirror_hardwares: [amdexperimental]

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -150,6 +150,7 @@ skip_gitignore = true
 markers = [
    "skip_global_cleanup",
    "core_model: enable this model test in each PR instead of only nightly",
+    "hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)",
    "cpu_model: enable this model test in CPU tests",
    "split: run this test as part of a split",
    "distributed: run this test only in distributed GPU tests",

--- a/tests/models/language/generation/test_hybrid.py
+++ b/tests/models/language/generation/test_hybrid.py
@@ -9,6 +9,9 @@ from vllm.sampling_params import SamplingParams

 from ...utils import check_logprobs_close, check_outputs_equal

+# Mark all tests as hybrid
+pytestmark = pytest.mark.hybrid_model
+
 # NOTE: The first model in each list is taken as the primary model,
 # meaning that it will be used in all tests in this file
 # The rest of the models will only be tested by test_models