[CI] [Hybrid] Speed up hybrid models test by removing large models (#22563)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>

[CI] [Hybrid] Speed up hybrid models test by removing large models (#22563)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
1bf5e1f2 · Thomas Parnell · GitHub · a6022e6f · 1bf5e1f2
Unverified commit 1bf5e1f2, authored Aug 09, 2025 by Thomas Parnell; committed by GitHub on Aug 09, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 14 deletions

tests/models/language/generation/test_hybrid.py +7 -14

No files found.
--- a/tests/models/language/generation/test_hybrid.py
+++ b/tests/models/language/generation/test_hybrid.py
@@ -20,7 +20,7 @@ pytestmark = pytest.mark.hybrid_model
 SSM_MODELS = [
    "state-spaces/mamba-130m-hf",
    "tiiuae/falcon-mamba-tiny-dev",
-    "mistralai/Mamba-Codestral-7B-v0.1",
+    "yujiepan/mamba2-codestral-v0.1-tiny-random",
 ]
 HYBRID_MODELS = [
@@ -29,8 +29,6 @@ HYBRID_MODELS = [
    # "pfnet/plamo-2-1b",
    "Zyphra/Zamba2-1.2B-instruct",
    "hmellor/tiny-random-BambaForCausalLM",
-    "ibm-ai-platform/Bamba-9B-v1",
-    "nvidia/Nemotron-H-8B-Base-8K",
    "ibm-granite/granite-4.0-tiny-preview",
    "tiiuae/Falcon-H1-0.5B-Base",
 ]
@@ -40,23 +38,18 @@ HF_UNSUPPORTED_MODELS = [
    # Mamba2 is buggy for Codestral as it doesn't handle n_groups, so the test
    # doesn't compare vLLM output with HF output.
    # See https://github.com/huggingface/transformers/pull/35943
-    "mistralai/Mamba-Codestral-7B-v0.1",
+    "yujiepan/mamba2-codestral-v0.1-tiny-random",
-    # Note: I'm not seeing the same output from vLLM V0 vs. HF transformers
+    # transformers 4.55 is still producing garbage for this model
-    # for Nemotron-H-8B; currently only compare vLLM V0 vs. vLLM V1
+    # TODO(tdoublep): follow-up on transformers side
-    "nvidia/Nemotron-H-8B-Base-8K",
+    "ibm-granite/granite-4.0-tiny-preview"
-    # NOTE: Currently the test fails due to HF transformers issue fixed in:
-    # https://github.com/huggingface/transformers/pull/39033
-    # We will enable vLLM test for Granite after next HF transformers release.
-    "ibm-granite/granite-4.0-tiny-preview",
 ]
 V1_SUPPORTED_MODELS = [
    "state-spaces/mamba-130m-hf",
    "ai21labs/Jamba-tiny-dev",
-    "mistralai/Mamba-Codestral-7B-v0.1",
+    "yujiepan/mamba2-codestral-v0.1-tiny-random",
-    "ibm-ai-platform/Bamba-9B-v1",
    "Zyphra/Zamba2-1.2B-instruct",
-    "nvidia/Nemotron-H-8B-Base-8K",
+    "hmellor/tiny-random-BambaForCausalLM",
    "ibm-granite/granite-4.0-tiny-preview",
    "tiiuae/Falcon-H1-0.5B-Base",
 ]