[CI/Build] Fix some V1 tests not being run (#25569)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[CI/Build] Fix some V1 tests not being run (#25569)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
db1e42f6 · Cyrus Leung · GitHub · bc9d7b55 · db1e42f6 · db1e42f6
Unverified commit db1e42f6, authored Sep 26, 2025 by Cyrus Leung; committed by GitHub on Sep 26, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 8 additions and 95 deletions

.buildkite/test-pipeline.yaml +4 -2

tests/v1/test_kv_sharing.py +4 -93

No files found.
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -300,10 +300,12 @@ steps:
    - pytest -v -s v1/spec_decode
    - pytest -v -s v1/kv_connector/unit
    - pytest -v -s v1/metrics
+    - pytest -v -s v1/test_kv_sharing.py
+    - pytest -v -s v1/test_metrics_reader.py
+    - pytest -v -s v1/test_oracle.py
+    - pytest -v -s v1/test_request.py
    - pytest -v -s v1/test_serial_utils.py
    - pytest -v -s v1/test_utils.py
-    - pytest -v -s v1/test_oracle.py
-    - pytest -v -s v1/test_metrics_reader.py
    # Integration test for streaming correctness (requires special branch).
    - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
    - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine

--- a/tests/v1/test_kv_sharing.py
+++ b/tests/v1/test_kv_sharing.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-from unittest.mock import Mock
-
 import torch

-from vllm.v1.attention.backends.flash_attn import (
-    FlashAttentionBackend, FlashAttentionMetadataBuilder)
-from vllm.v1.attention.backends.flex_attention import (
-    FlexAttentionBackend, FlexAttentionMetadataBuilder)
 from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheGroupSpec
-from vllm.v1.worker.utils import (AttentionGroup,
-                                  initialize_kv_cache_for_kv_sharing)
+from vllm.v1.worker.utils import add_kv_sharing_layers_to_kv_cache_groups


 def new_kv_cache_spec():
@@ -37,56 +30,17 @@ def test_initialize_kv_cache_for_kv_sharing_different_attn_groups():
                         new_kv_cache_spec()),
    ]

-    attn_groups = [
-        # KV cache group 0 has two attention groups
-        [
-            AttentionGroup(
-                backend=FlashAttentionBackend,
-                metadata_builder=Mock(spec=FlashAttentionMetadataBuilder),
-                layer_names=["model.layers.0"],
-            ),
-            AttentionGroup(
-                backend=FlexAttentionBackend,
-                metadata_builder=Mock(spec=FlexAttentionMetadataBuilder),
-                layer_names=["model.layers.1"],
-            ),
-        ],
-    ]
-
-    # Only layers 0 and 1 will have KV caches allocated
-    kv_caches = {
-        "model.layers.0": torch.zeros(1, 2, 3),
-        "model.layers.1": torch.ones(1, 2, 3),
-    }
-
-    initialize_kv_cache_for_kv_sharing(
+    add_kv_sharing_layers_to_kv_cache_groups(
        shared_kv_cache_layers=shared_kv_cache_layers,
        kv_cache_groups=kv_cache_groups,
-        kv_caches=kv_caches,
-        attn_groups=attn_groups,
    )

-    # Check that the KV caches were shared correctly
-    assert kv_caches["model.layers.2"].data_ptr(
-    ) == kv_caches["model.layers.0"].data_ptr()
-    assert kv_caches["model.layers.3"].data_ptr(
-    ) == kv_caches["model.layers.1"].data_ptr()
-
    # Check that the layers were added to the correct KV cache group
    assert len(kv_cache_groups) == 1
    assert kv_cache_groups[0].layer_names == [
        "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3"
    ]

-    # Check that the layers were added to the attention groups
-    assert len(attn_groups) == 1 and len(attn_groups[0]) == 2
-    assert attn_groups[0][0].layer_names == [
-        "model.layers.0", "model.layers.2"
-    ]
-    assert attn_groups[0][1].layer_names == [
-        "model.layers.1", "model.layers.3"
-    ]
-

 def test_initialize_kv_cache_for_kv_sharing_same_attn_groups():
    """
@@ -103,48 +57,17 @@ def test_initialize_kv_cache_for_kv_sharing_same_attn_groups():
                         new_kv_cache_spec()),
    ]

-    attn_groups = [
-        # KV cache group 0 has a single attention group
-        # as all layers have the same flash attention backend
-        [
-            AttentionGroup(
-                backend=FlashAttentionBackend,
-                metadata_builder=Mock(spec=FlashAttentionMetadataBuilder),
-                layer_names=["model.layers.0", "model.layers.1"],
-            ),
-        ],
-    ]
-
-    kv_caches = {
-        "model.layers.0": torch.zeros(1, 2, 3),
-        "model.layers.1": torch.ones(1, 2, 3),
-    }
-
-    initialize_kv_cache_for_kv_sharing(
+    add_kv_sharing_layers_to_kv_cache_groups(
        shared_kv_cache_layers=shared_kv_cache_layers,
        kv_cache_groups=kv_cache_groups,
-        kv_caches=kv_caches,
-        attn_groups=attn_groups,
    )

-    # Check that the KV caches were shared correctly
-    assert kv_caches["model.layers.2"].data_ptr(
-    ) == kv_caches["model.layers.0"].data_ptr()
-    assert kv_caches["model.layers.3"].data_ptr(
-    ) == kv_caches["model.layers.1"].data_ptr()
-
    # Check that the layers were added to the correct KV cache group
    assert len(kv_cache_groups) == 1
    assert kv_cache_groups[0].layer_names == [
        "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3"
    ]

-    # Check that the layers were added to the attention groups
-    assert len(attn_groups) == 1 and len(attn_groups[0]) == 1
-    assert attn_groups[0][0].layer_names == [
-        "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3"
-    ]
-

 def test_initialize_kv_cache_for_kv_sharing_no_attn_groups():
    """
@@ -162,23 +85,11 @@ def test_initialize_kv_cache_for_kv_sharing_no_attn_groups():
        KVCacheGroupSpec(["model.layers.1"], new_kv_cache_spec()),
    ]

-    kv_caches = {
-        "model.layers.0": torch.zeros(1, 2, 3),
-        "model.layers.1": torch.ones(1, 2, 3),
-    }
-
-    initialize_kv_cache_for_kv_sharing(
+    add_kv_sharing_layers_to_kv_cache_groups(
        shared_kv_cache_layers=shared_kv_cache_layers,
        kv_cache_groups=kv_cache_groups,
-        kv_caches=kv_caches,
    )

-    # Check that the KV caches were shared correctly
-    assert kv_caches["model.layers.2"].data_ptr(
-    ) == kv_caches["model.layers.0"].data_ptr()
-    assert kv_caches["model.layers.3"].data_ptr(
-    ) == kv_caches["model.layers.1"].data_ptr()
-
    # Check that the layers were added to the correct KV cache group
    assert len(kv_cache_groups) == 2
    assert kv_cache_groups[0].layer_names == [