Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
db1e42f6
Unverified
Commit
db1e42f6
authored
Sep 26, 2025
by
Cyrus Leung
Committed by
GitHub
Sep 26, 2025
Browse files
[CI/Build] Fix some V1 tests not being run (#25569)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
bc9d7b55
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
95 deletions
+8
-95
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+4
-2
tests/v1/test_kv_sharing.py
tests/v1/test_kv_sharing.py
+4
-93
No files found.
.buildkite/test-pipeline.yaml
View file @
db1e42f6
...
...
@@ -300,10 +300,12 @@ steps:
-
pytest -v -s v1/spec_decode
-
pytest -v -s v1/kv_connector/unit
-
pytest -v -s v1/metrics
-
pytest -v -s v1/test_kv_sharing.py
-
pytest -v -s v1/test_metrics_reader.py
-
pytest -v -s v1/test_oracle.py
-
pytest -v -s v1/test_request.py
-
pytest -v -s v1/test_serial_utils.py
-
pytest -v -s v1/test_utils.py
-
pytest -v -s v1/test_oracle.py
-
pytest -v -s v1/test_metrics_reader.py
# Integration test for streaming correctness (requires special branch).
-
pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
-
pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
...
...
tests/v1/test_kv_sharing.py
View file @
db1e42f6
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
unittest.mock
import
Mock
import
torch
from
vllm.v1.attention.backends.flash_attn
import
(
FlashAttentionBackend
,
FlashAttentionMetadataBuilder
)
from
vllm.v1.attention.backends.flex_attention
import
(
FlexAttentionBackend
,
FlexAttentionMetadataBuilder
)
from
vllm.v1.kv_cache_interface
import
FullAttentionSpec
,
KVCacheGroupSpec
from
vllm.v1.worker.utils
import
(
AttentionGroup
,
initialize_kv_cache_for_kv_sharing
)
from
vllm.v1.worker.utils
import
add_kv_sharing_layers_to_kv_cache_groups
def
new_kv_cache_spec
():
...
...
@@ -37,56 +30,17 @@ def test_initialize_kv_cache_for_kv_sharing_different_attn_groups():
new_kv_cache_spec
()),
]
attn_groups
=
[
# KV cache group 0 has two attention groups
[
AttentionGroup
(
backend
=
FlashAttentionBackend
,
metadata_builder
=
Mock
(
spec
=
FlashAttentionMetadataBuilder
),
layer_names
=
[
"model.layers.0"
],
),
AttentionGroup
(
backend
=
FlexAttentionBackend
,
metadata_builder
=
Mock
(
spec
=
FlexAttentionMetadataBuilder
),
layer_names
=
[
"model.layers.1"
],
),
],
]
# Only layers 0 and 1 will have KV caches allocated
kv_caches
=
{
"model.layers.0"
:
torch
.
zeros
(
1
,
2
,
3
),
"model.layers.1"
:
torch
.
ones
(
1
,
2
,
3
),
}
initialize_kv_cache_for_kv_sharing
(
add_kv_sharing_layers_to_kv_cache_groups
(
shared_kv_cache_layers
=
shared_kv_cache_layers
,
kv_cache_groups
=
kv_cache_groups
,
kv_caches
=
kv_caches
,
attn_groups
=
attn_groups
,
)
# Check that the KV caches were shared correctly
assert
kv_caches
[
"model.layers.2"
].
data_ptr
(
)
==
kv_caches
[
"model.layers.0"
].
data_ptr
()
assert
kv_caches
[
"model.layers.3"
].
data_ptr
(
)
==
kv_caches
[
"model.layers.1"
].
data_ptr
()
# Check that the layers were added to the correct KV cache group
assert
len
(
kv_cache_groups
)
==
1
assert
kv_cache_groups
[
0
].
layer_names
==
[
"model.layers.0"
,
"model.layers.1"
,
"model.layers.2"
,
"model.layers.3"
]
# Check that the layers were added to the attention groups
assert
len
(
attn_groups
)
==
1
and
len
(
attn_groups
[
0
])
==
2
assert
attn_groups
[
0
][
0
].
layer_names
==
[
"model.layers.0"
,
"model.layers.2"
]
assert
attn_groups
[
0
][
1
].
layer_names
==
[
"model.layers.1"
,
"model.layers.3"
]
def
test_initialize_kv_cache_for_kv_sharing_same_attn_groups
():
"""
...
...
@@ -103,48 +57,17 @@ def test_initialize_kv_cache_for_kv_sharing_same_attn_groups():
new_kv_cache_spec
()),
]
attn_groups
=
[
# KV cache group 0 has a single attention group
# as all layers have the same flash attention backend
[
AttentionGroup
(
backend
=
FlashAttentionBackend
,
metadata_builder
=
Mock
(
spec
=
FlashAttentionMetadataBuilder
),
layer_names
=
[
"model.layers.0"
,
"model.layers.1"
],
),
],
]
kv_caches
=
{
"model.layers.0"
:
torch
.
zeros
(
1
,
2
,
3
),
"model.layers.1"
:
torch
.
ones
(
1
,
2
,
3
),
}
initialize_kv_cache_for_kv_sharing
(
add_kv_sharing_layers_to_kv_cache_groups
(
shared_kv_cache_layers
=
shared_kv_cache_layers
,
kv_cache_groups
=
kv_cache_groups
,
kv_caches
=
kv_caches
,
attn_groups
=
attn_groups
,
)
# Check that the KV caches were shared correctly
assert
kv_caches
[
"model.layers.2"
].
data_ptr
(
)
==
kv_caches
[
"model.layers.0"
].
data_ptr
()
assert
kv_caches
[
"model.layers.3"
].
data_ptr
(
)
==
kv_caches
[
"model.layers.1"
].
data_ptr
()
# Check that the layers were added to the correct KV cache group
assert
len
(
kv_cache_groups
)
==
1
assert
kv_cache_groups
[
0
].
layer_names
==
[
"model.layers.0"
,
"model.layers.1"
,
"model.layers.2"
,
"model.layers.3"
]
# Check that the layers were added to the attention groups
assert
len
(
attn_groups
)
==
1
and
len
(
attn_groups
[
0
])
==
1
assert
attn_groups
[
0
][
0
].
layer_names
==
[
"model.layers.0"
,
"model.layers.1"
,
"model.layers.2"
,
"model.layers.3"
]
def
test_initialize_kv_cache_for_kv_sharing_no_attn_groups
():
"""
...
...
@@ -162,23 +85,11 @@ def test_initialize_kv_cache_for_kv_sharing_no_attn_groups():
KVCacheGroupSpec
([
"model.layers.1"
],
new_kv_cache_spec
()),
]
kv_caches
=
{
"model.layers.0"
:
torch
.
zeros
(
1
,
2
,
3
),
"model.layers.1"
:
torch
.
ones
(
1
,
2
,
3
),
}
initialize_kv_cache_for_kv_sharing
(
add_kv_sharing_layers_to_kv_cache_groups
(
shared_kv_cache_layers
=
shared_kv_cache_layers
,
kv_cache_groups
=
kv_cache_groups
,
kv_caches
=
kv_caches
,
)
# Check that the KV caches were shared correctly
assert
kv_caches
[
"model.layers.2"
].
data_ptr
(
)
==
kv_caches
[
"model.layers.0"
].
data_ptr
()
assert
kv_caches
[
"model.layers.3"
].
data_ptr
(
)
==
kv_caches
[
"model.layers.1"
].
data_ptr
()
# Check that the layers were added to the correct KV cache group
assert
len
(
kv_cache_groups
)
==
2
assert
kv_cache_groups
[
0
].
layer_names
==
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment