Unverified commit 33ef1941, authored by Nicolò Lucchesi, committed by GitHub
Browse files

[Bugfix][CI] Fix...


[Bugfix][CI] Fix `v1/kv_connector/unit/test_nixl_connector_hma.py::test_fewer_blocks_with_hma` (#40597)
Signed-off-by: NickLucche <nlucches@redhat.com>
parent a4905133
...@@ -2,9 +2,11 @@ ...@@ -2,9 +2,11 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for NixlConnectorScheduler with HMA and Mamba N-1 prefill.""" """Unit tests for NixlConnectorScheduler with HMA and Mamba N-1 prefill."""
import gc
from unittest.mock import patch from unittest.mock import patch
import pytest import pytest
import torch
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.config import KVTransferConfig from vllm.config import KVTransferConfig
...@@ -196,12 +198,13 @@ def test_fewer_blocks_with_hma(monkeypatch, model_name, sw_size): ...@@ -196,12 +198,13 @@ def test_fewer_blocks_with_hma(monkeypatch, model_name, sw_size):
llm_kwargs = { llm_kwargs = {
"model": model_name, "model": model_name,
"enforce_eager": True, "enforce_eager": True,
"gpu_memory_utilization": 0.47, "gpu_memory_utilization": 0.3,
"kv_transfer_config": kv_transfer_config, "kv_transfer_config": kv_transfer_config,
"max_model_len": 2048, "max_model_len": 2048,
"max_num_seqs": 1,
# NOTE: Make sure HMA is enabled # NOTE: Make sure HMA is enabled
"disable_hybrid_kv_cache_manager": False, "disable_hybrid_kv_cache_manager": False,
"max_num_batched_tokens": 1024, "max_num_batched_tokens": 2048,
"enable_prefix_caching": False, "enable_prefix_caching": False,
"block_size": block_size, "block_size": block_size,
} }
...@@ -248,6 +251,8 @@ def test_fewer_blocks_with_hma(monkeypatch, model_name, sw_size): ...@@ -248,6 +251,8 @@ def test_fewer_blocks_with_hma(monkeypatch, model_name, sw_size):
assert len(group_block_ids) == expected_num_remote_blocks assert len(group_block_ids) == expected_num_remote_blocks
def run_test_and_cleanup(): def run_test_and_cleanup():
gc.collect()
torch.accelerator.empty_cache()
llm = LLM(**llm_kwargs) llm = LLM(**llm_kwargs)
try: try:
run_hma_test(llm) run_hma_test(llm)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment