[Bugfix][CI] Fix...

[Bugfix][CI] Fix `v1/kv_connector/unit/test_nixl_connector_hma.py::test_fewer_blocks_with_hma` (#40597)

Signed-off-by: NickLucche <nlucches@redhat.com>

[Bugfix][CI] Fix...
[Bugfix][CI] Fix `v1/kv_connector/unit/test_nixl_connector_hma.py::test_fewer_blocks_with_hma` (#40597)

Signed-off-by: NickLucche <nlucches@redhat.com>
33ef1941 · Nicolò Lucchesi · GitHub · a4905133 · 33ef1941
Unverified Commit 33ef1941 authored Apr 22, 2026 by Nicolò Lucchesi Committed by GitHub Apr 22, 2026
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 2 deletions

tests/v1/kv_connector/unit/test_nixl_connector_hma.py tests/v1/kv_connector/unit/test_nixl_connector_hma.py +7 -2

No files found.
--- a/tests/v1/kv_connector/unit/test_nixl_connector_hma.py
+++ b/tests/v1/kv_connector/unit/test_nixl_connector_hma.py
@@ -2,9 +2,11 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Unit tests for NixlConnectorScheduler with HMA and Mamba N-1 prefill."""

+import gc
 from unittest.mock import patch

 import pytest
+import torch

 from vllm import LLM, SamplingParams
 from vllm.config import KVTransferConfig
@@ -196,12 +198,13 @@ def test_fewer_blocks_with_hma(monkeypatch, model_name, sw_size):
    llm_kwargs = {
        "model": model_name,
        "enforce_eager": True,
-        "gpu_memory_utilization": 0.47,
+        "gpu_memory_utilization": 0.3,
        "kv_transfer_config": kv_transfer_config,
        "max_model_len": 2048,
+        "max_num_seqs": 1,
        # NOTE: Make sure HMA is enabled
        "disable_hybrid_kv_cache_manager": False,
-        "max_num_batched_tokens": 1024,
+        "max_num_batched_tokens": 2048,
        "enable_prefix_caching": False,
        "block_size": block_size,
    }
@@ -248,6 +251,8 @@ def test_fewer_blocks_with_hma(monkeypatch, model_name, sw_size):
            assert len(group_block_ids) == expected_num_remote_blocks

    def run_test_and_cleanup():
+        gc.collect()
+        torch.accelerator.empty_cache()
        llm = LLM(**llm_kwargs)
        try:
            run_hma_test(llm)