[ROCm][CI] Increase the memory threshold for test_deep_sleep_fp8_kvcache (#30104)

Signed-off-by: charlifu <charlifu@amd.com>

[ROCm][CI] Increase the memory threshold for test_deep_sleep_fp8_kvcache (#30104)
Signed-off-by: charlifu <charlifu@amd.com>
2c22c4ca · Charlie Fu · GitHub · 5867819e · 2c22c4ca
Unverified Commit 2c22c4ca authored Dec 04, 2025 by Charlie Fu Committed by GitHub Dec 05, 2025
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 2 deletions

tests/basic_correctness/test_cumem.py tests/basic_correctness/test_cumem.py +7 -2

No files found.
--- a/tests/basic_correctness/test_cumem.py
+++ b/tests/basic_correctness/test_cumem.py
@@ -260,13 +260,18 @@ def test_deep_sleep_fp8_kvcache():
    llm.sleep(level=2)

    used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline
-    assert used_bytes < 3 * GiB_bytes
+
+    # Rocm uses more memory for CudaGraphs, so we add 2 GiB more for the threshold
+    rocm_extra_mem_bytes = 2 * GiB_bytes if current_platform.is_rocm() else 0
+    mem_threshold_after_sleep = 3 * GiB_bytes + rocm_extra_mem_bytes
+    assert used_bytes < mem_threshold_after_sleep

    llm.wake_up(tags=["weights"])
    llm.collective_rpc("reload_weights")

    used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline
-    assert used_bytes < 4 * GiB_bytes
+    mem_threshold_after_wake_up = 4 * GiB_bytes + rocm_extra_mem_bytes
+    assert used_bytes < mem_threshold_after_wake_up

    # now allocate kv cache and cuda graph memory
    llm.wake_up(tags=["kv_cache"])