Unverified commit 2c22c4ca, authored by Charlie Fu and committed by GitHub
Browse files

[ROCm][CI] Increase the memory threshold for test_deep_sleep_fp8_kvcache (#30104)


Signed-off-by: charlifu <charlifu@amd.com>
parent 5867819e
...@@ -260,13 +260,18 @@ def test_deep_sleep_fp8_kvcache(): ...@@ -260,13 +260,18 @@ def test_deep_sleep_fp8_kvcache():
llm.sleep(level=2) llm.sleep(level=2)
used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline
assert used_bytes < 3 * GiB_bytes
# Rocm uses more memory for CudaGraphs, so we add 2 GiB more for the threshold
rocm_extra_mem_bytes = 2 * GiB_bytes if current_platform.is_rocm() else 0
mem_threshold_after_sleep = 3 * GiB_bytes + rocm_extra_mem_bytes
assert used_bytes < mem_threshold_after_sleep
llm.wake_up(tags=["weights"]) llm.wake_up(tags=["weights"])
llm.collective_rpc("reload_weights") llm.collective_rpc("reload_weights")
used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline
assert used_bytes < 4 * GiB_bytes mem_threshold_after_wake_up = 4 * GiB_bytes + rocm_extra_mem_bytes
assert used_bytes < mem_threshold_after_wake_up
# now allocate kv cache and cuda graph memory # now allocate kv cache and cuda graph memory
llm.wake_up(tags=["kv_cache"]) llm.wake_up(tags=["kv_cache"])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment