Unverified commit 6d4d0a61, authored by Schwinn Saereesitthipitak, committed by GitHub
Browse files

fix(tests): compare shadow failover restore to RO footprint (#8247)


Signed-off-by: Schwinn Saereesitthipitak <schwinns@nvidia.com>
Co-authored-by: Dan Gil <dagil@nvidia.com>
parent b2f7f220
......@@ -79,7 +79,7 @@ def _start_primary(
*,
weights_hash: str,
quiesced_memory_after_shadow_b: int,
shadow_a_released_bytes: int,
shadow_b_released_bytes: int,
):
primary = manager.start_engine("primary", read_only_weights=True)
assert_completion_ok(
......@@ -94,7 +94,7 @@ def _start_primary(
"Primary active memory",
quiesced_memory_after_shadow_b,
primary_memory_in_use,
shadow_a_released_bytes,
shadow_b_released_bytes,
)
weights_with_primary, _ = wait_for_active_layout(
......@@ -210,7 +210,7 @@ def _run_shadow_failover_test(
)
(
weights_state_after_shadow_a,
shadow_a_released_bytes,
_,
_,
) = quiesce_engine(
weights_gms,
......@@ -231,7 +231,7 @@ def _run_shadow_failover_test(
)
(
weights_state_after_shadow_b,
_,
shadow_b_released_bytes,
quiesced_memory_after_shadow_b,
) = quiesce_engine(
weights_gms,
......@@ -248,6 +248,8 @@ def _run_shadow_failover_test(
kv_events_after_shadow_quiesce = kv_cache_gms.get_event_history().events
assert_kv_history(kv_events_after_shadow_quiesce, cleared_layouts=2)
# Later engines import the committed weights layout read-only, so
# compare them against the importer footprint from shadow-b.
primary, weights_with_primary = _start_primary(
manager,
frontend_port,
......@@ -255,7 +257,7 @@ def _run_shadow_failover_test(
kv_cache_gms,
weights_hash=weights_hash,
quiesced_memory_after_shadow_b=quiesced_memory_after_shadow_b,
shadow_a_released_bytes=shadow_a_released_bytes,
shadow_b_released_bytes=shadow_b_released_bytes,
)
resume_result = _resume_shadow_after_primary_failover(
shadow_a,
......@@ -269,7 +271,7 @@ def _run_shadow_failover_test(
"Shadow resume memory",
quiesced_memory_after_shadow_b,
shadow_memory_after_resume,
shadow_a_released_bytes,
shadow_b_released_bytes,
)
# Once the primary is gone, the failover shadow should finish resume
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment